ImportOds.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. <?php
  2. /**
  3. * OpenDocument Spreadsheet import plugin for phpMyAdmin
  4. *
  5. * @todo Pretty much everything
  6. * @todo Importing of accented characters seems to fail
  7. */
  8. declare(strict_types=1);
  9. namespace PhpMyAdmin\Plugins\Import;
  10. use PhpMyAdmin\File;
  11. use PhpMyAdmin\Import;
  12. use PhpMyAdmin\Message;
  13. use PhpMyAdmin\Plugins\ImportPlugin;
  14. use PhpMyAdmin\Properties\Options\Groups\OptionsPropertyMainGroup;
  15. use PhpMyAdmin\Properties\Options\Groups\OptionsPropertyRootGroup;
  16. use PhpMyAdmin\Properties\Options\Items\BoolPropertyItem;
  17. use PhpMyAdmin\Properties\Plugins\ImportPluginProperties;
  18. use SimpleXMLElement;
  19. use function __;
  20. use function count;
  21. use function implode;
  22. use function libxml_disable_entity_loader;
  23. use function rtrim;
  24. use function simplexml_load_string;
  25. use function strcmp;
  26. use function strlen;
  27. use const LIBXML_COMPACT;
  28. use const PHP_VERSION_ID;
  /**
   * Handles the import for the ODS format
   *
   * The uploaded document is read in full, parsed with SimpleXML, and every
   * sheet found under office:body/office:spreadsheet is converted into a
   * table description that is handed to the Import helper for type analysis
   * and SQL generation.
   */
  class ImportOds extends ImportPlugin
  {
      /**
       * @psalm-return non-empty-lowercase-string
       */
      public function getName(): string
      {
          return 'ods';
      }

      /**
       * Builds the plugin description and its format specific options
       * (column names in first row, empty rows, percentages, currencies).
       */
      protected function setProperties(): ImportPluginProperties
      {
          $importPluginProperties = new ImportPluginProperties();
          $importPluginProperties->setText('OpenDocument Spreadsheet');
          $importPluginProperties->setExtension('ods');
          $importPluginProperties->setOptionsText(__('Options'));

          // create the root group that will be the options field for
          // $importPluginProperties
          // this will be shown as "Format specific options"
          $importSpecificOptions = new OptionsPropertyRootGroup('Format Specific Options');

          // general options main group
          $generalOptions = new OptionsPropertyMainGroup('general_opts');

          // create primary items and add them to the group
          $leaf = new BoolPropertyItem(
              'col_names',
              __(
                  'The first line of the file contains the table column names'
                  . ' <i>(if this is unchecked, the first line will become part'
                  . ' of the data)</i>'
              )
          );
          $generalOptions->addProperty($leaf);

          $leaf = new BoolPropertyItem(
              'empty_rows',
              __('Do not import empty rows')
          );
          $generalOptions->addProperty($leaf);

          $leaf = new BoolPropertyItem(
              'recognize_percentages',
              __(
                  'Import percentages as proper decimals <i>(ex. 12.00% to .12)</i>'
              )
          );
          $generalOptions->addProperty($leaf);

          $leaf = new BoolPropertyItem(
              'recognize_currency',
              __('Import currencies <i>(ex. $5.00 to 5.00)</i>')
          );
          $generalOptions->addProperty($leaf);

          // add the main group to the root group
          $importSpecificOptions->addProperty($generalOptions);

          // set the options for the import plugin property item
          $importPluginProperties->setOptions($importSpecificOptions);

          return $importPluginProperties;
      }

      /**
       * Handles the whole import logic
       *
       * Reads the complete file into memory, parses it as XML, collects the
       * sheets and passes the resulting tables to the Import helper, which
       * builds and runs the SQL statements. On a parse failure an error
       * message is stored in $GLOBALS['message'] and $GLOBALS['error'] is set.
       *
       * @param File|null $importHandle handle passed through to Import::getNextChunk
       * @param array     $sql_data     2-element array with sql data
       */
      public function doImport(?File $importHandle = null, array &$sql_data = []): void
      {
          global $db, $error, $timeout_passed, $finished;

          $buffer = '';

          /**
           * Read in the file via Import::getNextChunk so that
           * it can process compressed files
           */
          while (! $finished && ! $error && ! $timeout_passed) {
              $data = $this->import->getNextChunk($importHandle);
              if ($data === false) {
                  /* subtract data we didn't handle yet and stop processing */
                  $GLOBALS['offset'] -= strlen($buffer);
                  break;
              }

              if ($data === true) {
                  continue;
              }

              /* Append new data to buffer */
              $buffer .= $data;
          }

          /**
           * Disable loading of external XML entities for PHP versions below 8.0.
           */
          if (PHP_VERSION_ID < 80000) {
              // phpcs:ignore Generic.PHP.DeprecatedFunctions.Deprecated
              libxml_disable_entity_loader();
          }

          /**
           * Load the XML string
           *
           * The option LIBXML_COMPACT is specified because it can
           * result in increased performance without the need to
           * alter the code in any way. It's basically a freebie.
           */
          $xml = @simplexml_load_string($buffer, SimpleXMLElement::class, LIBXML_COMPACT);

          unset($buffer);

          if ($xml === false) {
              $sheets = [];
              $GLOBALS['message'] = Message::error(
                  __(
                      'The XML file specified was either malformed or incomplete. Please correct the issue and try again.'
                  )
              );
              $GLOBALS['error'] = true;
          } else {
              /** @var SimpleXMLElement $root */
              $root = $xml->children('office', true)->{'body'}->{'spreadsheet'};
              if (empty($root)) {
                  $sheets = [];
                  $GLOBALS['message'] = Message::error(
                      __('Could not parse OpenDocument Spreadsheet!')
                  );
                  $GLOBALS['error'] = true;
              } else {
                  $sheets = $root->children('table', true);
              }
          }

          [$tables, $rows] = $this->iterateOverTables($sheets);

          /**
           * Bring accumulated rows into the corresponding table
           */
          $num_tables = count($tables);
          $num_rows = count($rows);
          for ($i = 0; $i < $num_tables; ++$i) {
              for ($j = 0; $j < $num_rows; ++$j) {
                  /* Only merge the row set that belongs to this table name */
                  if ($tables[$i][Import::TBL_NAME] !== $rows[$j][Import::TBL_NAME]) {
                      continue;
                  }

                  if (! isset($tables[$i][Import::COL_NAMES])) {
                      $tables[$i][] = $rows[$j][Import::COL_NAMES];
                  }

                  $tables[$i][Import::ROWS] = $rows[$j][Import::ROWS];
              }
          }

          /* No longer needed */
          unset($rows);

          /* Obtain the best-fit MySQL types for each column */
          $analyses = [];
          foreach ($tables as $table) {
              $analyses[] = $this->import->analyzeTable($table);
          }

          /**
           * string $db_name (no backquotes)
           *
           * array $table = array(table_name, array() column_names, array()() rows)
           * array $tables = array of "$table"s
           *
           * array $analysis = array(array() column_types, array() column_sizes)
           * array $analyses = array of "$analysis"s
           *
           * array $create = array of SQL strings
           *
           * array $options = an associative array of options
           */

          /* Set database name to the currently selected one, if applicable */
          [$db_name, $options] = $this->getDbnameAndOptions($db, 'ODS_DB');

          /* Non-applicable parameters */
          $create = null;

          /* Create and execute necessary SQL statements from data */
          $this->import->buildSql($db_name, $tables, $analyses, $create, $options, $sql_data);

          unset($tables, $analyses);

          /* Commit any possible data in buffers */
          $this->import->runQuery('', '', $sql_data);
      }

      /**
       * Get value
       *
       * Returns the numeric office:value of a percentage or currency cell when
       * the matching request option is enabled; otherwise returns the text of
       * all paragraphs of the cell joined with newlines.
       *
       * @param SimpleXMLElement $cell_attrs Cell attributes
       * @param SimpleXMLElement $text       Texts
       *
       * @return float|string
       */
      protected function getValue($cell_attrs, $text)
      {
          if (
              ! empty($_REQUEST['ods_recognize_percentages'])
              && (string) $cell_attrs['value-type'] === 'percentage'
          ) {
              return (float) $cell_attrs['value'];
          }

          if (
              ! empty($_REQUEST['ods_recognize_currency'])
              && (string) $cell_attrs['value-type'] === 'currency'
          ) {
              return (float) $cell_attrs['value'];
          }

          /* We need to concatenate all paragraphs */
          $values = [];
          foreach ($text as $paragraph) {
              // Maybe a text node has the content ? (email, url, ...)
              // Example: <text:a ... xlink:href="mailto:contact@example.org">test@example.fr</text:a>
              $paragraphValue = $paragraph->__toString();
              if ($paragraphValue === '' && isset($paragraph->{'a'})) {
                  $values[] = $paragraph->{'a'}->__toString();
                  continue;
              }

              $values[] = $paragraphValue;
          }

          return implode("\n", $values);
      }

      /**
       * Walks over the cells of one row and appends their values either to the
       * data row or, for a header row, to the list of column names.
       *
       * A cell is expanded according to its table:number-columns-repeated
       * attribute. Cells without text content become 'NULL' values (or a
       * generated column name in a header row); an empty cell that is the last
       * cell of the row is ignored.
       *
       * @param SimpleXMLElement $row                    The row element
       * @param bool             $col_names_in_first_row Whether this row holds the column names
       * @param array            $tempRow                Values collected so far for this row
       * @param array            $col_names              Column names collected so far
       * @param int              $col_count              Number of columns seen so far in this row
       *
       * @return array the updated [$tempRow, $col_names, $col_count]
       */
      private function iterateOverColumns(
          SimpleXMLElement $row,
          bool $col_names_in_first_row,
          array $tempRow,
          array $col_names,
          int $col_count
      ): array {
          $cellCount = $row->count();
          $a = 0;
          foreach ($row as $cell) {
              $a++;
              $text = $cell->children('text', true);
              $cell_attrs = $cell->attributes('office', true);

              if ($text->count() !== 0) {
                  $attr = $cell->attributes('table', true);
                  $num_repeat = (int) $attr['number-columns-repeated'];
                  /* A missing (or zero) repeat count means the cell appears once */
                  $num_iterations = $num_repeat ?: 1;

                  for ($k = 0; $k < $num_iterations; $k++) {
                      $value = $this->getValue($cell_attrs, $text);
                      if (! $col_names_in_first_row) {
                          $tempRow[] = $value;
                      } else {
                          // MySQL column names can't end with a space
                          // character.
                          $col_names[] = rtrim((string) $value);
                      }

                      ++$col_count;
                  }

                  continue;
              }

              // skip an empty (possibly repeated) cell when it is the last cell of the row
              if ($a === $cellCount) {
                  continue;
              }

              $attr = $cell->attributes('table', true);
              $num_null = (int) $attr['number-columns-repeated'];
              /* A missing (or zero) repeat count means a single empty cell */
              $num_iterations = $num_null ?: 1;

              for ($i = 0; $i < $num_iterations; ++$i) {
                  if (! $col_names_in_first_row) {
                      $tempRow[] = 'NULL';
                  } else {
                      $col_names[] = $this->import->getColumnAlphaName($col_count + 1);
                  }

                  ++$col_count;
              }
          }

          return [$tempRow, $col_names, $col_count];
      }

      /**
       * Walks over the table-row children of a sheet and collects their values.
       *
       * Only the first table-row can be a header row; after each processed row
       * the header flag is cleared and the per-row state is reset. When the
       * 'ods_empty_rows' request option is set, rows consisting solely of
       * 'NULL' values are dropped.
       *
       * @param SimpleXMLElement $sheet                  The sheet element
       * @param bool             $col_names_in_first_row Whether the first row holds the column names
       * @param array            $tempRow                Working buffer for the current row
       * @param array            $col_names              Column names collected so far
       * @param int              $col_count              Column counter for the current row
       * @param int              $max_cols               Width of the widest row seen so far
       * @param array            $tempRows               Rows collected so far
       *
       * @return array the updated [$tempRow, $col_names, $max_cols, $tempRows]
       */
      private function iterateOverRows(
          SimpleXMLElement $sheet,
          bool $col_names_in_first_row,
          array $tempRow,
          array $col_names,
          int $col_count,
          int $max_cols,
          array $tempRows
      ): array {
          foreach ($sheet as $row) {
              /* Ignore everything that is not a row (column definitions, ...) */
              if ($row->getName() !== 'table-row') {
                  continue;
              }

              [$tempRow, $col_names, $col_count] = $this->iterateOverColumns(
                  $row,
                  $col_names_in_first_row,
                  $tempRow,
                  $col_names,
                  $col_count
              );

              /* Find the widest row */
              if ($col_count > $max_cols) {
                  $max_cols = $col_count;
              }

              /* Don't include a row that is full of NULL values */
              if (! $col_names_in_first_row) {
                  if ($_REQUEST['ods_empty_rows'] ?? false) {
                      foreach ($tempRow as $cell) {
                          if ((string) $cell !== 'NULL') {
                              $tempRows[] = $tempRow;
                              break;
                          }
                      }
                  } else {
                      $tempRows[] = $tempRow;
                  }
              }

              $col_count = 0;
              $col_names_in_first_row = false;
              $tempRow = [];
          }

          return [$tempRow, $col_names, $max_cols, $tempRows];
      }

      /**
       * Converts every non-empty sheet into a table name and a row set.
       *
       * Rows and column names of a sheet are padded so that each one is as
       * wide as the widest row of that sheet.
       *
       * @param array|SimpleXMLElement $sheets Sheets of the spreadsheet.
       *
       * @return array|array[] [$tables, $rows]: $tables holds one [name] entry
       *                       per sheet, $rows one [name, column names, rows] entry
       */
      private function iterateOverTables($sheets): array
      {
          $tables = [];
          $max_cols = 0;
          $col_count = 0;
          $col_names = [];
          $tempRow = [];
          $tempRows = [];
          $rows = [];

          /** @var SimpleXMLElement $sheet */
          foreach ($sheets as $sheet) {
              $col_names_in_first_row = isset($_REQUEST['ods_col_names']);

              [$tempRow, $col_names, $max_cols, $tempRows] = $this->iterateOverRows(
                  $sheet,
                  $col_names_in_first_row,
                  $tempRow,
                  $col_names,
                  $col_count,
                  $max_cols,
                  $tempRows
              );

              /* Skip over empty sheets */
              if (count($tempRows) === 0 || count($tempRows[0]) === 0) {
                  $col_names = [];
                  $tempRow = [];
                  $tempRows = [];
                  /*
                   * A skipped sheet may still have measured a width (e.g. it
                   * only had a header row); do not let it widen the next sheet.
                   */
                  $max_cols = 0;
                  continue;
              }

              /**
               * Fill out each row as necessary to make
               * every one exactly as wide as the widest
               * row. This includes column names.
               */

              /* Fill out column names */
              for ($i = count($col_names); $i < $max_cols; ++$i) {
                  $col_names[] = $this->import->getColumnAlphaName($i + 1);
              }

              /* Fill out all rows */
              $num_rows = count($tempRows);
              for ($i = 0; $i < $num_rows; ++$i) {
                  for ($j = count($tempRows[$i]); $j < $max_cols; ++$j) {
                      $tempRows[$i][] = 'NULL';
                  }
              }

              /* Store the table name so we know where to place the row set */
              $tbl_attr = $sheet->attributes('table', true);
              $tables[] = [(string) $tbl_attr['name']];

              /* Store the current sheet in the accumulator */
              $rows[] = [
                  (string) $tbl_attr['name'],
                  $col_names,
                  $tempRows,
              ];

              /* Reset the per-sheet state before the next sheet */
              $tempRows = [];
              $col_names = [];
              $max_cols = 0;
          }

          return [$tables, $rows];
      }
  }