ImportCsv.php 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718
  1. <?php
  2. /* vim: set expandtab sw=4 ts=4 sts=4: */
  3. /**
  4. * CSV import plugin for phpMyAdmin
  5. *
  6. * @todo add an option for handling NULL values
  7. * @package PhpMyAdmin-Import
  8. * @subpackage CSV
  9. */
  10. namespace PhpMyAdmin\Plugins\Import;
  11. use PhpMyAdmin\Import;
  12. use PhpMyAdmin\Message;
  13. use PhpMyAdmin\Properties\Options\Items\BoolPropertyItem;
  14. use PhpMyAdmin\Properties\Options\Items\TextPropertyItem;
  15. use PhpMyAdmin\Util;
  16. /**
  17. * Handles the import for the CSV format
  18. *
  19. * @package PhpMyAdmin-Import
  20. * @subpackage CSV
  21. */
  22. class ImportCsv extends AbstractImportCsv
  23. {
  24. /**
  25. * Whether to analyze tables
  26. *
  27. * @var bool
  28. */
  29. private $_analyze;
  30. /**
  31. * Constructor
  32. */
  33. public function __construct()
  34. {
  35. $this->setProperties();
  36. }
  37. /**
  38. * Sets the import plugin properties.
  39. * Called in the constructor.
  40. *
  41. * @return void
  42. */
  43. protected function setProperties()
  44. {
  45. $this->_setAnalyze(false);
  46. if ($GLOBALS['plugin_param'] !== 'table') {
  47. $this->_setAnalyze(true);
  48. }
  49. $generalOptions = parent::setProperties();
  50. $this->properties->setText('CSV');
  51. $this->properties->setExtension('csv');
  52. if ($GLOBALS['plugin_param'] !== 'table') {
  53. $leaf = new BoolPropertyItem(
  54. "col_names",
  55. __(
  56. 'The first line of the file contains the table column names'
  57. . ' <i>(if this is unchecked, the first line will become part'
  58. . ' of the data)</i>'
  59. )
  60. );
  61. $generalOptions->addProperty($leaf);
  62. } else {
  63. $hint = new Message(
  64. __(
  65. 'If the data in each row of the file is not'
  66. . ' in the same order as in the database, list the corresponding'
  67. . ' column names here. Column names must be separated by commas'
  68. . ' and not enclosed in quotations.'
  69. )
  70. );
  71. $leaf = new TextPropertyItem(
  72. "columns",
  73. __('Column names: ') . Util::showHint($hint)
  74. );
  75. $generalOptions->addProperty($leaf);
  76. }
  77. $leaf = new BoolPropertyItem(
  78. "ignore",
  79. __('Do not abort on INSERT error')
  80. );
  81. $generalOptions->addProperty($leaf);
  82. }
  83. /**
  84. * Handles the whole import logic
  85. *
  86. * @param array &$sql_data 2-element array with sql data
  87. *
  88. * @return void
  89. */
  90. public function doImport(array &$sql_data = array())
  91. {
  92. global $db, $table, $csv_terminated, $csv_enclosed, $csv_escaped,
  93. $csv_new_line, $csv_columns, $err_url;
  94. // $csv_replace and $csv_ignore should have been here,
  95. // but we use directly from $_POST
  96. global $error, $timeout_passed, $finished, $message;
  97. $replacements = array(
  98. '\\n' => "\n",
  99. '\\t' => "\t",
  100. '\\r' => "\r",
  101. );
  102. $csv_terminated = strtr($csv_terminated, $replacements);
  103. $csv_enclosed = strtr($csv_enclosed, $replacements);
  104. $csv_escaped = strtr($csv_escaped, $replacements);
  105. $csv_new_line = strtr($csv_new_line, $replacements);
  106. $param_error = false;
  107. if (strlen($csv_terminated) === 0) {
  108. $message = Message::error(
  109. __('Invalid parameter for CSV import: %s')
  110. );
  111. $message->addParam(__('Columns terminated with'));
  112. $error = true;
  113. $param_error = true;
  114. // The default dialog of MS Excel when generating a CSV produces a
  115. // semi-colon-separated file with no chance of specifying the
  116. // enclosing character. Thus, users who want to import this file
  117. // tend to remove the enclosing character on the Import dialog.
  118. // I could not find a test case where having no enclosing characters
  119. // confuses this script.
  120. // But the parser won't work correctly with strings so we allow just
  121. // one character.
  122. } elseif (mb_strlen($csv_enclosed) > 1) {
  123. $message = Message::error(
  124. __('Invalid parameter for CSV import: %s')
  125. );
  126. $message->addParam(__('Columns enclosed with'));
  127. $error = true;
  128. $param_error = true;
  129. // I could not find a test case where having no escaping characters
  130. // confuses this script.
  131. // But the parser won't work correctly with strings so we allow just
  132. // one character.
  133. } elseif (mb_strlen($csv_escaped) > 1) {
  134. $message = Message::error(
  135. __('Invalid parameter for CSV import: %s')
  136. );
  137. $message->addParam(__('Columns escaped with'));
  138. $error = true;
  139. $param_error = true;
  140. } elseif (mb_strlen($csv_new_line) != 1
  141. && $csv_new_line != 'auto'
  142. ) {
  143. $message = Message::error(
  144. __('Invalid parameter for CSV import: %s')
  145. );
  146. $message->addParam(__('Lines terminated with'));
  147. $error = true;
  148. $param_error = true;
  149. }
  150. // If there is an error in the parameters entered,
  151. // indicate that immediately.
  152. if ($param_error) {
  153. Util::mysqlDie(
  154. $message->getMessage(),
  155. '',
  156. false,
  157. $err_url
  158. );
  159. }
  160. $buffer = '';
  161. $required_fields = 0;
  162. if (!$this->_getAnalyze()) {
  163. $sql_template = 'INSERT';
  164. if (isset($_POST['csv_ignore'])) {
  165. $sql_template .= ' IGNORE';
  166. }
  167. $sql_template .= ' INTO ' . Util::backquote($table);
  168. $tmp_fields = $GLOBALS['dbi']->getColumns($db, $table);
  169. if (empty($csv_columns)) {
  170. $fields = $tmp_fields;
  171. } else {
  172. $sql_template .= ' (';
  173. $fields = array();
  174. $tmp = preg_split('/,( ?)/', $csv_columns);
  175. foreach ($tmp as $key => $val) {
  176. if (count($fields) > 0) {
  177. $sql_template .= ', ';
  178. }
  179. /* Trim also `, if user already included backquoted fields */
  180. $val = trim($val, " \t\r\n\0\x0B`");
  181. $found = false;
  182. foreach ($tmp_fields as $field) {
  183. if ($field['Field'] == $val) {
  184. $found = true;
  185. break;
  186. }
  187. }
  188. if (!$found) {
  189. $message = Message::error(
  190. __(
  191. 'Invalid column (%s) specified! Ensure that columns'
  192. . ' names are spelled correctly, separated by commas'
  193. . ', and not enclosed in quotes.'
  194. )
  195. );
  196. $message->addParam($val);
  197. $error = true;
  198. break;
  199. }
  200. $fields[] = $field;
  201. $sql_template .= Util::backquote($val);
  202. }
  203. $sql_template .= ') ';
  204. }
  205. $required_fields = count($fields);
  206. $sql_template .= ' VALUES (';
  207. }
  208. // Defaults for parser
  209. $i = 0;
  210. $len = 0;
  211. $lastlen = null;
  212. $line = 1;
  213. $lasti = -1;
  214. $values = array();
  215. $csv_finish = false;
  216. $tempRow = array();
  217. $rows = array();
  218. $col_names = array();
  219. $tables = array();
  220. $col_count = 0;
  221. $max_cols = 0;
  222. $csv_terminated_len = mb_strlen($csv_terminated);
  223. while (!($finished && $i >= $len) && !$error && !$timeout_passed) {
  224. $data = Import::getNextChunk();
  225. if ($data === false) {
  226. // subtract data we didn't handle yet and stop processing
  227. $GLOBALS['offset'] -= strlen($buffer);
  228. break;
  229. } elseif ($data === true) {
  230. // Handle rest of buffer
  231. } else {
  232. // Append new data to buffer
  233. $buffer .= $data;
  234. unset($data);
  235. // Force a trailing new line at EOF to prevent parsing problems
  236. if ($finished && $buffer) {
  237. $finalch = mb_substr($buffer, -1);
  238. if ($csv_new_line == 'auto'
  239. && $finalch != "\r"
  240. && $finalch != "\n"
  241. ) {
  242. $buffer .= "\n";
  243. } elseif ($csv_new_line != 'auto'
  244. && $finalch != $csv_new_line
  245. ) {
  246. $buffer .= $csv_new_line;
  247. }
  248. }
  249. // Do not parse string when we're not at the end
  250. // and don't have new line inside
  251. if (($csv_new_line == 'auto'
  252. && mb_strpos($buffer, "\r") === false
  253. && mb_strpos($buffer, "\n") === false)
  254. || ($csv_new_line != 'auto'
  255. && mb_strpos($buffer, $csv_new_line) === false)
  256. ) {
  257. continue;
  258. }
  259. }
  260. // Current length of our buffer
  261. $len = mb_strlen($buffer);
  262. // Currently parsed char
  263. $ch = mb_substr($buffer, $i, 1);
  264. if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
  265. $ch = $this->readCsvTerminatedString(
  266. $buffer,
  267. $ch,
  268. $i,
  269. $csv_terminated_len
  270. );
  271. $i += $csv_terminated_len - 1;
  272. }
  273. while ($i < $len) {
  274. // Deadlock protection
  275. if ($lasti == $i && $lastlen == $len) {
  276. $message = Message::error(
  277. __('Invalid format of CSV input on line %d.')
  278. );
  279. $message->addParam($line);
  280. $error = true;
  281. break;
  282. }
  283. $lasti = $i;
  284. $lastlen = $len;
  285. // This can happen with auto EOL and \r at the end of buffer
  286. if (!$csv_finish) {
  287. // Grab empty field
  288. if ($ch == $csv_terminated) {
  289. if ($i == $len - 1) {
  290. break;
  291. }
  292. $values[] = '';
  293. $i++;
  294. $ch = mb_substr($buffer, $i, 1);
  295. if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
  296. $ch = $this->readCsvTerminatedString(
  297. $buffer,
  298. $ch,
  299. $i,
  300. $csv_terminated_len
  301. );
  302. $i += $csv_terminated_len - 1;
  303. }
  304. continue;
  305. }
  306. // Grab one field
  307. $fallbacki = $i;
  308. if ($ch == $csv_enclosed) {
  309. if ($i == $len - 1) {
  310. break;
  311. }
  312. $need_end = true;
  313. $i++;
  314. $ch = mb_substr($buffer, $i, 1);
  315. if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
  316. $ch = $this->readCsvTerminatedString(
  317. $buffer,
  318. $ch,
  319. $i,
  320. $csv_terminated_len
  321. );
  322. $i += $csv_terminated_len - 1;
  323. }
  324. } else {
  325. $need_end = false;
  326. }
  327. $fail = false;
  328. $value = '';
  329. while (($need_end
  330. && ($ch != $csv_enclosed
  331. || $csv_enclosed == $csv_escaped))
  332. || (!$need_end
  333. && !($ch == $csv_terminated
  334. || $ch == $csv_new_line
  335. || ($csv_new_line == 'auto'
  336. && ($ch == "\r" || $ch == "\n"))))
  337. ) {
  338. if ($ch == $csv_escaped) {
  339. if ($i == $len - 1) {
  340. $fail = true;
  341. break;
  342. }
  343. $i++;
  344. $ch = mb_substr($buffer, $i, 1);
  345. if ($csv_terminated_len > 1
  346. && $ch == $csv_terminated[0]
  347. ) {
  348. $ch = $this->readCsvTerminatedString(
  349. $buffer,
  350. $ch,
  351. $i,
  352. $csv_terminated_len
  353. );
  354. $i += $csv_terminated_len - 1;
  355. }
  356. if ($csv_enclosed == $csv_escaped
  357. && ($ch == $csv_terminated
  358. || $ch == $csv_new_line
  359. || ($csv_new_line == 'auto'
  360. && ($ch == "\r" || $ch == "\n")))
  361. ) {
  362. break;
  363. }
  364. }
  365. $value .= $ch;
  366. if ($i == $len - 1) {
  367. if (!$finished) {
  368. $fail = true;
  369. }
  370. break;
  371. }
  372. $i++;
  373. $ch = mb_substr($buffer, $i, 1);
  374. if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
  375. $ch = $this->readCsvTerminatedString(
  376. $buffer,
  377. $ch,
  378. $i,
  379. $csv_terminated_len
  380. );
  381. $i += $csv_terminated_len - 1;
  382. }
  383. }
  384. // unquoted NULL string
  385. if (false === $need_end && $value === 'NULL') {
  386. $value = null;
  387. }
  388. if ($fail) {
  389. $i = $fallbacki;
  390. $ch = mb_substr($buffer, $i, 1);
  391. if ($csv_terminated_len > 1 && $ch == $csv_terminated[0]) {
  392. $i += $csv_terminated_len - 1;
  393. }
  394. break;
  395. }
  396. // Need to strip trailing enclosing char?
  397. if ($need_end && $ch == $csv_enclosed) {
  398. if ($finished && $i == $len - 1) {
  399. $ch = null;
  400. } elseif ($i == $len - 1) {
  401. $i = $fallbacki;
  402. $ch = mb_substr($buffer, $i, 1);
  403. if ($csv_terminated_len > 1
  404. && $ch == $csv_terminated[0]
  405. ) {
  406. $i += $csv_terminated_len - 1;
  407. }
  408. break;
  409. } else {
  410. $i++;
  411. $ch = mb_substr($buffer, $i, 1);
  412. if ($csv_terminated_len > 1
  413. && $ch == $csv_terminated[0]
  414. ) {
  415. $ch = $this->readCsvTerminatedString(
  416. $buffer,
  417. $ch,
  418. $i,
  419. $csv_terminated_len
  420. );
  421. $i += $csv_terminated_len - 1;
  422. }
  423. }
  424. }
  425. // Are we at the end?
  426. if ($ch == $csv_new_line
  427. || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))
  428. || ($finished && $i == $len - 1)
  429. ) {
  430. $csv_finish = true;
  431. }
  432. // Go to next char
  433. if ($ch == $csv_terminated) {
  434. if ($i == $len - 1) {
  435. $i = $fallbacki;
  436. $ch = mb_substr($buffer, $i, 1);
  437. if ($csv_terminated_len > 1
  438. && $ch == $csv_terminated[0]
  439. ) {
  440. $i += $csv_terminated_len - 1;
  441. }
  442. break;
  443. }
  444. $i++;
  445. $ch = mb_substr($buffer, $i, 1);
  446. if ($csv_terminated_len > 1
  447. && $ch == $csv_terminated[0]
  448. ) {
  449. $ch = $this->readCsvTerminatedString(
  450. $buffer,
  451. $ch,
  452. $i,
  453. $csv_terminated_len
  454. );
  455. $i += $csv_terminated_len - 1;
  456. }
  457. }
  458. // If everything went okay, store value
  459. $values[] = $value;
  460. }
  461. // End of line
  462. if ($csv_finish
  463. || $ch == $csv_new_line
  464. || ($csv_new_line == 'auto' && ($ch == "\r" || $ch == "\n"))
  465. ) {
  466. if ($csv_new_line == 'auto' && $ch == "\r") { // Handle "\r\n"
  467. if ($i >= ($len - 2) && !$finished) {
  468. break; // We need more data to decide new line
  469. }
  470. if (mb_substr($buffer, $i + 1, 1) == "\n") {
  471. $i++;
  472. }
  473. }
  474. // We didn't parse value till the end of line, so there was
  475. // empty one
  476. if (!$csv_finish) {
  477. $values[] = '';
  478. }
  479. if ($this->_getAnalyze()) {
  480. foreach ($values as $val) {
  481. $tempRow[] = $val;
  482. ++$col_count;
  483. }
  484. if ($col_count > $max_cols) {
  485. $max_cols = $col_count;
  486. }
  487. $col_count = 0;
  488. $rows[] = $tempRow;
  489. $tempRow = array();
  490. } else {
  491. // Do we have correct count of values?
  492. if (count($values) != $required_fields) {
  493. // Hack for excel
  494. if ($values[count($values) - 1] == ';') {
  495. unset($values[count($values) - 1]);
  496. } else {
  497. $message = Message::error(
  498. __(
  499. 'Invalid column count in CSV input'
  500. . ' on line %d.'
  501. )
  502. );
  503. $message->addParam($line);
  504. $error = true;
  505. break;
  506. }
  507. }
  508. $first = true;
  509. $sql = $sql_template;
  510. foreach ($values as $key => $val) {
  511. if (!$first) {
  512. $sql .= ', ';
  513. }
  514. if ($val === null) {
  515. $sql .= 'NULL';
  516. } else {
  517. $sql .= '\''
  518. . $GLOBALS['dbi']->escapeString($val)
  519. . '\'';
  520. }
  521. $first = false;
  522. }
  523. $sql .= ')';
  524. if (isset($_POST['csv_replace'])) {
  525. $sql .= " ON DUPLICATE KEY UPDATE ";
  526. foreach ($fields as $field) {
  527. $fieldName = Util::backquote(
  528. $field['Field']
  529. );
  530. $sql .= $fieldName . " = VALUES(" . $fieldName
  531. . "), ";
  532. }
  533. $sql = rtrim($sql, ', ');
  534. }
  535. /**
  536. * @todo maybe we could add original line to verbose
  537. * SQL in comment
  538. */
  539. Import::runQuery($sql, $sql, $sql_data);
  540. }
  541. $line++;
  542. $csv_finish = false;
  543. $values = array();
  544. $buffer = mb_substr($buffer, $i + 1);
  545. $len = mb_strlen($buffer);
  546. $i = 0;
  547. $lasti = -1;
  548. $ch = mb_substr($buffer, 0, 1);
  549. }
  550. } // End of parser loop
  551. } // End of import loop
  552. if ($this->_getAnalyze()) {
  553. /* Fill out all rows */
  554. $num_rows = count($rows);
  555. for ($i = 0; $i < $num_rows; ++$i) {
  556. for ($j = count($rows[$i]); $j < $max_cols; ++$j) {
  557. $rows[$i][] = 'NULL';
  558. }
  559. }
  560. if (isset($_REQUEST['csv_col_names'])) {
  561. $col_names = array_splice($rows, 0, 1);
  562. $col_names = $col_names[0];
  563. // MySQL column names can't end with a space character.
  564. foreach ($col_names as $key => $col_name) {
  565. $col_names[$key] = rtrim($col_name);
  566. }
  567. }
  568. if ((isset($col_names) && count($col_names) != $max_cols)
  569. || !isset($col_names)
  570. ) {
  571. // Fill out column names
  572. for ($i = 0; $i < $max_cols; ++$i) {
  573. $col_names[] = 'COL ' . ($i + 1);
  574. }
  575. }
  576. if (mb_strlen($db)) {
  577. $result = $GLOBALS['dbi']->fetchResult('SHOW TABLES');
  578. $tbl_name = 'TABLE ' . (count($result) + 1);
  579. } else {
  580. $tbl_name = 'TBL_NAME';
  581. }
  582. $tables[] = array($tbl_name, $col_names, $rows);
  583. /* Obtain the best-fit MySQL types for each column */
  584. $analyses = array();
  585. $analyses[] = Import::analyzeTable($tables[0]);
  586. /**
  587. * string $db_name (no backquotes)
  588. *
  589. * array $table = array(table_name, array() column_names, array()() rows)
  590. * array $tables = array of "$table"s
  591. *
  592. * array $analysis = array(array() column_types, array() column_sizes)
  593. * array $analyses = array of "$analysis"s
  594. *
  595. * array $create = array of SQL strings
  596. *
  597. * array $options = an associative array of options
  598. */
  599. /* Set database name to the currently selected one, if applicable */
  600. list($db_name, $options) = $this->getDbnameAndOptions($db, 'CSV_DB');
  601. /* Non-applicable parameters */
  602. $create = null;
  603. /* Created and execute necessary SQL statements from data */
  604. Import::buildSql($db_name, $tables, $analyses, $create, $options, $sql_data);
  605. unset($tables);
  606. unset($analyses);
  607. }
  608. // Commit any possible data in buffers
  609. Import::runQuery('', '', $sql_data);
  610. if (count($values) != 0 && !$error) {
  611. $message = Message::error(
  612. __('Invalid format of CSV input on line %d.')
  613. );
  614. $message->addParam($line);
  615. $error = true;
  616. }
  617. }
  618. /**
  619. * Read the expected column_separated_with String of length
  620. * $csv_terminated_len from the $buffer
  621. * into variable $ch and return the read string $ch
  622. *
  623. * @param string $buffer The original string buffer read from
  624. * csv file
  625. * @param string $ch Partially read "column Separated with"
  626. * string, also used to return after
  627. * reading length equal $csv_terminated_len
  628. * @param int $i Current read counter of buffer string
  629. * @param int $csv_terminated_len The length of "column separated with"
  630. * String
  631. *
  632. * @return string
  633. */
  634. public function readCsvTerminatedString($buffer, $ch, $i, $csv_terminated_len)
  635. {
  636. for ($j = 0; $j < $csv_terminated_len - 1; $j++) {
  637. $i++;
  638. $ch .= mb_substr($buffer, $i, 1);
  639. }
  640. return $ch;
  641. }
  642. /* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */
  643. /**
  644. * Returns true if the table should be analyzed, false otherwise
  645. *
  646. * @return bool
  647. */
  648. private function _getAnalyze()
  649. {
  650. return $this->_analyze;
  651. }
  652. /**
  653. * Sets to true if the table should be analyzed, false otherwise
  654. *
  655. * @param bool $analyze status
  656. *
  657. * @return void
  658. */
  659. private function _setAnalyze($analyze)
  660. {
  661. $this->_analyze = $analyze;
  662. }
  663. }