Charsets.php 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704
  1. <?php
  2. /* vim: set expandtab sw=4 ts=4 sts=4: */
  3. /**
  4. * MySQL charset metadata and manipulations
  5. *
  6. * @package PhpMyAdmin
  7. */
  8. namespace PhpMyAdmin;
  9. use PhpMyAdmin\DatabaseInterface;
  10. use PhpMyAdmin\Util;
  /**
   * Class used to manage MySQL charsets
   *
   * Charset and collation metadata is read from the server on first use and
   * then kept in private static properties, so later calls reuse it.
   *
   * @package PhpMyAdmin
   */
  class Charsets
  {
      /**
       * MySQL charsets map
       *
       * Maps charset names as written outside MySQL (for example 'utf-8')
       * to the corresponding MySQL charset name (for example 'utf8').
       *
       * @var array
       */
      public static $mysql_charset_map = array(
          'big5'         => 'big5',
          'cp-866'       => 'cp866',
          'euc-jp'       => 'ujis',
          'euc-kr'       => 'euckr',
          'gb2312'       => 'gb2312',
          'gbk'          => 'gbk',
          'iso-8859-1'   => 'latin1',
          'iso-8859-2'   => 'latin2',
          'iso-8859-7'   => 'greek',
          'iso-8859-8'   => 'hebrew',
          'iso-8859-8-i' => 'hebrew',
          'iso-8859-9'   => 'latin5',
          'iso-8859-13'  => 'latin7',
          'iso-8859-15'  => 'latin1',
          'koi8-r'       => 'koi8r',
          'shift_jis'    => 'sjis',
          'tis-620'      => 'tis620',
          'utf-8'        => 'utf8',
          'windows-1250' => 'cp1250',
          'windows-1251' => 'cp1251',
          'windows-1252' => 'latin1',
          'windows-1256' => 'cp1256',
          'windows-1257' => 'cp1257',
      );

      /**
       * Sorted list of charset names filled by loadCharsets()
       *
       * @var array
       */
      private static $_charsets = array();

      /**
       * The charset for the server
       *
       * @var string
       */
      private static $_charset_server;

      /**
       * Charset descriptions indexed by charset name, filled by loadCharsets()
       *
       * @var array
       */
      private static $_charsets_descriptions = array();

      /**
       * Sorted collation names grouped by charset name, filled by loadCollations()
       *
       * @var array
       */
      private static $_collations = array();

      /**
       * Default collation name indexed by charset name, filled by loadCollations()
       *
       * @var array
       */
      private static $_default_collations = array();

      /**
       * Loads charset data from the MySQL server.
       *
       * Does nothing when the charset list has already been populated.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return void
       */
      private static function loadCharsets(DatabaseInterface $dbi, $disableIs)
      {
          /* Data already loaded */
          if (count(self::$_charsets) > 0) {
              return;
          }

          // Both statements expose the same `Charset` / `Description` columns.
          if ($disableIs) {
              $sql = 'SHOW CHARACTER SET';
          } else {
              $sql = 'SELECT `CHARACTER_SET_NAME` AS `Charset`,'
                  . ' `DESCRIPTION` AS `Description`'
                  . ' FROM `information_schema`.`CHARACTER_SETS`';
          }
          $res = $dbi->query($sql);

          self::$_charsets = array();
          while ($row = $dbi->fetchAssoc($res)) {
              $name = $row['Charset'];
              self::$_charsets[] = $name;
              self::$_charsets_descriptions[$name] = $row['Description'];
          }
          $dbi->freeResult($res);

          sort(self::$_charsets, SORT_STRING);
      }

      /**
       * Loads collation data from the MySQL server.
       *
       * Does nothing when the collation list has already been populated.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return void
       */
      private static function loadCollations(DatabaseInterface $dbi, $disableIs)
      {
          /* Data already loaded */
          if (count(self::$_collations) > 0) {
              return;
          }

          // Both statements expose `Charset`, `Collation` and `Default` columns.
          if ($disableIs) {
              $sql = 'SHOW COLLATION';
          } else {
              $sql = 'SELECT `CHARACTER_SET_NAME` AS `Charset`,'
                  . ' `COLLATION_NAME` AS `Collation`, `IS_DEFAULT` AS `Default`'
                  . ' FROM `information_schema`.`COLLATIONS`';
          }
          $res = $dbi->query($sql);

          while ($row = $dbi->fetchAssoc($res)) {
              $char_set_name = $row['Charset'];
              $name = $row['Collation'];
              self::$_collations[$char_set_name][] = $name;
              // The default flag is accepted both as 'Yes' and as '1'.
              if ($row['Default'] == 'Yes' || $row['Default'] == '1') {
                  self::$_default_collations[$char_set_name] = $name;
              }
          }
          $dbi->freeResult($res);

          foreach (array_keys(self::$_collations) as $char_set_name) {
              sort(self::$_collations[$char_set_name], SORT_STRING);
          }
      }

      /**
       * Get current MySQL server charset.
       *
       * The value is cached after the first successful lookup.
       *
       * @param DatabaseInterface $dbi DatabaseInterface instance
       *
       * @return string
       */
      public static function getServerCharset(DatabaseInterface $dbi)
      {
          if (self::$_charset_server) {
              return self::$_charset_server;
          }

          $charsetServer = $dbi->getVariable('character_set_server');
          if (! is_string($charsetServer)) { // MySQL 5.7.8 fallback, issue #15614
              $charsetServer = $dbi->fetchValue('SELECT @@character_set_server;');
          }
          self::$_charset_server = $charsetServer;

          return self::$_charset_server;
      }

      /**
       * Get MySQL charsets
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array sorted list of charset names
       */
      public static function getMySQLCharsets(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCharsets($dbi, $disableIs);
          return self::$_charsets;
      }

      /**
       * Get MySQL charsets descriptions
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array descriptions indexed by charset name
       */
      public static function getMySQLCharsetsDescriptions(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCharsets($dbi, $disableIs);
          return self::$_charsets_descriptions;
      }

      /**
       * Get MySQL collations
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array sorted collation names grouped by charset name
       */
      public static function getMySQLCollations(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCollations($dbi, $disableIs);
          return self::$_collations;
      }

      /**
       * Get MySQL default collations
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param boolean           $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array default collation name indexed by charset name
       */
      public static function getMySQLCollationsDefault(DatabaseInterface $dbi, $disableIs)
      {
          self::loadCollations($dbi, $disableIs);
          return self::$_default_collations;
      }

      /**
       * Builds the opening part shared by both dropdown boxes: the <select>
       * tag, the optional label option and the empty option.
       *
       * @param string      $name           Element name
       * @param null|string $id             Element id, omitted when empty
       * @param null|string $labelText      Text of the label option, null for none
       * @param bool        $submitOnChange Whether to add the "autosubmit" class
       *
       * @return string
       */
      private static function getSelectStart($name, $id, $labelText, $submitOnChange)
      {
          $html = '<select lang="en" dir="ltr" name="'
              . htmlspecialchars($name) . '"'
              . (empty($id) ? '' : ' id="' . htmlspecialchars($id) . '"')
              . ($submitOnChange ? ' class="autosubmit"' : '') . '>' . "\n";
          if ($labelText !== null) {
              $html .= '<option value="">'
                  . $labelText
                  . '</option>' . "\n";
          }
          $html .= '<option value=""></option>' . "\n";

          return $html;
      }

      /**
       * Returns the text used as title for a charset: its loaded description,
       * or the charset name itself when no description is available.
       *
       * @param string $charset MySQL charset name
       *
       * @return string
       */
      private static function getCharsetTitle($charset)
      {
          return empty(self::$_charsets_descriptions[$charset])
              ? $charset
              : self::$_charsets_descriptions[$charset];
      }

      /**
       * Generate charset dropdown box
       *
       * @param DatabaseInterface $dbi            DatabaseInterface instance
       * @param boolean           $disableIs      Disable use of INFORMATION_SCHEMA
       * @param string            $name           Element name
       * @param string            $id             Element id
       * @param null|string       $default        Default value
       * @param bool              $label          Label
       * @param bool              $submitOnChange Submit on change
       *
       * @return string
       */
      public static function getCharsetDropdownBox(
          DatabaseInterface $dbi,
          $disableIs,
          $name = null,
          $id = null,
          $default = null,
          $label = true,
          $submitOnChange = false
      ) {
          self::loadCharsets($dbi, $disableIs);
          if (empty($name)) {
              $name = 'character_set';
          }

          $return_str = self::getSelectStart(
              $name,
              $id,
              $label ? __('Charset') : null,
              $submitOnChange
          );

          foreach (self::$_charsets as $current_charset) {
              // Names and descriptions come from the server: escape them before
              // they are placed into attribute values or element content.
              $escaped_charset = htmlspecialchars($current_charset);
              $return_str .= '<option value="' . $escaped_charset
                  . '" title="'
                  . htmlspecialchars(self::getCharsetTitle($current_charset)) . '"'
                  . ($default == $current_charset ? ' selected="selected"' : '') . '>'
                  . $escaped_charset . '</option>' . "\n";
          }
          $return_str .= '</select>' . "\n";

          return $return_str;
      }

      /**
       * Generate collation dropdown box
       *
       * Collations are grouped into one <optgroup> per charset.
       *
       * @param DatabaseInterface $dbi            DatabaseInterface instance
       * @param boolean           $disableIs      Disable use of INFORMATION_SCHEMA
       * @param string            $name           Element name
       * @param string            $id             Element id
       * @param null|string       $default        Default value
       * @param bool              $label          Label
       * @param bool              $submitOnChange Submit on change
       *
       * @return string
       */
      public static function getCollationDropdownBox(
          DatabaseInterface $dbi,
          $disableIs,
          $name = null,
          $id = null,
          $default = null,
          $label = true,
          $submitOnChange = false
      ) {
          self::loadCharsets($dbi, $disableIs);
          self::loadCollations($dbi, $disableIs);
          if (empty($name)) {
              $name = 'collation';
          }

          $return_str = self::getSelectStart(
              $name,
              $id,
              $label ? __('Collation') : null,
              $submitOnChange
          );

          foreach (self::$_charsets as $current_charset) {
              $return_str .= '<optgroup label="' . htmlspecialchars($current_charset)
                  . '" title="'
                  . htmlspecialchars(self::getCharsetTitle($current_charset))
                  . '">' . "\n";

              // A charset can be reported without any collation; iterate over
              // an empty list then instead of an undefined array entry.
              $collations = isset(self::$_collations[$current_charset])
                  ? self::$_collations[$current_charset]
                  : array();
              foreach ($collations as $current_collation) {
                  $escaped_collation = htmlspecialchars($current_collation);
                  $return_str .= '<option value="' . $escaped_collation
                      . '" title="'
                      . htmlspecialchars(self::getCollationDescr($current_collation)) . '"'
                      . ($default == $current_collation ? ' selected="selected"' : '')
                      . '>'
                      . $escaped_collation . '</option>' . "\n";
              }
              $return_str .= '</optgroup>' . "\n";
          }
          $return_str .= '</select>' . "\n";

          return $return_str;
      }

      /**
       * Returns description for given collation
       *
       * The collation name is split on "_" and the parts are consumed by a
       * small state machine driven by $level:
       *  - 0: charset name, sets the initial description
       *  - 1: locale name or code, may replace the description
       *  - 2: German variant (after "german" / "de")
       *  - 3: Spanish variant (after "es")
       *  - 4: collation algorithm version (UCA / MySQL)
       *  - 5: suffixes such as "ci", "cs", "bin"
       * A part that is not recognised at one level is retried at the next.
       *
       * @param string $collation MySQL collation string
       *
       * @return string collation description
       */
      public static function getCollationDescr($collation)
      {
          $parts = explode('_', $collation);

          $name = __('Unknown');
          $variant = null;
          $suffixes = array();
          $unicode = false;
          $unknown = false;

          $level = 0;
          foreach ($parts as $part) {
              if ($level == 0) {
                  /* Next will be language */
                  $level = 1;
                  /* First should be charset */
                  switch ($part) {
                      case 'binary':
                          $name = _pgettext('Collation', 'Binary');
                          break;
                      // Unicode charsets
                      case 'utf8mb4':
                          $variant = 'UCA 4.0.0';
                          // Fall through to other unicode
                      case 'ucs2':
                      case 'utf8':
                      case 'utf16':
                      case 'utf16le':
                      case 'utf16be':
                      case 'utf32':
                          $name = _pgettext('Collation', 'Unicode');
                          $unicode = true;
                          break;
                      // West European charsets
                      case 'ascii':
                      case 'cp850':
                      case 'dec8':
                      case 'hp8':
                      case 'latin1':
                      case 'macroman':
                          $name = _pgettext('Collation', 'West European');
                          break;
                      // Central European charsets
                      case 'cp1250':
                      case 'cp852':
                      case 'latin2':
                      case 'macce':
                          $name = _pgettext('Collation', 'Central European');
                          break;
                      // Russian charsets
                      case 'cp866':
                      case 'koi8r':
                          $name = _pgettext('Collation', 'Russian');
                          break;
                      // Chinese charsets
                      case 'gb2312':
                      case 'gbk':
                          $name = _pgettext('Collation', 'Simplified Chinese');
                          break;
                      case 'big5':
                          $name = _pgettext('Collation', 'Traditional Chinese');
                          break;
                      case 'gb18030':
                          $name = _pgettext('Collation', 'Chinese');
                          $unicode = true;
                          break;
                      // Japanese charsets
                      case 'sjis':
                      case 'ujis':
                      case 'cp932':
                      case 'eucjpms':
                          $name = _pgettext('Collation', 'Japanese');
                          break;
                      // Baltic charsets
                      case 'cp1257':
                      case 'latin7':
                          $name = _pgettext('Collation', 'Baltic');
                          break;
                      // Other
                      case 'armscii8':
                      case 'armscii':
                          $name = _pgettext('Collation', 'Armenian');
                          break;
                      case 'cp1251':
                          $name = _pgettext('Collation', 'Cyrillic');
                          break;
                      case 'cp1256':
                          $name = _pgettext('Collation', 'Arabic');
                          break;
                      case 'euckr':
                          $name = _pgettext('Collation', 'Korean');
                          break;
                      case 'hebrew':
                          $name = _pgettext('Collation', 'Hebrew');
                          break;
                      case 'geostd8':
                          $name = _pgettext('Collation', 'Georgian');
                          break;
                      case 'greek':
                          $name = _pgettext('Collation', 'Greek');
                          break;
                      case 'keybcs2':
                          $name = _pgettext('Collation', 'Czech-Slovak');
                          break;
                      case 'koi8u':
                          $name = _pgettext('Collation', 'Ukrainian');
                          break;
                      case 'latin5':
                          $name = _pgettext('Collation', 'Turkish');
                          break;
                      case 'swe7':
                          $name = _pgettext('Collation', 'Swedish');
                          break;
                      case 'tis620':
                          $name = _pgettext('Collation', 'Thai');
                          break;
                      default:
                          $name = _pgettext('Collation', 'Unknown');
                          $unknown = true;
                          break;
                  }
                  continue;
              }
              if ($level == 1) {
                  /* Next will be variant unless changed later */
                  $level = 4;
                  /* Locale name or code */
                  $found = true;
                  switch ($part) {
                      case 'general':
                          break;
                      case 'bulgarian':
                      case 'bg':
                          $name = _pgettext('Collation', 'Bulgarian');
                          break;
                      case 'chinese':
                      case 'cn':
                      case 'zh':
                          // Non-Unicode Chinese charsets keep their more
                          // specific charset-derived name.
                          if ($unicode) {
                              $name = _pgettext('Collation', 'Chinese');
                          }
                          break;
                      case 'croatian':
                      case 'hr':
                          $name = _pgettext('Collation', 'Croatian');
                          break;
                      case 'czech':
                      case 'cs':
                          $name = _pgettext('Collation', 'Czech');
                          break;
                      case 'danish':
                      case 'da':
                          $name = _pgettext('Collation', 'Danish');
                          break;
                      case 'english':
                      case 'en':
                          $name = _pgettext('Collation', 'English');
                          break;
                      case 'esperanto':
                      case 'eo':
                          $name = _pgettext('Collation', 'Esperanto');
                          break;
                      case 'estonian':
                      case 'et':
                          $name = _pgettext('Collation', 'Estonian');
                          break;
                      case 'german1':
                          $name = _pgettext('Collation', 'German (dictionary order)');
                          break;
                      case 'german2':
                          $name = _pgettext('Collation', 'German (phone book order)');
                          break;
                      case 'german':
                      case 'de':
                          /* Name is set later */
                          $level = 2;
                          break;
                      case 'hungarian':
                      case 'hu':
                          $name = _pgettext('Collation', 'Hungarian');
                          break;
                      case 'icelandic':
                      case 'is':
                          $name = _pgettext('Collation', 'Icelandic');
                          break;
                      case 'japanese':
                      case 'ja':
                          $name = _pgettext('Collation', 'Japanese');
                          break;
                      case 'la':
                          $name = _pgettext('Collation', 'Classical Latin');
                          break;
                      case 'latvian':
                      case 'lv':
                          $name = _pgettext('Collation', 'Latvian');
                          break;
                      case 'lithuanian':
                      case 'lt':
                          $name = _pgettext('Collation', 'Lithuanian');
                          break;
                      case 'korean':
                      case 'ko':
                          $name = _pgettext('Collation', 'Korean');
                          break;
                      case 'myanmar':
                      case 'my':
                          $name = _pgettext('Collation', 'Burmese');
                          break;
                      case 'persian':
                          $name = _pgettext('Collation', 'Persian');
                          break;
                      case 'polish':
                      case 'pl':
                          $name = _pgettext('Collation', 'Polish');
                          break;
                      case 'roman':
                          $name = _pgettext('Collation', 'West European');
                          break;
                      case 'romanian':
                      case 'ro':
                          $name = _pgettext('Collation', 'Romanian');
                          break;
                      case 'ru':
                          $name = _pgettext('Collation', 'Russian');
                          break;
                      case 'si':
                      case 'sinhala':
                          $name = _pgettext('Collation', 'Sinhalese');
                          break;
                      case 'slovak':
                      case 'sk':
                          $name = _pgettext('Collation', 'Slovak');
                          break;
                      case 'slovenian':
                      case 'sl':
                          $name = _pgettext('Collation', 'Slovenian');
                          break;
                      case 'spanish':
                          $name = _pgettext('Collation', 'Spanish (modern)');
                          break;
                      case 'es':
                          /* Name is set later */
                          $level = 3;
                          break;
                      case 'spanish2':
                          $name = _pgettext('Collation', 'Spanish (traditional)');
                          break;
                      case 'swedish':
                      case 'sv':
                          $name = _pgettext('Collation', 'Swedish');
                          break;
                      case 'thai':
                      case 'th':
                          $name = _pgettext('Collation', 'Thai');
                          break;
                      case 'turkish':
                      case 'tr':
                          $name = _pgettext('Collation', 'Turkish');
                          break;
                      case 'ukrainian':
                      case 'uk':
                          $name = _pgettext('Collation', 'Ukrainian');
                          break;
                      case 'vietnamese':
                      case 'vi':
                          $name = _pgettext('Collation', 'Vietnamese');
                          break;
                      case 'unicode':
                          // Only overrides the name of an unrecognised charset.
                          if ($unknown) {
                              $name = _pgettext('Collation', 'Unicode');
                          }
                          break;
                      default:
                          $found = false;
                  }
                  if ($found) {
                      continue;
                  }
                  // Not parsed token, fall to next level
              }
              if ($level == 2) {
                  /* Next will be variant */
                  $level = 4;
                  /* German variant */
                  if ($part == 'pb') {
                      $name = _pgettext('Collation', 'German (phone book order)');
                      continue;
                  }
                  $name = _pgettext('Collation', 'German (dictionary order)');
                  // Not parsed token, fall to next level
              }
              if ($level == 3) {
                  /* Next will be variant */
                  $level = 4;
                  /* Spanish variant */
                  if ($part == 'trad') {
                      $name = _pgettext('Collation', 'Spanish (traditional)');
                      continue;
                  }
                  $name = _pgettext('Collation', 'Spanish (modern)');
                  // Not parsed token, fall to next level
              }
              if ($level == 4) {
                  /* Next will be suffix */
                  $level = 5;
                  /* Variant */
                  $found = true;
                  switch ($part) {
                      case '0900':
                          $variant = 'UCA 9.0.0';
                          break;
                      case '520':
                          $variant = 'UCA 5.2.0';
                          break;
                      case 'mysql561':
                          $variant = 'MySQL 5.6.1';
                          break;
                      case 'mysql500':
                          $variant = 'MySQL 5.0.0';
                          break;
                      default:
                          $found = false;
                  }
                  if ($found) {
                      continue;
                  }
                  // Not parsed token, fall to next level
              }
              if ($level == 5) {
                  /* Suffixes; unrecognised ones are ignored */
                  switch ($part) {
                      case 'ci':
                          $suffixes[] = _pgettext('Collation variant', 'case-insensitive');
                          break;
                      case 'cs':
                          $suffixes[] = _pgettext('Collation variant', 'case-sensitive');
                          break;
                      case 'ai':
                          $suffixes[] = _pgettext('Collation variant', 'accent-insensitive');
                          break;
                      case 'as':
                          $suffixes[] = _pgettext('Collation variant', 'accent-sensitive');
                          break;
                      case 'ks':
                          $suffixes[] = _pgettext('Collation variant', 'kana-sensitive');
                          break;
                      case 'w2':
                      case 'l2':
                          $suffixes[] = _pgettext('Collation variant', 'multi-level');
                          break;
                      case 'bin':
                          $suffixes[] = _pgettext('Collation variant', 'binary');
                          break;
                      case 'nopad':
                          $suffixes[] = _pgettext('Collation variant', 'no-pad');
                          break;
                  }
              }
          }

          // Assemble "<name> (<variant>), <suffix>, <suffix>...".
          $result = $name;
          if ($variant !== null) {
              $result .= ' (' . $variant . ')';
          }
          if (count($suffixes) > 0) {
              $result .= ', ' . implode(', ', $suffixes);
          }

          return $result;
      }
  }