Charsets.php 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. <?php
  2. /**
  3. * MySQL charset metadata and manipulations
  4. */
  5. declare(strict_types=1);
  6. namespace PhpMyAdmin;
  7. use PhpMyAdmin\Charsets\Charset;
  8. use PhpMyAdmin\Charsets\Collation;
  9. use function __;
  10. use function array_keys;
  11. use function count;
  12. use function explode;
  13. use function is_string;
  14. use function ksort;
  15. use const SORT_STRING;
  16. /**
  17. * Class used to manage MySQL charsets
  18. */
  class Charsets
  {
      /**
       * Maps charset labels (such as 'utf-8' or 'iso-8859-1') to MySQL charset
       * names (such as 'utf8' or 'latin1'). Several labels may map to the same
       * MySQL charset.
       *
       * @var array<string, string>
       */
      public static $mysqlCharsetMap = [
          'big5' => 'big5',
          'cp-866' => 'cp866',
          'euc-jp' => 'ujis',
          'euc-kr' => 'euckr',
          'gb2312' => 'gb2312',
          'gbk' => 'gbk',
          'iso-8859-1' => 'latin1',
          'iso-8859-2' => 'latin2',
          'iso-8859-7' => 'greek',
          'iso-8859-8' => 'hebrew',
          'iso-8859-8-i' => 'hebrew',
          'iso-8859-9' => 'latin5',
          'iso-8859-13' => 'latin7',
          'iso-8859-15' => 'latin1',
          'koi8-r' => 'koi8r',
          'shift_jis' => 'sjis',
          'tis-620' => 'tis620',
          'utf-8' => 'utf8',
          'windows-1250' => 'cp1250',
          'windows-1251' => 'cp1251',
          'windows-1252' => 'latin1',
          'windows-1256' => 'cp1256',
          'windows-1257' => 'cp1257',
      ];

      /**
       * The charset for the server, cached once it has been resolved;
       * null until then.
       *
       * @var Charset|null
       */
      private static $serverCharset = null;

      /**
       * Charsets reported by the server, keyed by charset name.
       * Empty until loadCharsets() has populated it.
       *
       * @var array<string, Charset>
       */
      private static $charsets = [];

      /**
       * Collations reported by the server, keyed by charset name and then by
       * collation name. Empty until loadCollations() has populated it.
       *
       * @var array<string, array<string, Collation>>
       */
      private static $collations = [];

      /**
       * Loads charset data from the server
       *
       * Does nothing when the charset list has already been populated.
       * Otherwise queries the server and stores one Charset per returned row,
       * keyed by the row's `Charset` column and sorted by that key.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param bool              $disableIs Disable use of INFORMATION_SCHEMA
       */
      private static function loadCharsets(DatabaseInterface $dbi, bool $disableIs): void
      {
          /* Data already loaded */
          if (count(self::$charsets) > 0) {
              return;
          }

          if ($disableIs) {
              $sql = 'SHOW CHARACTER SET';
          } else {
              // Column aliases mirror the column names of SHOW CHARACTER SET so
              // that both queries feed Charset::fromServer() the same row keys.
              $sql = 'SELECT `CHARACTER_SET_NAME` AS `Charset`,'
                  . ' `DEFAULT_COLLATE_NAME` AS `Default collation`,'
                  . ' `DESCRIPTION` AS `Description`,'
                  . ' `MAXLEN` AS `Maxlen`'
                  . ' FROM `information_schema`.`CHARACTER_SETS`';
          }

          $res = $dbi->query($sql);

          self::$charsets = [];
          foreach ($res as $row) {
              self::$charsets[$row['Charset']] = Charset::fromServer($row);
          }

          ksort(self::$charsets, SORT_STRING);
      }

      /**
       * Loads collation data from the server
       *
       * Does nothing when the collation list has already been populated.
       * Otherwise queries the server and stores one Collation per returned row,
       * grouped by the row's `Charset` column and keyed by its `Collation`
       * column; each group is sorted by collation name.
       *
       * Note: on the MariaDB branch below $disableIs is not consulted, the
       * INFORMATION_SCHEMA query is always used.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param bool              $disableIs Disable use of INFORMATION_SCHEMA
       */
      private static function loadCollations(DatabaseInterface $dbi, bool $disableIs): void
      {
          /* Data already loaded */
          if (count(self::$collations) > 0) {
              return;
          }

          // NOTE(review): 101000 presumably encodes MariaDB 10.10.0 - confirm
          // against DatabaseInterface::getVersion().
          if ($dbi->isMariaDB() && $dbi->getVersion() >= 101000) {
              /* Use query to accommodate new structure of MariaDB collations.
              Note, that SHOW COLLATION command is not applicable at the time of writing.
              Refer https://jira.mariadb.org/browse/MDEV-27009 */
              $sql = 'SELECT `collapp`.`FULL_COLLATION_NAME` AS `Collation`,'
                  . ' `collapp`.`CHARACTER_SET_NAME` AS `Charset`,'
                  . ' `collapp`.`ID` AS `Id`,'
                  . ' `collapp`.`IS_DEFAULT` AS `Default`,'
                  . ' `coll`.`IS_COMPILED` AS `Compiled`,'
                  . ' `coll`.`SORTLEN` AS `Sortlen`'
                  . ' FROM `information_schema`.`COLLATION_CHARACTER_SET_APPLICABILITY` `collapp`'
                  . ' LEFT JOIN `information_schema`.`COLLATIONS` `coll`'
                  . ' ON `collapp`.`COLLATION_NAME`=`coll`.`COLLATION_NAME`';
          } elseif ($disableIs) {
              $sql = 'SHOW COLLATION';
          } else {
              $sql = 'SELECT `COLLATION_NAME` AS `Collation`,'
                  . ' `CHARACTER_SET_NAME` AS `Charset`,'
                  . ' `ID` AS `Id`,'
                  . ' `IS_DEFAULT` AS `Default`,'
                  . ' `IS_COMPILED` AS `Compiled`,'
                  . ' `SORTLEN` AS `Sortlen`'
                  . ' FROM `information_schema`.`COLLATIONS`';
          }

          $res = $dbi->query($sql);

          self::$collations = [];
          foreach ($res as $row) {
              self::$collations[$row['Charset']][$row['Collation']] = Collation::fromServer($row);
          }

          foreach (array_keys(self::$collations) as $charset) {
              ksort(self::$collations[$charset], SORT_STRING);
          }
      }

      /**
       * Get current server charset
       *
       * The result is cached once a charset known to the server has been
       * resolved. When the server's charset name cannot be matched against the
       * loaded charset list, a placeholder Charset labelled "Unknown" is
       * returned and nothing is cached, so the next call tries again.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param bool              $disableIs Disable use of INFORMATION_SCHEMA
       */
      public static function getServerCharset(DatabaseInterface $dbi, bool $disableIs): Charset
      {
          if (self::$serverCharset !== null) {
              return self::$serverCharset;
          }

          self::loadCharsets($dbi, $disableIs);

          $serverCharset = $dbi->getVariable('character_set_server');
          if (! is_string($serverCharset)) {// MySQL 5.7.8 fallback, issue #15614
              $serverCharset = $dbi->fetchValue('SELECT @@character_set_server;');
          }

          // Only a string can name a charset; anything else (a failed fetch)
          // is treated as "not found" instead of being used as an array offset.
          self::$serverCharset = is_string($serverCharset)
              ? (self::$charsets[$serverCharset] ?? null)
              : null;

          // MySQL 8.0.11+ fallback, issue #16931
          if (self::$serverCharset === null && $serverCharset === 'utf8mb3') {
              // See: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-11.html#mysqld-8-0-11-charset
              // The utf8mb3 character set will be replaced by utf8mb4 in a future MySQL version.
              // The utf8 character set is currently an alias for utf8mb3,
              // but will at that point become a reference to utf8mb4.
              // To avoid ambiguity about the meaning of utf8,
              // consider specifying utf8mb4 explicitly for character set references instead of utf8.
              // Warning: #3719 'utf8' is currently an alias for the character set UTF8MB3 [...]
              //
              // The alias entry is looked up defensively: if the server lists
              // neither name, fall through to the "Unknown" placeholder below
              // rather than returning null from a method declared to return Charset.
              self::$serverCharset = self::$charsets['utf8'] ?? null;
          }

          if (self::$serverCharset === null) {// Fallback in case nothing is found
              return Charset::fromServer(
                  [
                      'Charset' => __('Unknown'),
                      'Description' => __('Unknown'),
                  ]
              );
          }

          return self::$serverCharset;
      }

      /**
       * Get all server charsets
       *
       * Loads the charset list from the server on first use.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param bool              $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array<string, Charset>
       */
      public static function getCharsets(DatabaseInterface $dbi, bool $disableIs): array
      {
          self::loadCharsets($dbi, $disableIs);

          return self::$charsets;
      }

      /**
       * Get all server collations
       *
       * Loads the collation list from the server on first use.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param bool              $disableIs Disable use of INFORMATION_SCHEMA
       *
       * @return array<string, array<string, Collation>>
       */
      public static function getCollations(DatabaseInterface $dbi, bool $disableIs): array
      {
          self::loadCollations($dbi, $disableIs);

          return self::$collations;
      }

      /**
       * Finds a collation by its name
       *
       * The charset the collation belongs to is taken to be the part of the
       * name before the first underscore.
       *
       * @param DatabaseInterface $dbi       DatabaseInterface instance
       * @param bool              $disableIs Disable use of INFORMATION_SCHEMA
       * @param string|null       $name      Collation name
       *
       * @return Collation|null The matching collation, or null when $name is
       *                        null or no such collation is known
       */
      public static function findCollationByName(DatabaseInterface $dbi, bool $disableIs, ?string $name): ?Collation
      {
          // A null name cannot match anything; skip loading and the lookup.
          if ($name === null) {
              return null;
          }

          $charset = explode('_', $name)[0];
          $collations = self::getCollations($dbi, $disableIs);

          return $collations[$charset][$name] ?? null;
      }
  }