Encoding.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. <?php
  2. declare(strict_types=1);
  3. namespace PhpMyAdmin;
  4. use function array_filter;
  5. use function array_intersect;
  6. use function array_map;
  7. use function explode;
  8. use function fclose;
  9. use function feof;
  10. use function fgets;
  11. use function fopen;
  12. use function function_exists;
  13. use function fwrite;
  14. use function iconv;
  15. use function is_string;
  16. use function mb_convert_encoding;
  17. use function mb_convert_kana;
  18. use function mb_detect_encoding;
  19. use function mb_list_encodings;
  20. use function preg_replace;
  21. use function recode_string;
  22. use function str_contains;
  23. use function str_starts_with;
  24. use function strtoupper;
  25. use function tempnam;
  26. use function unlink;
  /**
   * Encoding conversion helper class
   *
   * Picks one conversion backend (iconv, recode, mbstring or none) and exposes
   * static helpers for charset conversion and for the Japanese (Kanji)
   * encodings handled through mbstring.
   */
  class Encoding
  {
      /**
       * None encoding conversion engine
       */
      public const ENGINE_NONE = 0;

      /**
       * iconv encoding conversion engine
       */
      public const ENGINE_ICONV = 1;

      /**
       * recode encoding conversion engine
       */
      public const ENGINE_RECODE = 2;

      /**
       * mbstring encoding conversion engine
       */
      public const ENGINE_MB = 3;

      /**
       * Chosen encoding engine, or null while no engine has been selected yet.
       *
       * @var int|null
       */
      private static $engine = null;

      /**
       * Map of conversion engine configurations
       *
       * Each entry contains:
       *
       * - function to detect
       * - engine constant
       * - extension name to warn when missing
       *
       * The 'none' entry needs no backing function; its first element is never
       * probed (see initEngine()).
       *
       * @var array<string, array{0: string, 1: int, 2: string}>
       */
      private static $enginemap = [
          'iconv' => [
              'iconv',
              self::ENGINE_ICONV,
              'iconv',
          ],
          'recode' => [
              'recode_string',
              self::ENGINE_RECODE,
              'recode',
          ],
          'mb' => [
              'mb_convert_encoding',
              self::ENGINE_MB,
              'mbstring',
          ],
          'none' => [
              'isset',
              self::ENGINE_NONE,
              '',
          ],
      ];

      /**
       * Order of automatic detection of engines
       *
       * @var string[]
       */
      private static $engineorder = [
          'iconv',
          'mb',
          'recode',
      ];

      /**
       * Kanji encodings list (comma separated, in detection order)
       *
       * @var string
       */
      private static $kanjiEncodings = 'ASCII,SJIS,EUC-JP,JIS';

      /**
       * Initializes encoding engine detecting available backends.
       *
       * Reads $GLOBALS['cfg']['RecodingEngine'] (defaults to 'auto'). A configured
       * engine that is known and usable is selected directly; otherwise the
       * engines listed in $engineorder are probed in order, falling back to
       * ENGINE_NONE when none is available.
       */
      public static function initEngine(): void
      {
          $engine = $GLOBALS['cfg']['RecodingEngine'] ?? 'auto';

          /* Use user configuration (only string keys can address the map) */
          if (is_string($engine) && isset(self::$enginemap[$engine])) {
              [$function, $constant, $extension] = self::$enginemap[$engine];

              // 'none' has no backing function: function_exists() reports false for
              // language constructs such as isset, so it must not be probed.
              if ($constant === self::ENGINE_NONE || function_exists($function)) {
                  self::$engine = $constant;

                  return;
              }

              Core::warnMissingExtension($extension);
          }

          /* Autodetection */
          foreach (self::$engineorder as $candidate) {
              if (function_exists(self::$enginemap[$candidate][0])) {
                  self::$engine = self::$enginemap[$candidate][1];

                  return;
              }
          }

          /* Fallback to none conversion */
          self::$engine = self::ENGINE_NONE;
      }

      /**
       * Setter for engine. Use with caution, mostly useful for testing.
       *
       * @param int $engine Engine encoding (one of the ENGINE_* constants)
       */
      public static function setEngine(int $engine): void
      {
          self::$engine = $engine;
      }

      /**
       * Checks whether there is any charset conversion supported
       *
       * Triggers engine detection when no engine has been selected yet.
       */
      public static function isSupported(): bool
      {
          if (self::$engine === null) {
              self::initEngine();
          }

          return self::$engine !== self::ENGINE_NONE;
      }

      /**
       * Converts encoding of text according to parameters with detected
       * conversion function.
       *
       * When the backend reports a failure (returns a non-string), the input is
       * returned unchanged instead of violating the string return type.
       *
       * @param string $src_charset  source charset
       * @param string $dest_charset target charset
       * @param string $what         what to convert
       *
       * @return string converted text
       */
      public static function convertString(
          string $src_charset,
          string $dest_charset,
          string $what
      ): string {
          if ($src_charset === $dest_charset) {
              return $what;
          }

          if (self::$engine === null) {
              self::initEngine();
          }

          switch (self::$engine) {
              case self::ENGINE_RECODE:
                  $converted = recode_string($src_charset . '..' . $dest_charset, $what);
                  break;
              case self::ENGINE_ICONV:
                  $iconvExtraParams = '';
                  // Only accept a suffix that starts with '//' (e.g. //TRANSLIT) so the
                  // configuration value cannot alter the target charset name itself.
                  if (
                      isset($GLOBALS['cfg']['IconvExtraParams'])
                      && is_string($GLOBALS['cfg']['IconvExtraParams'])
                      && str_starts_with($GLOBALS['cfg']['IconvExtraParams'], '//')
                  ) {
                      $iconvExtraParams = $GLOBALS['cfg']['IconvExtraParams'];
                  }

                  $converted = iconv($src_charset, $dest_charset . $iconvExtraParams, $what);
                  break;
              case self::ENGINE_MB:
                  $converted = mb_convert_encoding($what, $dest_charset, $src_charset);
                  break;
              default:
                  return $what;
          }

          return is_string($converted) ? $converted : $what;
      }

      /**
       * Detects whether Kanji encoding is available
       *
       * True only when $GLOBALS['lang'] is exactly 'ja'.
       */
      public static function canConvertKanji(): bool
      {
          return ($GLOBALS['lang'] ?? null) === 'ja';
      }

      /**
       * Getter for Kanji encodings.
       *
       * @return string comma separated Kanji encodings list
       */
      public static function getKanjiEncodings(): string
      {
          return self::$kanjiEncodings;
      }

      /**
       * Setter for Kanji encodings. Use with caution, mostly useful for testing.
       *
       * @param string $value Kanji encodings list
       */
      public static function setKanjiEncodings(string $value): void
      {
          self::$kanjiEncodings = $value;
      }

      /**
       * Reverses SJIS & EUC-JP position in the encoding codes list
       *
       * If the second entry of the current list is 'EUC-JP' the list becomes
       * 'ASCII,SJIS,EUC-JP,JIS'; in every other case it becomes
       * 'ASCII,EUC-JP,SJIS,JIS'.
       */
      public static function kanjiChangeOrder(): void
      {
          $parts = explode(',', self::$kanjiEncodings);

          // A list with a single entry has no second element.
          if (($parts[1] ?? '') === 'EUC-JP') {
              self::$kanjiEncodings = 'ASCII,SJIS,EUC-JP,JIS';

              return;
          }

          self::$kanjiEncodings = 'ASCII,EUC-JP,SJIS,JIS';
      }

      /**
       * Kanji string encoding convert
       *
       * The source encoding is detected among the configured Kanji encodings and
       * assumed to be 'utf-8' when detection fails.
       *
       * @param string $str  the string to convert
       * @param string $enc  the destination encoding code
       * @param string $kana set 'kana' convert to JIS-X208-kana
       *
       * @return string the converted string
       */
      public static function kanjiStrConv(string $str, string $enc, string $kana): string
      {
          if ($enc === '' && $kana === '') {
              return $str;
          }

          $string_encoding = mb_detect_encoding($str, self::$kanjiEncodings);
          if ($string_encoding === false) {
              $string_encoding = 'utf-8';
          }

          if ($kana === 'kana') {
              $str = mb_convert_kana($str, 'KV', $string_encoding);
          }

          if ($enc === '' || $string_encoding === $enc) {
              return $str;
          }

          $converted = mb_convert_encoding($str, $enc, $string_encoding);

          // Keep the (possibly kana-converted) text when the conversion fails.
          return is_string($converted) ? $converted : $str;
      }

      /**
       * Kanji file encoding convert
       *
       * Writes a converted copy of the file into the upload temp directory and
       * removes the original. When nothing has to be converted, or when either
       * file cannot be prepared, the original file name is returned untouched.
       *
       * @param string $file the name of the file to convert
       * @param string $enc  the destination encoding code
       * @param string $kana set 'kana' convert to JIS-X208-kana
       *
       * @return string the name of the converted file
       */
      public static function kanjiFileConv(string $file, string $enc, string $kana): string
      {
          if ($enc === '' && $kana === '') {
              return $file;
          }

          $tmpfname = tempnam($GLOBALS['config']->getUploadTempDir(), $enc);
          if ($tmpfname === false) {
              return $file;
          }

          $fpd = fopen($tmpfname, 'wb');
          if ($fpd === false) {
              return $file;
          }

          $fps = fopen($file, 'r');
          if ($fps === false) {
              // Do not leak the destination handle or leave an empty temp file behind.
              fclose($fpd);
              unlink($tmpfname);

              return $file;
          }

          // The detection order is swapped for the duration of the file conversion
          // and restored afterwards, even if a conversion step throws.
          self::kanjiChangeOrder();
          try {
              while (! feof($fps)) {
                  // NOTE(review): reads at most 4095 bytes per call, so a multibyte
                  // character can be split on lines longer than that.
                  $line = fgets($fps, 4096);
                  if ($line === false) {
                      // Nothing more can be read; stop instead of retrying forever.
                      break;
                  }

                  fwrite($fpd, self::kanjiStrConv($line, $enc, $kana));
              }
          } finally {
              self::kanjiChangeOrder();
              fclose($fps);
              fclose($fpd);
          }

          unlink($file);

          return $tmpfname;
      }

      /**
       * Defines radio form fields to switch between encoding modes
       *
       * @return string HTML code for the radio controls
       */
      public static function kanjiEncodingForm(): string
      {
          $template = new Template();

          return $template->render('encoding/kanji_encoding_form');
      }

      /**
       * Lists available encodings.
       *
       * For every engine except mbstring the configured
       * $GLOBALS['cfg']['AvailableCharsets'] list is returned with
       * ISO-2022-CN-EXT removed. For mbstring the result is the intersection of
       * the lower-cased mb_list_encodings() output with the configured list.
       *
       * @return array
       */
      public static function listEncodings(): array
      {
          if (self::$engine === null) {
              self::initEngine();
          }

          $availableCharsets = $GLOBALS['cfg']['AvailableCharsets'] ?? [];

          /* Most engines do not support listing */
          if (self::$engine !== self::ENGINE_MB) {
              return array_filter($availableCharsets, static function (string $charset): bool {
                  // Removes any ignored character
                  $normalizedCharset = strtoupper((string) preg_replace(['/[^A-Za-z0-9\-\/]/'], '', $charset));

                  // The character set ISO-2022-CN-EXT can be vulnerable (CVE-2024-2961).
                  return ! str_contains($normalizedCharset, 'ISO-2022-CN-EXT')
                      && ! str_contains($normalizedCharset, 'ISO2022CNEXT');
              });
          }

          // NOTE(review): mbstring names are lower-cased before the intersection, so
          // configured charsets containing upper-case letters can never match here —
          // confirm this is intended.
          return array_intersect(
              array_map('strtolower', mb_list_encodings()),
              $availableCharsets
          );
      }
  }