// generate-identifier-regex.js
  1. "use strict";
  2. // Always use the latest available version of Unicode!
  3. // https://tc39.github.io/ecma262/#sec-conformance
  4. const version = "15.1.0";
  5. const start = require(
  6. "@unicode/unicode-" + version + "/Binary_Property/ID_Start/code-points.js"
  7. ).filter(function (ch) {
  8. return ch > 0x7f;
  9. });
  10. let last = -1;
  11. const cont = require(
  12. "@unicode/unicode-" + version + "/Binary_Property/ID_Continue/code-points.js"
  13. ).filter(function (ch) {
  14. return ch > 0x7f && search(start, ch, last + 1) == -1;
  15. });
  16. function search(arr, ch, starting) {
  17. for (let i = starting; arr[i] <= ch && i < arr.length; last = i++) {
  18. if (arr[i] === ch) return i;
  19. }
  20. return -1;
  21. }
  22. function pad(str, width) {
  23. while (str.length < width) str = "0" + str;
  24. return str;
  25. }
  26. function esc(code) {
  27. const hex = code.toString(16);
  28. if (hex.length <= 2) return "\\x" + pad(hex, 2);
  29. else return "\\u" + pad(hex, 4);
  30. }
  31. function generate(chars) {
  32. const astral = [];
  33. let re = "";
  34. for (let i = 0, at = 0x10000; i < chars.length; i++) {
  35. const from = chars[i];
  36. let to = from;
  37. while (i < chars.length - 1 && chars[i + 1] == to + 1) {
  38. i++;
  39. to++;
  40. }
  41. if (to <= 0xffff) {
  42. if (from == to) re += esc(from);
  43. else if (from + 1 == to) re += esc(from) + esc(to);
  44. else re += esc(from) + "-" + esc(to);
  45. } else {
  46. astral.push(from - at, to - from);
  47. at = to;
  48. }
  49. }
  50. return { nonASCII: re, astral: astral };
  51. }
  52. const startData = generate(start);
  53. const contData = generate(cont);
  54. console.log("/* prettier-ignore */");
  55. console.log('let nonASCIIidentifierStartChars = "' + startData.nonASCII + '";');
  56. console.log("/* prettier-ignore */");
  57. console.log('let nonASCIIidentifierChars = "' + contData.nonASCII + '";');
  58. console.log("/* prettier-ignore */");
  59. console.log(
  60. "const astralIdentifierStartCodes = " + JSON.stringify(startData.astral) + ";"
  61. );
  62. console.log("/* prettier-ignore */");
  63. console.log(
  64. "const astralIdentifierCodes = " + JSON.stringify(contData.astral) + ";"
  65. );