ZhuYin.class.php 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. <?php
  2. namespace KIF\String;
  3. /**
  4. *
  5. * 注音
  6. * @usage:
  7. * $objZhuYin = new KIF\String\ZhuYin();
  8. * print_r($objZhuYin->conversion('负离子去屑止痒洗发露', true, false));
  9. * @author gaoxiaogang
  10. *
  11. */
  12. class ZhuYin
  13. {
  14. private $mywords = array();
  15. /**
  16. * @name __construct
  17. * @desc 构造函数
  18. */
  19. public function __construct()
  20. {
  21. $tmp = array();
  22. $file = file( dirname(__FILE__) . DS . '/zhuyin_table_ISCCD.txt');
  23. foreach ($file as $k => $v)
  24. {
  25. $tmp_str = explode(":|", $v);
  26. $tmp[$tmp_str[0]] = explode("|", trim($tmp_str[1]));
  27. }
  28. $this->mywords = $tmp;
  29. }
  30. /**
  31. * @name CToE
  32. * @desc 中文转英文
  33. * @param array $list
  34. */
  35. public function CToE($list)
  36. {
  37. $return = array();
  38. if(empty($list)) return ;
  39. foreach ($list as $k => $v)
  40. {
  41. if(strlen($v) == 1) //如果是英文
  42. {
  43. $return[] = array($v) ;
  44. }else //如果是中文
  45. {
  46. if(isset($this->mywords[$v]))
  47. {
  48. $return[] = $this->mywords[$v];
  49. }
  50. }
  51. }
  52. return $return ;
  53. }
  54. /**
  55. * @name conversion
  56. * @desc Utf8编码切词
  57. * @param string $words
  58. * @param bool $return_complex 是否复合输出,true用数组返回所有可能,false字符串返回一种可能
  59. * @param bool $return_prefix 是否输出首字母组合,默认为true
  60. * @param bool $return_english 是否为纯中文输出,默认true
  61. * @return mixed $results
  62. * @access public
  63. */
  64. public function conversion($words, $return_complex = true, $return_prefix = true, $return_english = true)
  65. {
  66. $words_list = $english_list = $return_list = array();
  67. $words_list = self::splitWords($words, $return_english);
  68. $english_list = $this->CToE($words_list);
  69. if(empty($english_list)) return array();
  70. if($return_complex === true)
  71. {
  72. $compelte_list = $prefix_list = array();
  73. foreach ($english_list as $k => $v)
  74. {
  75. if(empty($compelte_list))
  76. {
  77. $compelte_list = $v ;
  78. if($return_prefix === true)
  79. {
  80. foreach ($v as $key => $value)
  81. {
  82. $prefix_list[$key] = substr($value,0,1);
  83. }
  84. }
  85. continue;
  86. }
  87. $num = count($v);
  88. $tmp = $ptmp = array();
  89. foreach ($compelte_list as $key => $value)
  90. {
  91. if($num == 1)//只有一个音
  92. {
  93. $tmp[] = $value . $v[0] ;
  94. if($return_prefix === true){
  95. $ptmp[] = $prefix_list[$key] . substr($v[0], 0, 1);
  96. }
  97. }else//多音字处理
  98. {
  99. for( $i = 0 ; $i < $num ; $i++)
  100. {
  101. $tmp[] = $value . $v[$i];
  102. if($return_prefix == true){
  103. $ptmp[] = $prefix_list[$key] . substr($v[$i], 0, 1);
  104. }
  105. }
  106. $compelte_list = $tmp ;
  107. }
  108. }
  109. $compelte_list = $tmp;
  110. $prefix_list = $ptmp;
  111. }
  112. $return_list = array_merge($compelte_list,$prefix_list);
  113. }else
  114. {
  115. $compelte_list = $prefix_list = "" ;
  116. foreach ($english_list as $k => $v)
  117. {
  118. $compelte_list .= $v[0];
  119. if(true === $return_prefix) $prefix_list .= substr($v[0],0,1);
  120. }
  121. if(true === $return_prefix) $return_list = array($compelte_list,$prefix_list);
  122. else $return_list = array($compelte_list);
  123. }
  124. return $return_list ;
  125. }
  126. /**
  127. * @name splitWords
  128. * @desc Utf8编码切词
  129. * @param string $words
  130. * @param bool $return_english 是否返回英文
  131. * @return array $results
  132. * @access public
  133. */
  134. public static function splitWords($words, $return_english)
  135. {
  136. $results = array();
  137. $len = strlen($words);
  138. if($len == 0) return $results ;
  139. for ($i = 0 ; $i < $len ; $i++)
  140. {
  141. $unicode = ord($words[$i]);
  142. if ( $unicode >= 129)//如果为汉字
  143. {
  144. $results[] = $words[$i] . $words[++$i] . $words[++$i];
  145. $results[] = ' ';
  146. }
  147. if($return_english === true)//如果为英文
  148. {
  149. if( ($unicode >= 65 && $unicode <= 90) || ($unicode >= 97 && $unicode <= 122)
  150. || $unicode == 227 || $unicode == 32
  151. ) {
  152. $results[] = $words[$i];
  153. }
  154. }
  155. }
  156. return $results;
  157. }
  158. }