<?php
namespace KIF\String;
/**
 * Phonetic annotation (注音) of UTF-8 Chinese text.
 *
 * Every multi-byte character of the input is looked up in a table that maps
 * one character to the list of its spellings (presumably pinyin; the table
 * file itself is not part of this source). The class can return the full
 * spelling of a string and, optionally, the string made of the first letter
 * of every spelling.
 *
 * Usage:
 *   $objZhuYin = new KIF\String\ZhuYin();
 *   print_r($objZhuYin->conversion('负离子去屑止痒洗发露', true, false));
 *
 * @author gaoxiaogang
 */
class ZhuYin
{
    /**
     * Lookup table: character => list of spellings.
     *
     * @var array
     */
    private $mywords = array();

    /**
     * Loads the spelling table.
     *
     * @param string|null $table_file Path of the table file. When omitted, the
     *                                file zhuyin_table_ISCCD.txt located next
     *                                to this class is used.
     */
    public function __construct($table_file = null)
    {
        if ($table_file === null) {
            // Built-in constants only: the class must not depend on a
            // project-level DS constant being defined before it is loaded.
            $table_file = __DIR__ . DIRECTORY_SEPARATOR . 'zhuyin_table_ISCCD.txt';
        }
        $this->mywords = self::loadTable($table_file);
    }

    /**
     * Parses a table file whose lines look like "<char>:|<spelling>|<spelling>...".
     *
     * An unreadable file raises an E_USER_WARNING and yields an empty table, so
     * the object stays usable (every lookup simply misses). Lines that do not
     * contain the ":|" separator are skipped.
     *
     * @param string $path
     * @return array character => list of spellings
     */
    private static function loadTable($path)
    {
        $table = array();

        $lines = (is_file($path) && is_readable($path)) ? file($path) : false;
        if ($lines === false) {
            trigger_error('ZhuYin: cannot read spelling table ' . $path, E_USER_WARNING);
            return $table;
        }

        foreach ($lines as $line) {
            $parts = explode(':|', $line);
            if (count($parts) < 2 || $parts[0] === '') {
                continue; // blank or malformed line
            }
            $table[$parts[0]] = explode('|', trim($parts[1]));
        }

        return $table;
    }

    /**
     * Replaces every token of a split string by its list of spellings.
     *
     * One-byte tokens (ASCII characters) are kept as a single-element list.
     * Longer tokens are looked up in the table; tokens that are not in the
     * table are dropped from the result.
     *
     * @param array $list Tokens as produced by splitWords().
     * @return array List of spelling lists; empty array for empty input.
     */
    public function CToE($list)
    {
        $return = array();
        if (empty($list)) {
            return $return;
        }

        foreach ($list as $token) {
            if (strlen($token) === 1) {
                // Single byte: ASCII, passed through unchanged.
                $return[] = array($token);
            } elseif (isset($this->mywords[$token])) {
                $return[] = $this->mywords[$token];
            }
        }

        return $return;
    }

    /**
     * Converts a UTF-8 string to its spelling(s).
     *
     * Note that splitWords() emits a space after every multi-byte character,
     * so spellings in the result are separated (and followed) by a space.
     *
     * @param string $words          Text to convert.
     * @param bool   $return_complex true: return every combination produced by
     *                               characters with several spellings;
     *                               otherwise only the first spelling of each
     *                               character is used.
     * @param bool   $return_prefix  true: also return the first-letter form(s).
     *                               Only the boolean true enables this.
     * @param bool   $return_english true: keep ASCII letters and spaces of the
     *                               input; otherwise they are discarded.
     * @return array Complex mode: all full spellings followed by all
     *               first-letter strings. Simple mode: array(full) or
     *               array(full, first letters). Empty array when nothing in
     *               the input could be converted.
     * @access public
     */
    public function conversion($words, $return_complex = true, $return_prefix = true, $return_english = true)
    {
        $english_list = $this->CToE(self::splitWords($words, $return_english));
        if (empty($english_list)) {
            return array();
        }

        // Evaluated once so that every branch applies the same (strict) rule.
        $with_prefix = ($return_prefix === true);

        if ($return_complex !== true) {
            $full = '';
            $initials = '';
            foreach ($english_list as $spellings) {
                $full .= $spellings[0];
                if ($with_prefix) {
                    $initials .= substr($spellings[0], 0, 1);
                }
            }
            return $with_prefix ? array($full, $initials) : array($full);
        }

        // Cartesian product over the spellings of every token. Both lists are
        // extended in lockstep, so index N of $initial_list always belongs to
        // index N of $full_list.
        $full_list = array('');
        $initial_list = $with_prefix ? array('') : array();
        foreach ($english_list as $spellings) {
            $next_full = array();
            $next_initial = array();
            foreach ($full_list as $idx => $so_far) {
                foreach ($spellings as $spelling) {
                    $next_full[] = $so_far . $spelling;
                    if ($with_prefix) {
                        $next_initial[] = $initial_list[$idx] . substr($spelling, 0, 1);
                    }
                }
            }
            $full_list = $next_full;
            $initial_list = $next_initial;
        }

        return array_merge($full_list, $initial_list);
    }

    /**
     * Splits a UTF-8 string into tokens.
     *
     * Every multi-byte character becomes one token followed by a ' ' token.
     * ASCII letters and the space character become one token each when
     * $return_english is true; every other ASCII byte is discarded. Bytes that
     * do not form a structurally complete UTF-8 sequence (stray continuation
     * bytes, truncated input) are skipped.
     *
     * @param string $words
     * @param bool   $return_english Keep ASCII letters and spaces (only the
     *                               boolean true enables this).
     * @return array List of tokens.
     * @access public
     */
    public static function splitWords($words, $return_english)
    {
        $results = array();
        $words = (string) $words;
        $len = strlen($words);

        $i = 0;
        while ($i < $len) {
            $byte = ord($words[$i]);

            if ($byte < 0x80) {
                $is_letter = ($byte >= 65 && $byte <= 90) || ($byte >= 97 && $byte <= 122);
                if ($return_english === true && ($is_letter || $byte === 32)) {
                    $results[] = $words[$i];
                }
                $i++;
                continue;
            }

            $size = self::utf8SequenceLength($words, $i, $len);
            if ($size === 0) {
                $i++; // not the start of a complete character: skip this byte
                continue;
            }

            $results[] = substr($words, $i, $size);
            $results[] = ' ';
            $i += $size;
        }

        return $results;
    }

    /**
     * Returns the byte length (2-4) of the multi-byte UTF-8 sequence starting
     * at $offset, or 0 when no complete sequence starts there.
     *
     * This is a structural check only (lead byte plus the right number of
     * continuation bytes); overlong forms and surrogates are not rejected.
     *
     * @param string $text
     * @param int    $offset
     * @param int    $length Byte length of $text.
     * @return int
     */
    private static function utf8SequenceLength($text, $offset, $length)
    {
        $lead = ord($text[$offset]);
        if ($lead >= 0xC2 && $lead <= 0xDF) {
            $size = 2;
        } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
            $size = 3;
        } elseif ($lead >= 0xF0 && $lead <= 0xF4) {
            $size = 4;
        } else {
            return 0;
        }

        if ($offset + $size > $length) {
            return 0;
        }
        for ($k = 1; $k < $size; $k++) {
            if ((ord($text[$offset + $k]) & 0xC0) !== 0x80) {
                return 0;
            }
        }

        return $size;
    }
}