<?php

namespace KIF\String;
 
/**
 * Phonetic annotation of Chinese text.
 *
 * Looks every UTF-8 character of the input up in a reading table loaded from
 * zhuyin_table_ISCCD.txt (stored next to this file) and concatenates the
 * readings found there.
 *
 * @usage:
 * 	$objZhuYin = new KIF\String\ZhuYin();
 * 	print_r($objZhuYin->conversion('负离子去屑止痒洗发露', true, false));
 * @author gaoxiaogang
 */
class ZhuYin
{
	/**
	 * Reading table: UTF-8 character => list of its readings.
	 *
	 * @var array
	 */
	private $mywords = array();

	/**
	 * @name __construct
	 * @desc Loads the reading table from zhuyin_table_ISCCD.txt.
	 *
	 * Each usable line has the form "<char>:|<reading>|<reading>...".
	 * Lines without the ":|" delimiter are skipped. If the file cannot be
	 * read the table stays empty (file() itself reports the warning).
	 */
	public function __construct()
	{
		// The built-in separator is used instead of the project constant DS,
		// so the class does not depend on the framework bootstrap.
		$lines = file(__DIR__ . DIRECTORY_SEPARATOR . 'zhuyin_table_ISCCD.txt');
		if ($lines === false) {
			return;
		}

		$table = array();
		foreach ($lines as $line) {
			$parts = explode(':|', $line);
			if (!isset($parts[1])) {
				continue; // malformed or blank line: nothing to map
			}
			$table[$parts[0]] = explode('|', trim($parts[1]));
		}
		$this->mywords = $table;
	}

	/**
	 * @name CToE
	 * @desc Maps a list of tokens (as produced by splitWords) to their readings.
	 *
	 * One-byte tokens are passed through unchanged as a single "reading".
	 * Multi-byte tokens are replaced by their entry in the reading table;
	 * tokens that have no entry are dropped silently.
	 *
	 * @param array $list tokens to convert
	 * @return array list of reading lists, one per token that was kept;
	 *               empty array for empty input
	 */
	public function CToE($list)
	{
		$return = array();
		if (empty($list)) {
			return $return;
		}

		foreach ($list as $token) {
			if (strlen($token) == 1) {
				// single byte: ASCII letter or space, keep verbatim
				$return[] = array($token);
			} elseif (isset($this->mywords[$token])) {
				$return[] = $this->mywords[$token];
			}
		}
		return $return;
	}

	/**
	 * @name conversion
	 * @desc Converts a UTF-8 string into its reading(s).
	 *
	 * Note that splitWords() emits a space after every multi-byte character,
	 * so the readings (and the initials) come out space separated.
	 *
	 * @param string $words          text to convert
	 * @param bool   $return_complex true: return every combination of readings
	 *                               (characters with several readings multiply
	 *                               the number of results); any other value:
	 *                               use only the first reading of each character
	 * @param bool   $return_prefix  true: also return the first-letter
	 *                               (initials) form of each result; default true
	 * @param bool   $return_english true: keep ASCII letters and spaces found in
	 *                               the input; otherwise drop them; default true
	 * @return array list of strings. Complex mode: all full readings followed by
	 *               all initials strings. Simple mode: array(full) or
	 *               array(full, initials). Empty array when nothing could be
	 *               converted.
	 * @access public
	 */
	public function conversion($words, $return_complex = true, $return_prefix = true, $return_english = true)
	{
		$readings = $this->CToE(self::splitWords($words, $return_english));
		if (empty($readings)) {
			return array();
		}

		// One strict test used everywhere, so a truthy non-bool value can no
		// longer enable prefixes in some branches only.
		$with_prefix = ($return_prefix === true);

		if ($return_complex !== true) {
			$complete = $prefix = '';
			foreach ($readings as $candidates) {
				$complete .= $candidates[0];
				if ($with_prefix) {
					$prefix .= substr($candidates[0], 0, 1);
				}
			}
			return $with_prefix ? array($complete, $prefix) : array($complete);
		}

		// Cartesian product: extend every partial result with every reading
		// of the next token. Starting from one empty string makes the first
		// token an ordinary step.
		$complete_list = array('');
		$prefix_list = $with_prefix ? array('') : array();
		foreach ($readings as $candidates) {
			$next_complete = $next_prefix = array();
			foreach ($complete_list as $idx => $head) {
				foreach ($candidates as $candidate) {
					$next_complete[] = $head . $candidate;
					if ($with_prefix) {
						$next_prefix[] = $prefix_list[$idx] . substr($candidate, 0, 1);
					}
				}
			}
			$complete_list = $next_complete;
			$prefix_list = $next_prefix;
		}

		return array_merge($complete_list, $prefix_list);
	}

	/**
	 * @name splitWords
	 * @desc Splits a UTF-8 string into tokens.
	 *
	 * Every multi-byte character becomes one token followed by a ' ' token.
	 * ASCII letters and spaces become one-byte tokens when $return_english is
	 * true; every other ASCII byte is dropped. Stray continuation bytes,
	 * invalid lead bytes and a sequence truncated by the end of the string
	 * are skipped.
	 *
	 * @param string $words          text to split
	 * @param bool   $return_english true: keep ASCII letters and spaces
	 * @return array $results list of tokens
	 * @access public
	 */
	public static function splitWords($words, $return_english)
	{
		$results = array();
		$len = strlen($words);
		$i = 0;

		while ($i < $len) {
			$byte = ord($words[$i]);

			if ($byte < 0x80) {
				$is_letter = ($byte >= 65 && $byte <= 90) || ($byte >= 97 && $byte <= 122);
				if ($return_english === true && ($is_letter || $byte == 32)) {
					$results[] = $words[$i];
				}
				$i++;
				continue;
			}

			$width = self::utf8SequenceLength($byte);
			if ($width === 0) {
				$i++; // stray continuation byte or invalid lead byte
				continue;
			}
			if ($i + $width > $len) {
				break; // sequence cut off by the end of the string
			}

			$results[] = substr($words, $i, $width);
			$results[] = ' ';
			$i += $width;
		}

		return $results;
	}

	/**
	 * Returns the byte length (2, 3 or 4) of the UTF-8 sequence introduced by
	 * the given lead byte, or 0 when the byte cannot start a multi-byte sequence.
	 *
	 * @param int $lead_byte byte value 0-255
	 * @return int
	 */
	private static function utf8SequenceLength($lead_byte)
	{
		if ($lead_byte >= 0xC0 && $lead_byte <= 0xDF) {
			return 2;
		}
		if ($lead_byte >= 0xE0 && $lead_byte <= 0xEF) {
			return 3;
		}
		if ($lead_byte >= 0xF0 && $lead_byte <= 0xF7) {
			return 4;
		}
		return 0;
	}
}
 
 
  |