| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 | <?phpnamespace KIF\String;/** *  * 注音 * @usage: * 	$objZhuYin = new KIF\String\ZhuYin(); * 	print_r($objZhuYin->conversion('负离子去屑止痒洗发露', true, false)); * @author gaoxiaogang * */class ZhuYin{	private $mywords = array();	/**	 * @name __construct	 * @desc 构造函数	 */	public function __construct()	{		$tmp = array();		$file = file( dirname(__FILE__) . DS . '/zhuyin_table_ISCCD.txt');		foreach ($file as $k => $v)		{			$tmp_str = explode(":|", $v);			$tmp[$tmp_str[0]] = explode("|", trim($tmp_str[1]));		}		$this->mywords = $tmp;	}	/**	 * @name CToE	 * @desc 中文转英文	 * @param array $list	 */	public function CToE($list)	{		$return = array();		if(empty($list)) return ;		foreach ($list as $k => $v)		{			if(strlen($v) == 1) //如果是英文			{				$return[] = array($v) ;			}else //如果是中文			{				if(isset($this->mywords[$v]))				{					$return[] = $this->mywords[$v];				}			}		}		return $return ;	}	/**	 * @name conversion	 * @desc Utf8编码切词	 * @param string $words	 * @param bool $return_complex  是否复合输出,true用数组返回所有可能,false字符串返回一种可能	 * @param bool $return_prefix   是否输出首字母组合,默认为true	 * @param bool $return_english  是否为纯中文输出,默认true	 * @return mixed $results	 * @access public	 */	public function conversion($words, $return_complex = true, $return_prefix = true, $return_english = true)	{		$words_list = $english_list = $return_list = array();		$words_list = self::splitWords($words, $return_english);		$english_list = $this->CToE($words_list);		if(empty($english_list)) return array();		if($return_complex === true)		{			$compelte_list = $prefix_list = array();			foreach ($english_list as $k => $v)			{				if(empty($compelte_list))				{					$compelte_list = $v ;					if($return_prefix === true)					{						foreach ($v as $key => $value)						{							$prefix_list[$key] = substr($value,0,1);						}					}					continue;				}				$num = count($v);				$tmp = $ptmp = array();				foreach ($compelte_list as $key => $value)				{					if($num == 1)//只有一个音					{						$tmp[] = $value . $v[0] ;						if($return_prefix === true){							$ptmp[] = $prefix_list[$key] . substr($v[0], 0, 1);						}					}else//多音字处理 					{						for( $i = 0 ; $i < $num ; $i++)						{							$tmp[] = $value . $v[$i];							if($return_prefix == true){								$ptmp[] = $prefix_list[$key] . substr($v[$i], 0, 1);							}						}						$compelte_list = $tmp ;					}				}				$compelte_list = $tmp;				$prefix_list = $ptmp;			}			$return_list = array_merge($compelte_list,$prefix_list);		}else 		{			$compelte_list = $prefix_list = "" ;			foreach ($english_list as $k => $v)			{				$compelte_list .= $v[0];				if(true === $return_prefix) $prefix_list .= substr($v[0],0,1);			}			if(true === $return_prefix) $return_list = array($compelte_list,$prefix_list);			else $return_list = array($compelte_list);		}		return $return_list ;	}	/**	 * @name splitWords	 * @desc Utf8编码切词	 * @param string $words	 * @param bool $return_english 是否返回英文	 * @return array $results	 * @access public	 */	public static function splitWords($words, $return_english)	{		$results = array();		$len = strlen($words);		if($len == 0) return $results ;		for ($i = 0 ; $i < $len ; $i++)		{			$unicode = ord($words[$i]);			if ( $unicode >= 129)//如果为汉字			{				$results[] = $words[$i] . $words[++$i] . $words[++$i];				$results[] = ' ';			}			if($return_english === true)//如果为英文			{				if( ($unicode >= 65 && $unicode <= 90) || ($unicode >= 97 && $unicode <= 122)					|| $unicode == 227 || $unicode == 32				) {					$results[] = $words[$i];				}			}		}		return $results;	}}
 |