|  | @@ -6,16 +6,20 @@ class Parse
 | 
											
												
													
														|  |  {
 |  |  {
 | 
											
												
													
														|  |  	private $url = '';
 |  |  	private $url = '';
 | 
											
												
													
														|  |  	private $host = '';
 |  |  	private $host = '';
 | 
											
												
													
														|  | -	private $dom = array();
 |  | 
 | 
											
												
													
														|  | 
 |  | +	private $log;
 | 
											
												
													
														|  | 
 |  | +	private $doc = array();
 | 
											
												
													
														|  |  	private $data = array();
 |  |  	private $data = array();
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  	public function __construct($url, $project, $rule, $col)
 |  |  	public function __construct($url, $project, $rule, $col)
 | 
											
												
													
														|  |  	{
 |  |  	{
 | 
											
												
													
														|  | -		$this->url($url);
 |  | 
 | 
											
												
													
														|  | -		$dom = $this->dom($rule);
 |  | 
 | 
											
												
													
														|  | -		foreach ($dom as $k => $v) {
 |  | 
 | 
											
												
													
														|  | -			$this->handle(pq($v), $k, $col, $project);
 |  | 
 | 
											
												
													
														|  | 
 |  | +		$doc = Doc::getInstance($url, $rule);
 | 
											
												
													
														|  | 
 |  | +		$doc->log(new Log($project));
 | 
											
												
													
														|  | 
 |  | +		$data = $doc->get();
 | 
											
												
													
														|  | 
 |  | +		foreach ($data as $k => $v) {
 | 
											
												
													
														|  | 
 |  | +			print_r($data);die;
 | 
											
												
													
														|  | 
 |  | +			$this->data[$k] = $this->load($doc, $v, $col);
 | 
											
												
													
														|  |  		}
 |  |  		}
 | 
											
												
													
														|  | 
 |  | +		$doc->saveLog();
 | 
											
												
													
														|  |  	}
 |  |  	}
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  	public function get()
 |  |  	public function get()
 | 
											
										
											
												
													
														|  | @@ -23,34 +27,17 @@ class Parse
 | 
											
												
													
														|  |  		return $this->data;
 |  |  		return $this->data;
 | 
											
												
													
														|  |  	}
 |  |  	}
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -	private function url($url)
 |  | 
 | 
											
												
													
														|  | 
 |  | +	private function load($doc, $data, $col)
 | 
											
												
													
														|  |  	{
 |  |  	{
 | 
											
												
													
														|  | -		$this->url = $url;
 |  | 
 | 
											
												
													
														|  | -		$value = parse_url($this->url);
 |  | 
 | 
											
												
													
														|  | -		$this->host = $value['scheme'] . '://' . $value['host'];
 |  | 
 | 
											
												
													
														|  | -	}
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -	private function dom($rule, $url = '')
 |  | 
 | 
											
												
													
														|  | -	{
 |  | 
 | 
											
												
													
														|  | -		$url = $url ? $url : $this->url;
 |  | 
 | 
											
												
													
														|  | -		if (empty($this->dom[$url])) {
 |  | 
 | 
											
												
													
														|  | -			$dom = new Dom($url, $rule);
 |  | 
 | 
											
												
													
														|  | -			$this->dom[$url] = $dom->get();
 |  | 
 | 
											
												
													
														|  | -		}
 |  | 
 | 
											
												
													
														|  | -		
 |  | 
 | 
											
												
													
														|  | -		return $this->dom[$url];
 |  | 
 | 
											
												
													
														|  | -	}
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -	private function handle($dom, $index, $col, $project)
 |  | 
 | 
											
												
													
														|  | -	{
 |  | 
 | 
											
												
													
														|  | -		foreach ($col as $v) {
 |  | 
 | 
											
												
													
														|  | 
 |  | +		foreach ($data as $v) {
 | 
											
												
													
														|  |  			$callback = false;
 |  |  			$callback = false;
 | 
											
												
													
														|  |  			if (strpos($v['key'], '.') !== false) {
 |  |  			if (strpos($v['key'], '.') !== false) {
 | 
											
												
													
														|  |  				$temp = explode('.', $v['key']);
 |  |  				$temp = explode('.', $v['key']);
 | 
											
												
													
														|  |  				$v['key'] = $temp[1];
 |  |  				$v['key'] = $temp[1];
 | 
											
												
													
														|  |  				$callback = $temp[0];
 |  |  				$callback = $temp[0];
 | 
											
												
													
														|  |  			}
 |  |  			}
 | 
											
												
													
														|  | -			$value = $this->load($dom, $col, $v);
 |  | 
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +			$value = $doc->rule($data, $col, $v);
 | 
											
												
													
														|  |  			if ($value == 'error') {
 |  |  			if ($value == 'error') {
 | 
											
												
													
														|  |  				break;
 |  |  				break;
 | 
											
												
													
														|  |  			}
 |  |  			}
 | 
											
										
											
												
													
														|  | @@ -58,9 +45,14 @@ class Parse
 | 
											
												
													
														|  |  				$value = Dever::{$callback}($value);
 |  |  				$value = Dever::{$callback}($value);
 | 
											
												
													
														|  |  			}
 |  |  			}
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -			$this->data[$index][$v['key']] = $value;
 |  | 
 | 
											
												
													
														|  | 
 |  | +			$data[$v['key']] = $value;
 | 
											
												
													
														|  | 
 |  | +		}
 | 
											
												
													
														|  | 
 |  | +		if (Dever::input('test') == 1) {
 | 
											
												
													
														|  | 
 |  | +			$doc->outLog();
 | 
											
												
													
														|  | 
 |  | +			print_r($data);die;
 | 
											
												
													
														|  |  		}
 |  |  		}
 | 
											
												
													
														|  | -		$this->update($this->data[$index], $project);
 |  | 
 | 
											
												
													
														|  | 
 |  | +		$this->update($data, $project);
 | 
											
												
													
														|  | 
 |  | +		return $data;
 | 
											
												
													
														|  |  	}
 |  |  	}
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  	private function update($data, $project)
 |  |  	private function update($data, $project)
 | 
											
										
											
												
													
														|  | @@ -85,65 +77,4 @@ class Parse
 | 
											
												
													
														|  |  			$id = Dever::db('spider/data')->insert($update);
 |  |  			$id = Dever::db('spider/data')->insert($update);
 | 
											
												
													
														|  |  		}
 |  |  		}
 | 
											
												
													
														|  |  	}
 |  |  	}
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -	private function load($dom, $col, $config)
 |  | 
 | 
											
												
													
														|  | -	{
 |  | 
 | 
											
												
													
														|  | -		$data = $this->rule($dom, $col, $config['collect_rule'], $config['collect_include'], $config['collect_exclude']);
 |  | 
 | 
											
												
													
														|  | -		if ($config['collect_include'] && strpos($data, $config['collect_include']) === false) {
 |  | 
 | 
											
												
													
														|  | -			return 'error';
 |  | 
 | 
											
												
													
														|  | -		}
 |  | 
 | 
											
												
													
														|  | -		if ($config['collect_exclude'] && strpos($data, $config['collect_exclude']) !== false) {
 |  | 
 | 
											
												
													
														|  | -			return 'error';
 |  | 
 | 
											
												
													
														|  | -		}
 |  | 
 | 
											
												
													
														|  | -		if ($config['collect_filter']) {
 |  | 
 | 
											
												
													
														|  | -			$data = preg_replace('/' . $config['collect_filter'] . '/i', '', $data);
 |  | 
 | 
											
												
													
														|  | -		}
 |  | 
 | 
											
												
													
														|  | -		return $data;
 |  | 
 | 
											
												
													
														|  | -	}
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -	private function rule($dom, $col, $rule, $include, $exclude)
 |  | 
 | 
											
												
													
														|  | -	{
 |  | 
 | 
											
												
													
														|  | -		$result = $dom->html();
 |  | 
 | 
											
												
													
														|  | -		$rule = explode("\n", $rule);
 |  | 
 | 
											
												
													
														|  | -		if (isset($rule[0]) && $rule[0]) {
 |  | 
 | 
											
												
													
														|  | -			if (isset($col[$rule[0]])) {
 |  | 
 | 
											
												
													
														|  | -				$url = $this->getUrl($dom, $col, $col[$rule[0]]);
 |  | 
 | 
											
												
													
														|  | -				$dom = $this->dom('', $url);
 |  | 
 | 
											
												
													
														|  | -				array_shift($rule);
 |  | 
 | 
											
												
													
														|  | -			}
 |  | 
 | 
											
												
													
														|  | -			$result = $this->find($dom, $rule[0], $result);
 |  | 
 | 
											
												
													
														|  | -		}
 |  | 
 | 
											
												
													
														|  | -		if (isset($rule[1]) && $rule[1]) $result = $this->match($rule[1], $result);
 |  | 
 | 
											
												
													
														|  | -		return $result;
 |  | 
 | 
											
												
													
														|  | -	}
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -	private function find($dom, $string, $result)
 |  | 
 | 
											
												
													
														|  | -	{
 |  | 
 | 
											
												
													
														|  | -		$string = str_replace(array('$', ').'), array('$dom->find', ')->'), $string);
 |  | 
 | 
											
												
													
														|  | -		$cmd = '$result = ' . $string . ';';
 |  | 
 | 
											
												
													
														|  | -		eval($cmd);
 |  | 
 | 
											
												
													
														|  | -		return $result;
 |  | 
 | 
											
												
													
														|  | -	}
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -	private function getUrl($dom, $col, $config)
 |  | 
 | 
											
												
													
														|  | -	{
 |  | 
 | 
											
												
													
														|  | -		$url = $this->load($dom, $col, $config);
 |  | 
 | 
											
												
													
														|  | -		if (strpos($url, 'http') === false) {
 |  | 
 | 
											
												
													
														|  | -			if ($url[0] == '/') {
 |  | 
 | 
											
												
													
														|  | -				$url = $this->host . $url;
 |  | 
 | 
											
												
													
														|  | -			} else {
 |  | 
 | 
											
												
													
														|  | -				$url = $this->url . $url;
 |  | 
 | 
											
												
													
														|  | -			}
 |  | 
 | 
											
												
													
														|  | -		}
 |  | 
 | 
											
												
													
														|  | -		return $url;
 |  | 
 | 
											
												
													
														|  | -	}
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -	private function match($pattern, $string)
 |  | 
 | 
											
												
													
														|  | -	{
 |  | 
 | 
											
												
													
														|  | -		$temp = explode('||', $pattern);
 |  | 
 | 
											
												
													
														|  | -		$index = isset($temp[1]) ? $temp[1] : 1;
 |  | 
 | 
											
												
													
														|  | -		preg_match_all('/' . $temp[0] . '/i', $string, $match);
 |  | 
 | 
											
												
													
														|  | -		$result = $match[$index][0];
 |  | 
 | 
											
												
													
														|  | -		return $result;
 |  | 
 | 
											
												
													
														|  | -	}
 |  | 
 | 
											
												
													
														|  |  }
 |  |  }
 |