| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318 | 
							- <?php
 
- namespace Spider\Lib;
 
- use Dever;
 
- use Spider\Lib\Doc\Dom;
 
/**
 * Runs one collection pass for a project.
 *
 * The constructor asks a Doc instance for the data matching the rule, then for
 * every item extracts the configured columns, computes the preset ("set")
 * fields, optionally pushes the record to other endpoints and finally stores
 * it in the `spider/data` table. The collected records are available through
 * get().
 */
class Parse
{
	/** @var string Not used by any method of this class. */
	private $url = '';

	/** @var string Not used by any method of this class. */
	private $host = '';

	/** @var mixed Not used by any method of this class. */
	private $log;

	/** @var array Not used by any method of this class. */
	private $doc = array();

	/** @var array|false Collected records (a single record or a list keyed like the source data). */
	private $data = array();

	/** @var string Resource key handed to `upload/save.copy`; set per column in getCol(). */
	private $res = '';

	/**
	 * Collects everything the rule yields for $url.
	 *
	 * @param string $url     Page the rule is applied to.
	 * @param mixed  $project Project id (used for logging, pushing and storage).
	 * @param mixed  $rule    Rule passed to Doc::getInstance().
	 * @param mixed  $param   Parameter passed to $doc->get().
	 * @param array  $col     Column definitions; index 1 and 2 are processed by load().
	 * @param array  $set     Preset field definitions, see set().
	 * @param mixed  $push    Push targets, see push().
	 */
	public function __construct($url, $project, $rule, $param, $col, $set, $push)
	{
		$doc = Doc::getInstance($url, $rule);
		$doc->log(new Log($project));

		$data = $doc->get($param);

		// Scalar results may be JSON; keep the raw value when decoding yields nothing.
		if ($data && !is_array($data) && !is_object($data)) {
			$decoded = Dever::json_decode($data);
			if ($decoded) {
				$data = $decoded;
			}
		}

		if ($data) {
			if (is_array($data)) {
				list($scheme, $base) = $this->baseUrl($url);
				foreach ($data as $k => $v) {
					if (is_string($v)) {
						$v = $this->absoluteLink($v, $scheme, $base);
					}
					// The (resolved) item is both the data to load and its source.
					$this->data[$k] = $this->load($doc, $k, $v, $col, $set, $push, $project, $v);
				}
			} else {
				$this->data = $this->load($doc, 0, $data, $col, $set, $push, $project, $url);
			}
		}

		$doc->saveLog();
	}

	/**
	 * Returns what the constructor collected.
	 *
	 * @return array|false
	 */
	public function get()
	{
		return $this->data;
	}

	/**
	 * Splits $url into its scheme and a "scheme://host[:port]/" prefix.
	 *
	 * Missing components are treated as empty strings instead of raising
	 * undefined-index warnings.
	 *
	 * @param string $url
	 * @return array array($scheme, $base)
	 */
	private function baseUrl($url)
	{
		$parts = parse_url($url);
		if (!is_array($parts)) {
			$parts = array();
		}

		$scheme = isset($parts['scheme']) ? $parts['scheme'] : '';
		$host = isset($parts['host']) ? $parts['host'] : '';
		if (isset($parts['port'])) {
			// Keep a non-default port, otherwise relative links point at the wrong server.
			$host .= ':' . $parts['port'];
		}

		return array($scheme, $scheme . '://' . $host . '/');
	}

	/**
	 * Turns a collected link into an absolute one.
	 *
	 * Anything containing "http" is left alone (the original heuristic),
	 * protocol-relative links get the page's scheme, everything else is
	 * appended to $base.
	 *
	 * @param string $link
	 * @param string $scheme Scheme of the page being parsed.
	 * @param string $base   Prefix produced by baseUrl().
	 * @return string
	 */
	private function absoluteLink($link, $scheme, $base)
	{
		if (strstr($link, 'http')) {
			return $link;
		}

		if (strpos($link, '//') === 0) {
			// Same fallback scheme as copy() uses for protocol-relative files.
			return ($scheme ? $scheme : 'https') . ':' . $link;
		}

		return $base . ltrim($link, '/');
	}

	/**
	 * Builds, pushes and stores one record.
	 *
	 * In test mode (request input `test` == 1) the log and a table of the
	 * extracted values are printed and the script terminates.
	 *
	 * @param object $doc     Doc instance used for extraction.
	 * @param mixed  $index   Position/key of the item, used by counter presets.
	 * @param mixed  $data    Item to initialise the document with.
	 * @param array  $col     Column definitions (index 1: on the initialised data, index 2: on $doc->getCur()).
	 * @param array  $set     Preset field definitions.
	 * @param mixed  $push    Push targets.
	 * @param mixed  $project Project id.
	 * @param mixed  $source  Source identifier stored with the record and substituted for "{link}".
	 * @return array|false The record, or false when no columns are configured.
	 */
	private function load($doc, $index, $data, $col, $set, $push, $project, $source)
	{
		$testing = Dever::input('test') == 1;

		if (!$col) {
			if ($testing) {
				$doc->outLog();
				echo 'error';die;
			}
			return false;
		}

		$result = array();
		$table = array();

		if (isset($col[1])) {
			$data = $doc->init($data);
			$this->getCol($doc, $col[1], $data, $result, $table, $source);
		}

		if (isset($col[2])) {
			$data = $doc->getCur();
			$this->getCol($doc, $col[2], $data, $result, $table, $source);
		}

		if ($set) {
			foreach ($set as $item) {
				$value = $this->set($index, $item, $project);
				$result[$item['key']] = $value;
				if ($testing) {
					$table[$item['name']] = $value;
				}
			}
		}

		if ($push) {
			// NOTE(review): the 'test' flag stays in $result, so it is also
			// returned and stored by update() below - confirm this is intended.
			$result['test'] = Dever::input('test');
			$this->push($push, $result, $project);
		}

		if ($testing) {
			$doc->outLog();
			echo Dever::table($table);die;
		}

		$this->update($result, $project, $source);

		return $result;
	}

	/**
	 * Extracts every column of $col from $data into $result.
	 *
	 * A column key of the form "callback.key" post-processes the value with the
	 * global function `callback`, or with `Dever::callback()` when no such
	 * function exists. Extraction stops at the first column for which the rule
	 * returns the string 'error'.
	 *
	 * @param object      $doc    Doc instance.
	 * @param array       $col    Column definitions.
	 * @param mixed       $data   Document data the rules run on.
	 * @param array       $result Receives key => value pairs (by reference).
	 * @param array       $table  Receives name => value pairs in test mode (by reference).
	 * @param mixed|false $source Replacement for "{link}" inside collect rules.
	 * @return void
	 */
	private function getCol($doc, $col, $data, &$result, &$table, $source = false)
	{
		$testing = Dever::input('test') == 1;

		foreach ($col as $v) {
			$callback = false;
			if (strpos($v['key'], '.') !== false) {
				$parts = explode('.', $v['key']);
				$callback = $parts[0];
				$v['key'] = $parts[1];
			}

			if ($source && isset($v['collect_rule']) && strstr($v['collect_rule'], '{link}')) {
				$v['collect_rule'] = str_replace('{link}', $source, $v['collect_rule']);
			}

			$value = $doc->rule($data, $col, $v);

			// Strict comparison: a loose one also matches true (and 0 before PHP 8).
			if ($value === 'error') {
				break;
			}

			if ($callback) {
				// The callback name comes from the column configuration and is
				// invoked as-is; it must only ever be set by trusted users.
				$value = function_exists($callback) ? $callback($value) : Dever::{$callback}($value);
			}

			$type = isset($v['type']) ? $v['type'] : null;

			if (isset($v['local']) && $v['local'] == 1) {
				$this->res = $v['res_key'];
				$value = $this->local($value, $type);
			} elseif ($type == 2 && is_string($value) && strstr($value, '[')) {
				// JSON list -> comma separated string.
				$list = Dever::json_decode($value);
				if ($list) {
					$value = implode(',', $list);
				}
			}

			if (isset($v['collect_filter_link']) && $v['collect_filter_link'] == 1) {
				$value = $this->filter($value);
			}

			if ($value) {
				$result[$v['key']] = $value;
				if ($testing) {
					$table[$v['name']] = $value;
				}
			}
		}
	}

	/**
	 * Sends the record to every configured push target.
	 *
	 * Targets containing "http" are POSTed to with Dever::curl(), all others
	 * are invoked through Dever::load().
	 *
	 * @param mixed $push    Target list, split with Dever::split().
	 * @param array $data    Record to send; `project_id` is added.
	 * @param mixed $project Project id.
	 * @return void
	 */
	private function push($push, $data, $project)
	{
		$data['project_id'] = $project;

		foreach (Dever::split($push) as $target) {
			if (strstr($target, 'http')) {
				Dever::curl($target, $data, 'post');
			} else {
				Dever::load($target, $data);
			}
		}
	}

	/**
	 * Computes the value of a preset field.
	 *
	 * Types: 1 fixed value; 2 counter (value + item index + the value stored
	 * under the same key in the project's `spider/data` row); 3 evaluated PHP
	 * expression; 4 random integer between two bounds; 5 random timestamp
	 * between two dates converted with Dever::maketime().
	 *
	 * @param mixed $index   Item index, only used by type 2.
	 * @param array $data    Preset definition with `type`, `value` and `key`.
	 * @param mixed $project Project id, only used by type 2.
	 * @return mixed The computed value, or null for an unknown type.
	 */
	private function set($index, $data, $project)
	{
		switch ($data['type']) {
			case 1:
				return $data['value'];

			case 2:
				$old = 0;
				$info = Dever::db('spider/data')->getOne(array('pid' => $project));
				if ($info) {
					$stored = json_decode($info['value'], true);
					if (isset($stored[$data['key']])) {
						$old = $stored[$data['key']];
					}
				}
				return $data['value'] + $index + $old;

			case 3:
				// SECURITY: evaluates the configured expression as PHP code.
				// Only safe while preset definitions come from trusted users.
				$value = null;
				$eval = '$value = ' . $data['value'] . ';';
				eval($eval);
				return $value;

			case 4:
				$range = Dever::split($data['value']);
				return $this->randomBetween($range);

			case 5:
				$range = Dever::split($data['value']);
				foreach ($range as $i => $date) {
					$range[$i] = Dever::maketime($date);
				}
				return $this->randomBetween($range);
		}

		return null;
	}

	/**
	 * Returns a random integer between the first two entries of $range.
	 *
	 * The bounds may be given in either order; a missing upper bound falls
	 * back to the lower one so mt_rand() is never called with max < min.
	 *
	 * @param array $range
	 * @return int
	 */
	private function randomBetween($range)
	{
		$a = isset($range[0]) ? (int) $range[0] : 0;
		$b = isset($range[1]) ? (int) $range[1] : $a;

		return mt_rand(min($a, $b), max($a, $b));
	}

	/**
	 * Removes links from HTML, keeping only their inner content.
	 *
	 * Each `<a ... href="...">inner</a>` is replaced by `inner`. The match is
	 * bounded by the tag itself, so several links on one line are handled
	 * independently and other tags starting with "a" are not touched.
	 *
	 * @param mixed $content
	 * @return mixed The filtered content, or $content unchanged if the regex engine fails.
	 */
	private function filter($content)
	{
		$rule = '<a\b[^>]*\shref="[^"]*"[^>]*>(.*?)<\/a>';
		$filtered = preg_replace_callback('/' . $rule . '/is', array($this, 'filter_replace'), $content);

		return $filtered === null ? $content : $filtered;
	}

	/**
	 * preg_replace_callback() handler for filter(): yields the link's inner content.
	 *
	 * @param array $result Match groups; index 1 is the content between the tags.
	 * @return string
	 */
	private function filter_replace($result)
	{
		if (isset($result[1]) && $result[1]) {
			return $result[1];
		}

		return '';
	}

	/**
	 * Localises remote media.
	 *
	 * For type 1 the content is treated as HTML: the `src` of every img, video
	 * and audio element is copied and the references inside the content are
	 * rewritten. For any other type the content itself is treated as the
	 * file (or list of files) to copy.
	 *
	 * @param mixed $content
	 * @param mixed $type
	 * @return mixed
	 */
	private function local($content, $type = 1)
	{
		if ($type != 1) {
			return $this->copy($content);
		}

		$doc = Dom::init($content);

		foreach (array('img', 'video', 'audio') as $tag) {
			$sources = Dom::find($doc, '$("' . $tag . '").each().attr("src")');
			if ($sources) {
				$content = $this->local_replace($sources, $content);
			}
		}

		// A regex based alternative exists in local_rule_replace(); it is not
		// wired in here.

		return $content;
	}

	/**
	 * Copies $file (a URL, a JSON list or an array of them) and replaces every
	 * occurrence in $content with the copied location.
	 *
	 * @param mixed  $file
	 * @param string $content
	 * @return string
	 */
	private function local_replace($file, $content)
	{
		if (is_string($file) && strstr($file, '[')) {
			// Keep the plain string when it merely contains '[' but is not JSON.
			$decoded = Dever::json_decode($file);
			if ($decoded) {
				$file = $decoded;
			}
		}

		if (is_array($file)) {
			foreach ($file as $item) {
				$content = $this->local_replace($item, $content);
			}
			return $content;
		}

		$copied = $this->copy($file);
		if ($copied) {
			$content = str_replace($file, $copied, $content);
		}

		return $content;
	}

	/**
	 * preg_replace_callback() handler that swaps the matched file (group 2)
	 * for its copied location. Not referenced by any active code in this class.
	 *
	 * @param array $result Match groups.
	 * @return string The rewritten match, or the match unchanged when nothing was copied.
	 */
	private function local_rule_replace($result)
	{
		if (isset($result[2]) && $result[2]) {
			$file = $this->copy($result[2]);
			if ($file) {
				return str_replace($result[2], $file, $result[0]);
			}
		}

		// Returning nothing would make preg_replace_callback() erase the tag.
		return isset($result[0]) ? $result[0] : '';
	}

	/**
	 * Copies a remote file through `upload/save.copy` and returns its new URL.
	 *
	 * Accepts a single URL, a JSON list, a comma separated list or an array;
	 * lists yield the successfully copied URLs joined by commas.
	 * Protocol-relative URLs are fetched over https.
	 *
	 * @param mixed $file
	 * @return string Copied URL(s), or '' when the copy failed or $file is empty.
	 */
	private function copy($file)
	{
		if (is_array($file)) {
			return $this->copyList($file);
		}

		if (is_string($file) && strstr($file, '[')) {
			$decoded = Dever::json_decode($file);
			if ($decoded) {
				return $this->copyList((array) $decoded);
			}
		}

		if (!is_scalar($file) || (string) $file === '') {
			return '';
		}
		$file = (string) $file;

		if (strstr($file, ',')) {
			return $this->copyList(explode(',', $file));
		}

		if (!strstr($file, 'http') && strstr($file, '//')) {
			$file = 'https:' . $file;
		}

		// NOTE(review): $file is appended unencoded; a URL containing '&' or '#'
		// may be cut short depending on how Dever::load() parses the query - confirm.
		$data = Dever::load('upload/save.copy?file=' . $file . '&key=' . $this->res . '&state=1');

		if (isset($data['status']) && $data['status'] == -1) {
			return '';
		}

		return isset($data['url']) ? $data['url'] : '';
	}

	/**
	 * Copies every entry of $files and joins the successful results with commas.
	 *
	 * @param array $files
	 * @return string
	 */
	private function copyList($files)
	{
		$copied = array();
		foreach ($files as $item) {
			$url = $this->copy($item);
			if ($url) {
				$copied[] = $url;
			}
		}

		return implode(',', $copied);
	}

	/**
	 * Inserts or updates the `spider/data` row for this project and source,
	 * then prints the row id followed by CRLF.
	 *
	 * @param array $data    Record to store (JSON encoded); nothing happens when empty.
	 * @param mixed $project Project id.
	 * @param mixed $source  Source identifier.
	 * @return void
	 */
	private function update($data, $project, $source)
	{
		if (!$data) {
			return;
		}

		$row = array(
			'pid' => $project,
			'source' => $source,
		);

		$info = Dever::db('spider/data')->one($row);

		$row['value'] = json_encode($data, JSON_UNESCAPED_UNICODE);

		if ($info) {
			$id = $info['id'];
			$row['where_id'] = $id;
			Dever::db('spider/data')->update($row);
		} else {
			$id = Dever::db('spider/data')->insert($row);
		}

		echo $id;
		echo "\r\n";
	}
}
 
 
  |