<?php

namespace Spider\Lib;

use Dever;

// Crawls may run for a long time; remove PHP's execution time limit.
set_time_limit(0);

/**
 * Scheduling and execution entry points for spider (crawler) projects.
 *
 * Project status values as they are used in this class (their names are not
 * defined in this file):
 *  - add_api() queues projects whose status is <= 2 and moves them to 3;
 *  - task() / page() set status 4 together with the page being fetched;
 *  - run() sets status 2 once it has finished.
 */
class Api
{
    /** @var Queue|null Queue consumed by load(); created by cron() or lazily by load(). */
    private $queue;

    /**
     * Put a project on the crawl queue.
     *
     * @param mixed $id Project id, or an already loaded project row (array).
     * @return string Always 'reload'.
     */
    public function add_api($id)
    {
        $config = is_array($id) ? $id : Dever::load('spider/lib/project')->get($id);

        if (!$config) {
            // NOTE(review): the code below assumes Dever::alert() aborts the request - confirm.
            Dever::alert('项目不存在');
        }

        // Only projects with status <= 2 are queued; they are moved to status 3.
        if ($config['status'] <= 2) {
            Dever::load('spider/lib/project')->set($config, 3);
            Dever::load('spider/lib/queue')->push($config['id']);
        }

        return 'reload';
    }

    /**
     * Run a project synchronously with the "test" input flag set to 1.
     *
     * @param mixed $id Not used directly: run() takes no parameters and reads
     *                  the project id from the request input ("id").
     * @return string Always 'reload'.
     */
    public function test_api($id)
    {
        Dever::setInput('test', 1);
        $this->run();

        return 'reload';
    }

    /**
     * Watchdog, meant to be executed once per minute.
     *
     * Starts the cron() worker when no such process is reported, then offers
     * every project returned by getAll() to add_api().
     */
    public function daemon()
    {
        // Start the queue worker if it is not running.
        if (Dever::process('lib/api.cron', true) <= 0) {
            Dever::daemon('lib/api.cron', 'spider');
        }

        // add_api() itself decides (by status) whether a project is queued.
        $projects = Dever::load('spider/lib/project')->getAll();
        if ($projects) {
            foreach ($projects as $project) {
                $this->add_api($project);
            }
        }
    }

    /**
     * Queue worker: creates the queue and processes it forever.
     *
     * NOTE(review): if Queue::pop() does not block on an empty queue this loop
     * spins without pause - confirm against the Queue implementation.
     */
    public function cron()
    {
        $this->queue = new Queue();

        while (true) {
            $this->load();
        }
    }

    /**
     * Take one project id off the queue and start a background run for it.
     *
     * A run is only started when no process for the same project id is
     * reported. While 1000 or more run processes are reported, the worker
     * waits (re-checking every 60 seconds) before starting another one.
     *
     * @return bool Always true, including after a handled exception.
     */
    public function load()
    {
        try {
            // Allow load() to be used without cron() having been called first.
            if (!$this->queue) {
                $this->queue = new Queue();
            }

            $id = $this->queue->pop();
            if (!$id) {
                return true;
            }

            $config = Dever::load('spider/lib/project')->get($id);
            if (!$config) {
                return true;
            }

            // Throttle: keep waiting until the number of run processes drops
            // below the limit, instead of sleeping once and starting anyway.
            while (Dever::process('lib/api.run', true) >= 1000) {
                sleep(60);
            }

            $command = 'lib/api.run?id=' . $id;
            if (Dever::process($command, true) <= 0) {
                Dever::daemon($command, 'spider');
            }
        } catch (\Exception $e) {
            // Keep the worker alive, but leave a trace and back off briefly so
            // a persistent failure does not turn into a tight, silent loop.
            error_log('Spider\Lib\Api::load failed: ' . $e->getMessage());
            sleep(1);
        }

        return true;
    }

    /**
     * Crawl the project whose id is given by the request input "id".
     *
     * When the project's category defines both a collect rule and a site, the
     * category site is fetched first and every collected entry is crawled as
     * its own task; otherwise the project site is crawled directly. The
     * project is set to status 2 at the end.
     *
     * @return false|null false when the id, project or category is missing;
     *                    null after a completed run.
     */
    public function run()
    {
        $id = Dever::input('id');
        if (!$id) {
            return false;
        }

        $config = Dever::load('spider/lib/project')->get($id);
        if (!$config) {
            return false;
        }

        $cate = Dever::db('spider/cate')->find($config['cate_id']);
        if (!$cate) {
            return false;
        }

        $col = $this->col($config['id']);
        $set = $this->set($config['id']);

        $config['curl'] = array(
            'request_type' => $config['request_type'],
            'content_type' => $config['content_type'],
            'header' => $config['header'],
            'param' => $config['param'],
        );

        // The configured site may carry a second part that is used as the
        // page template.
        $site = Dever::split($config['site']);
        $config['site'] = $site[0];
        $config['page'] = '';
        if (strpos($config['site'], 'http') === false) {
            // No "http" in the site: prefix it with the category site.
            $config['site'] = $cate['site'] . $config['site'];
        }
        if (isset($site[1]) && $site[1]) {
            $config['page'] = $site[1];
        }

        if ($cate['collect_rule'] && $cate['site']) {
            // First part: rule handed to Doc; optional second part: substring
            // every collected entry must contain.
            $rule = Dever::split($cate['collect_rule']);
            if (!isset($rule[1])) {
                $rule[1] = '';
            }

            $doc = Doc::getInstance($cate['site'], $rule[0]);
            $doc->log(new Log($id));
            $data = Dever::json_decode($doc->get($config['curl']));

            if ($data) {
                foreach ($data as $entry) {
                    if (!$entry) {
                        continue;
                    }
                    // strpos() instead of !strstr(): strstr() returns the
                    // matched tail, which is falsy when that tail is "0".
                    if ($rule[1] && strpos($entry, $rule[1]) === false) {
                        continue;
                    }
                    $config['site'] = $entry;
                    $this->task($config, $col, $set, $entry);
                }
            }
        } else {
            $this->task($config, $col, $set);
        }

        Dever::load('spider/lib/project')->set($config, 2);
    }

    /**
     * Resolve {cate=...} placeholders and crawl one site, paginated or not.
     *
     * The {page=...} placeholder is looked for in the page template, then the
     * site, then the request parameters; the first one found drives the
     * pagination. Without any placeholder the site is parsed once.
     *
     * @param array $config Project row prepared by run() (site, page, curl, ...).
     * @param mixed $col    Column definitions of the project.
     * @param mixed $set    Settings of the project.
     * @param mixed $cate   Category value for {cate=...}; false to use the
     *                      default written inside the placeholder.
     */
    private function task($config, $col, $set, $cate = false)
    {
        // A value resolved for the site is reused for the page template.
        list($config['site'], $cate) = $this->fillCate($config['site'], $cate);
        if ($config['page']) {
            list($config['page'], $cate) = $this->fillCate($config['page'], $cate);
        }

        if ($config['page'] && strpos($config['page'], '{page=') !== false) {
            $this->page($config['page'], 1, $config, $col, $set);
        } elseif (strpos($config['site'], '{page=') !== false) {
            $this->page($config['site'], 2, $config, $col, $set);
        } elseif ($config['param'] && strpos($config['param'], '{page=') !== false) {
            $this->page($config['param'], 3, $config, $col, $set);
        } else {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            $this->parse(
                $config['site'],
                $config['id'],
                $config['collect_rule'],
                $config['curl'],
                $col,
                $set,
                $config['push']
            );
        }
    }

    /**
     * Replace the first {cate=default} placeholder found in a string.
     *
     * The given category wins over the default written in the placeholder.
     * Values such as "0" are valid; the placeholder is only left untouched
     * when neither a category nor a default is available.
     *
     * @param string $text Text that may contain a {cate=...} placeholder.
     * @param mixed  $cate Category value, or a falsy value to use the default.
     * @return array Two elements: the resulting text and the category value
     *               that is now in effect.
     */
    private function fillCate($text, $cate)
    {
        if (strpos($text, '{cate=') === false || !preg_match('/{cate=(.*?)}/i', $text, $match)) {
            return array($text, $cate);
        }

        $value = $cate ?: $match[1];
        if ((string) $value === '') {
            return array($text, $cate);
        }

        return array(str_replace($match[0], $value, $text), $value);
    }

    /**
     * Column definitions stored for a project.
     *
     * @param mixed $project Project id.
     * @param int   $source  Not used by this method.
     */
    private function col($project, $source = 1)
    {
        return Dever::db('spider/col')->getList(['where_pid' => $project]);
    }

    /**
     * Settings stored for a project.
     *
     * @param mixed $project Project id.
     */
    private function set($project)
    {
        return Dever::db('spider/set')->getList(['where_pid' => $project]);
    }

    /**
     * Hand one URL to a new Parse instance and return the result of its get().
     */
    private function parse($url, $project, $rule, $param, $col, $set, $push)
    {
        $parse = new Parse($url, $project, $rule, $param, $col, $set, $push);

        return $parse->get();
    }

    /**
     * Crawl every page from the start page written in {page=N} up to
     * $config['page_num'] (100 when that is not positive).
     *
     * @param string $source Text containing the {page=N} placeholder.
     * @param int    $type   1: $source is a page template appended to the site
     *                       (the bare site is used for page 1);
     *                       2: $source is the site URL itself;
     *                       other: $source is the request parameter string.
     * @param array  $config Project row prepared by run().
     * @param mixed  $col    Column definitions of the project.
     * @param mixed  $set    Settings of the project.
     */
    private function page($source, $type, $config, $col, $set)
    {
        // is_numeric() rather than a truthiness test so that {page=0} works,
        // and so that a non-numeric start can never drive the loop.
        if (!preg_match('/{page=(.*?)}/i', $source, $match) || !is_numeric($match[1])) {
            return;
        }

        if ($config['page_num'] <= 0) {
            $config['page_num'] = 100;
        }

        $site = $config['site'];
        for ($i = (int) $match[1]; $i <= $config['page_num']; $i++) {
            $site_page = str_replace($match[0], $i, $source);

            // Record the page that is about to be fetched.
            Dever::load('spider/lib/project')->set($config, 4, $i);

            if ($type == 1) {
                $site = ($i == 1) ? $config['site'] : $config['site'] . $site_page;
            } elseif ($type == 2) {
                $site = $site_page;
            } else {
                $config['curl']['param'] = $site_page;
            }

            $this->parse(
                $site,
                $config['id'],
                $config['collect_rule'],
                $config['curl'],
                $col,
                $set,
                $config['push']
            );
        }
    }
}
 |