<?php

namespace Spider\Lib;

use Dever;

/**
 * Entry points for queueing, dispatching and running spider projects.
 *
 * Flow visible in this class:
 *  - add_api() marks a project with status 3 and pushes its id onto the queue;
 *  - daemon() makes sure the cron() worker is alive and (re)queues projects;
 *  - cron()/load() pop ids from the queue and spawn one background `run` per id;
 *  - run() marks the project with status 4 while parsing and status 2 afterwards.
 *
 * NOTE(review): the status codes presumably mean 3 = queued, 4 = running,
 * 2 = finished; confirm against spider/lib/project.
 */
class Api
{
    /**
     * Queue instance created in cron() and consumed in load().
     */
    private $queue;

    /**
     * Put a project on the work queue.
     *
     * @param mixed $id Either a project id or an already loaded project config array.
     *
     * @return string Always 'reload'.
     */
    public function add_api($id)
    {
        # Write into the queue
        if (is_array($id)) {
            $config = $id;
        } else {
            $config = Dever::load('spider/lib/project')->get($id);
        }

        if (!$config) {
            # NOTE(review): the code below assumes Dever::alert() halts execution - confirm.
            Dever::alert('项目不存在');
        }

        # Only projects whose status is 2 or lower are queued; they are marked with status 3 first.
        if ($config['status'] <= 2) {
            Dever::load('spider/lib/project')->set($config, 3);
            Dever::load('spider/lib/queue')->push($config['id']);
        }

        return 'reload';
    }

    /**
     * Run a project immediately with the `test` input flag set.
     *
     * run() takes no arguments and reads the project id from Dever::input('id'),
     * so the id is published as input first instead of being passed as an
     * argument that run() would silently ignore.
     *
     * @param mixed $id Project id.
     *
     * @return string Always 'reload'.
     */
    public function test_api($id)
    {
        Dever::setInput('test', 1);
        # NOTE(review): assumes Dever::setInput() feeds Dever::input(), as the 'test' flag above implies.
        Dever::setInput('id', $id);
        $this->run();

        return 'reload';
    }

    /**
     * Daemon entry point; running it once per minute is enough.
     *
     * Starts the cron worker when Dever::process() reports no running instance,
     * then offers every project returned by getAll() to add_api().
     */
    public function daemon()
    {
        # Check whether the worker process exists
        $state = Dever::process('lib/api.cron', true);
        if ($state <= 0) {
            Dever::daemon('lib/api.cron', 'spider');
        }

        # Check whether each current project can start running
        $data = Dever::load('spider/lib/project')->getAll();
        if ($data) {
            foreach ($data as $k => $v) {
                $this->add_api($v);
            }
        }
    }

    /**
     * Long-running worker: creates the queue and calls load() forever.
     */
    public function cron()
    {
        $this->queue = new Queue();
        //Dever::import('task');
        while (1) {
            $this->load();
        }
    }

    /**
     * Pop one project id from the queue and spawn a background run for it.
     *
     * Exceptions are swallowed on purpose so the endless loop in cron() keeps going.
     *
     * @return bool Always true.
     */
    public function load()
    {
        try {
            $id = $this->queue->pop();
            if ($id) {
                $config = Dever::load('spider/lib/project')->get($id);
                if ($config) {
                    # Push to the background to run
                    # Get the number of currently running processes
                    $num = Dever::process('lib/api.run', true);
                    if ($num >= 1000) {
                        # Wait a while before executing; the count is not re-checked after the pause
                        sleep(60);
                    }
                    Dever::daemon('lib/api.run?id=' . $id, 'spider');
                }
            }

            return true;
        } catch (\Exception $e) {
            return true;
        }
    }

    /**
     * Crawl the project whose id is given by the `id` input.
     *
     * A site URL containing both '{' and '}' is treated as a template and
     * handled by preg(); any other URL is parsed directly as page 1.
     * The project is marked with status 2 once parsing returns.
     *
     * @return false|null False when the id is missing or the project does not exist.
     */
    public function run()
    {
        $id = Dever::input('id');
        if (!$id) {
            return false;
        }

        $config = Dever::load('spider/lib/project')->get($id);
        if (!$config) {
            return false;
        }

        $col = $this->col($config['id']);
        $set = $this->set($config['id']);

        if (strpos($config['site'], '{') !== false && strpos($config['site'], '}') !== false) {
            $this->preg($config, $col, $set);
        } else {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            $this->parse($config['site'], $config['id'], $config['collect_rule'], $col, $set, $config['push']);
        }

        /*
        Dever::task(function() use($config, $this)
        {
            $col = $this->col($config['id']);
            $this->parse($config['url'], $config['id'], $config['collect_rule'], $col);
        });
        */

        Dever::load('spider/lib/project')->set($config, 2);
    }

    /**
     * Fetch the `spider/col` rows whose pid matches the project.
     *
     * @param mixed $project Project id.
     */
    private function col($project)
    {
        return Dever::db('spider/col')->getList(['where_pid' => $project]);
    }

    /**
     * Fetch the `spider/set` rows whose pid matches the project.
     *
     * @param mixed $project Project id.
     */
    private function set($project)
    {
        return Dever::db('spider/set')->getList(['where_pid' => $project]);
    }

    /**
     * Build a Parse object for one URL and return the result of its get().
     */
    private function parse($url, $project, $rule, $col, $set, $push)
    {
        $parse = new Parse($url, $project, $rule, $col, $set, $push);

        return $parse->get();
    }

    /**
     * Expand the first `{...}` placeholder found in the site URL.
     *
     * The text between the braces is decoded as a query string (for example
     * `{page=1}`) and handed to page(). A page_num of zero or less defaults to 100.
     */
    private function preg($config, $col, $set)
    {
        $pat = '/{(.*?)}/i';
        preg_match_all($pat, $config['site'], $match);

        if (isset($match[1][0]) && $match[1][0]) {
            if ($config['page_num'] <= 0) {
                $config['page_num'] = 100;
            }
            parse_str($match[1][0], $param);
            $this->page($param, $match[0][0], $config, $col, $set);
        }
    }

    /**
     * Parse every page of a templated site URL.
     *
     * With a truthy `page` parameter the placeholder is replaced by each page
     * number from that start value up to page_num inclusive. Otherwise the
     * placeholder is stripped and the resulting URL is parsed once as page 1.
     *
     * @param array  $param   Parameters decoded from the placeholder.
     * @param string $replace The full placeholder text, braces included.
     * @param array  $config  Project config.
     */
    private function page($param, $replace, $config, $col, $set)
    {
        if (isset($param['page']) && $param['page']) {
            for ($i = $param['page']; $i <= $config['page_num']; $i++) {
                $url = str_replace($replace, $i, $config['site']);
                Dever::load('spider/lib/project')->set($config, 4, $i);
                $this->parse($url, $config['id'], $config['collect_rule'], $col, $set, $config['push']);
            }
        } else {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            # Strip the placeholder from the site URL. The previous code read an
            # undefined $value here, so the parser was handed an empty URL.
            $url = str_replace($replace, '', $config['site']);
            $this->parse($url, $config['id'], $config['collect_rule'], $col, $set, $config['push']);
        }
    }
}