| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 | 
							- <?php
 
- namespace Spider\Lib;
 
- use Dever;
 
# Executed as soon as this file is loaded: set_time_limit(0) removes PHP's
# maximum execution time for the current script.
# NOTE(review): presumably needed because Api::cron() loops forever and
# Api::run() can crawl many pages - confirm no other entry point relies on a limit.
- set_time_limit(0);
 
/**
 * Entry points for queueing, dispatching and running spider projects.
 *
 * Everything here is driven through the project framework facade (Dever) and
 * the sibling Queue / Doc / Log / Parse classes of this namespace.
 */
class Api
{
	# Number of concurrently running `lib/api.run` processes at which load() pauses.
	const MAX_RUNNING = 1000;

	# Seconds load() waits once MAX_RUNNING has been reached.
	const THROTTLE_SECONDS = 60;

	# Last page crawled when a project's page_num is zero or negative.
	const DEFAULT_PAGE_NUM = 100;

	# Queue consumed by load(); created by cron(), or lazily by load() itself.
	private $queue;

	/**
	 * Put a project on the work queue.
	 *
	 * @param mixed $id Project id, or an already loaded project config array.
	 * @return string|false 'reload' on the normal path; false only if the
	 *                      project is missing and Dever::alert() returned.
	 */
	public function add_api($id)
	{
		$project = Dever::load('spider/lib/project');
		$config = is_array($id) ? $id : $project->get($id);

		if (!$config) {
			Dever::alert('项目不存在');
			# NOTE(review): alert() presumably aborts the request. Bail out
			# explicitly so an empty config can never be queued if it returns.
			return false;
		}

		# Only projects whose status is 2 or lower are queued; they are
		# switched to status 3 first and then their id is pushed.
		if ($config['status'] <= 2) {
			$project->set($config, 3);
			Dever::load('spider/lib/queue')->push($config['id']);
		}

		return 'reload';
	}

	/**
	 * Run one project immediately in the current process.
	 *
	 * @param mixed $id Project id handed straight to run().
	 * @return string Always 'reload'.
	 */
	public function test_api($id)
	{
		$this->run($id);

		return 'reload';
	}

	/**
	 * Watchdog, intended to be triggered once a minute: makes sure the
	 * `lib/api.cron` worker is running and offers every project to add_api().
	 *
	 * @return void
	 */
	public function daemon()
	{
		# Start the queue consumer when no such process is reported.
		if (Dever::process('lib/api.cron', true) <= 0) {
			Dever::daemon('lib/api.cron', 'spider');
		}

		# add_api() decides per project (by status) whether it is queued.
		$projects = Dever::load('spider/lib/project')->getAll();
		if ($projects) {
			foreach ($projects as $item) {
				$this->add_api($item);
			}
		}
	}

	/**
	 * Queue consumer: creates the queue and calls load() forever.
	 *
	 * @return void This method does not return.
	 */
	public function cron()
	{
		$this->queue = new Queue();

		while (1) {
			$this->load();
		}
	}

	/**
	 * Pop one project id from the queue and start a background
	 * `lib/api.run?id=<id>` process for it unless one is already reported.
	 *
	 * @return bool Always true, including when an exception was caught.
	 */
	public function load()
	{
		try {
			# load() is public, so it may be called without cron() having
			# created the queue; build it on demand instead of failing on null.
			if (!$this->queue) {
				$this->queue = new Queue();
			}

			$id = $this->queue->pop();
			if (!$id) {
				return true;
			}

			$config = Dever::load('spider/lib/project')->get($id);
			if (!$config) {
				return true;
			}

			# Throttle: pause once when too many run processes are alive.
			# The count is not re-checked after the pause.
			if (Dever::process('lib/api.run', true) >= self::MAX_RUNNING) {
				sleep(self::THROTTLE_SECONDS);
			}

			# Do not start a second runner for the same project.
			$job = 'lib/api.run?id=' . $id;
			if (Dever::process($job, true) <= 0) {
				Dever::daemon($job, 'spider');
			}
		} catch (\Exception $e) {
			# Deliberately best-effort: any failure is dropped so the cron()
			# loop keeps running.
			# NOTE(review): this may be hiding routine queue timeouts as well
			# as real errors - confirm before adding logging or back-off here.
		}

		return true;
	}

	/**
	 * Crawl one project.
	 *
	 * @param mixed $id Optional project id. When omitted (or not a usable
	 *                  scalar) the id is read from Dever::input('id'), which
	 *                  is how the daemonised `lib/api.run?id=…` call supplies it.
	 * @return false|null false when the id, the project or its category is
	 *                    missing; null after the crawl has been carried out.
	 */
	public function run($id = null)
	{
		# test_api() passes the id explicitly; previously it was ignored.
		if (!$id || !is_scalar($id)) {
			$id = Dever::input('id');
		}
		if (!$id) {
			return false;
		}

		$config = Dever::load('spider/lib/project')->get($id);
		if (!$config) {
			return false;
		}

		$cate = Dever::db('spider/cate')->find($config['cate_id']);
		if (!$cate) {
			return false;
		}

		$col = $this->col($config['id']);
		$set = $this->set($config['id']);

		# Request options handed on to Doc::get() and Parse.
		$config['curl'] = [
			'request_type' => $config['request_type'],
			'content_type' => $config['content_type'],
			'header' => $config['header'],
			'param' => $config['param'],
		];

		# The stored site value is split by Dever::split(): the first part is
		# the site URL, an optional second part is the paging suffix.
		$site = Dever::split($config['site']);
		$config['site'] = $site[0];
		$config['page'] = (isset($site[1]) && $site[1]) ? $site[1] : '';

		# A site that does not contain "http" is prefixed with the category's site.
		if (!strstr($config['site'], 'http')) {
			$config['site'] = $cate['site'] . $config['site'];
		}

		if ($cate['collect_rule'] && $cate['site']) {
			# The category has its own collect rule: fetch the category site,
			# decode the result as JSON and run one task per returned entry.
			$rule = Dever::split($cate['collect_rule']);
			if (!isset($rule[1])) {
				$rule[1] = '';
			}

			$doc = Doc::getInstance($cate['site'], $rule[0]);
			$doc->log(new Log($id));
			$data = Dever::json_decode($doc->get($config['curl']));

			if ($data) {
				foreach ($data as $v) {
					if (!$v) {
						continue;
					}
					# Optional second rule part: keep only entries containing it.
					if ($rule[1] && !strstr($v, $rule[1])) {
						continue;
					}
					$config['site'] = $v;
					$this->task($config, $col, $set, $v);
				}
			}
		} else {
			$this->task($config, $col, $set);
		}

		Dever::load('spider/lib/project')->set($config, 2);
	}

	/**
	 * Resolve {cate=…} placeholders, then crawl either page by page (when a
	 * {page=…} placeholder is found) or as a single request.
	 *
	 * @param array $config Project config as prepared by run().
	 * @param mixed $col    Rows from col().
	 * @param mixed $set    Rows from set().
	 * @param mixed $cate   Value substituted for {cate=…}; when falsy the
	 *                      default written inside the placeholder is used.
	 * @return void
	 */
	private function task($config, $col, $set, $cate = false)
	{
		# The site is resolved first; a default picked up there is reused for
		# the page suffix, exactly as before.
		$resolved = $this->cate($config['site'], $cate);
		$config['site'] = $resolved[0];
		$cate = $resolved[1];

		$resolved = $this->cate($config['page'], $cate);
		$config['page'] = $resolved[0];

		# The {page=…} placeholder is looked for in the page suffix (type 1),
		# then the site URL (type 2), then the request param (type 3).
		if ($config['page'] && strpos($config['page'], '{page=') !== false) {
			$this->page($config['page'], 1, $config, $col, $set);
		} elseif (strpos($config['site'], '{page=') !== false) {
			$this->page($config['site'], 2, $config, $col, $set);
		} elseif ($config['param'] && strpos($config['param'], '{page=') !== false) {
			$this->page($config['param'], 3, $config, $col, $set);
		} else {
			# No paging: record status 4 with 1 as third argument, parse once.
			Dever::load('spider/lib/project')->set($config, 4, 1);
			$this->parse($config['site'], $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
		}
	}

	/**
	 * Replace the first {cate=default} placeholder (and identical repeats of
	 * it) in $text.
	 *
	 * @param mixed $text Text that may contain the placeholder.
	 * @param mixed $cate Replacement; when falsy the placeholder's default is used.
	 * @return array Two items: the resulting text and the category value in effect.
	 */
	private function cate($text, $cate)
	{
		if ($text && strpos($text, '{cate=') !== false) {
			preg_match_all('/{cate=(.*?)}/i', $text, $match);
			if (isset($match[1][0]) && $match[1][0]) {
				if (!$cate) {
					$cate = $match[1][0];
				}
				$text = str_replace($match[0][0], $cate, $text);
			}
		}

		return [$text, $cate];
	}

	/**
	 * Load the `spider/col` rows whose pid is the given project.
	 *
	 * @param mixed $project Project id.
	 * @return mixed Result of getList().
	 */
	private function col($project)
	{
		return Dever::db('spider/col')->getList(['where_pid' => $project]);
	}

	/**
	 * Load the `spider/set` rows whose pid is the given project.
	 *
	 * @param mixed $project Project id.
	 * @return mixed Result of getList().
	 */
	private function set($project)
	{
		return Dever::db('spider/set')->getList(['where_pid' => $project]);
	}

	/**
	 * Build a Parse object for one URL and return the result of its get().
	 *
	 * @return mixed Whatever Parse::get() returns.
	 */
	private function parse($url, $project, $rule, $param, $col, $set, $push)
	{
		$parse = new Parse($url, $project, $rule, $param, $col, $set, $push);

		return $parse->get();
	}

	/**
	 * Crawl a project page by page.
	 *
	 * The number inside the first {page=N} placeholder of $source is the
	 * first page; crawling continues up to and including $config['page_num'].
	 *
	 * @param string $source Text containing the {page=N} placeholder.
	 * @param int    $type   1: $source is a suffix appended to the site (page 1
	 *                       uses the bare site); 2: $source is the site URL
	 *                       itself; anything else: $source is the request param.
	 * @param array  $config Project config as prepared by run()/task().
	 * @param mixed  $col    Rows from col().
	 * @param mixed  $set    Rows from set().
	 * @return void
	 */
	private function page($source, $type, $config, $col, $set)
	{
		preg_match_all('/{page=(.*?)}/i', $source, $match);

		# The start page must be a number. Checking is_numeric rather than
		# truthiness lets "{page=0}" work, and the int cast keeps the counter
		# away from PHP's string increment rules.
		if (!isset($match[1][0]) || !is_numeric($match[1][0])) {
			return;
		}
		$first = (int) $match[1][0];

		if ($config['page_num'] <= 0) {
			$config['page_num'] = self::DEFAULT_PAGE_NUM;
		}

		$project = Dever::load('spider/lib/project');
		$site = $config['site'];

		for ($i = $first; $i <= $config['page_num']; $i++) {
			$site_page = str_replace($match[0][0], $i, $source);

			# Record status 4 together with the page about to be fetched.
			$project->set($config, 4, $i);

			if ($type == 1) {
				$site = ($i == 1) ? $config['site'] : $config['site'] . $site_page;
			} elseif ($type == 2) {
				$site = $site_page;
			} else {
				$config['curl']['param'] = $site_page;
			}

			$this->parse($site, $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
		}
	}
}
 
 
  |