123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153 |
- <?php
- namespace Spider\Lib;
- use Dever;
/**
 * Entry points for queuing, dispatching and running spider projects.
 *
 * Project status values written by this class (meanings are inferred from
 * how they are used here — confirm against spider/lib/project):
 *   - 3 is written when a project is pushed onto the queue,
 *   - 4 is written (with a page number) right before a page is parsed,
 *   - 2 is written when a run has finished.
 * Only projects whose status is <= 2 are (re-)queued.
 */
class Api
{
    /** Number of running `lib/api.run` processes at which dispatching pauses. */
    const MAX_RUNNING = 1000;

    /** Seconds to wait when MAX_RUNNING has been reached. */
    const THROTTLE_SECONDS = 60;

    /** Upper page bound used when a project's `page_num` is not positive. */
    const DEFAULT_PAGE_NUM = 100;

    /** @var Queue|null Queue the dispatcher pops project ids from. */
    private $queue;

    /**
     * Put a project onto the run queue.
     *
     * @param int|string|array $id Project id, or an already loaded project
     *                             config array (must contain `id` and `status`).
     * @return string Always 'reload'.
     */
    public function add_api($id)
    {
        if (is_array($id)) {
            $config = $id;
        } else {
            $config = Dever::load('spider/lib/project')->get($id);
        }

        if (!$config) {
            // NOTE(review): Dever::alert presumably aborts the request; if it
            // does not, the status check below runs on an empty config.
            Dever::alert('项目不存在');
        }

        // Only projects with status <= 2 are queued; they are marked 3 first.
        if ($config['status'] <= 2) {
            Dever::load('spider/lib/project')->set($config, 3);
            Dever::load('spider/lib/queue')->push($config['id']);
        }

        return 'reload';
    }

    /**
     * Run a project immediately with the `test` input flag set to 1.
     *
     * @param int|string $id Project id. NOTE(review): run() reads the id from
     *                       Dever::input('id') and ignores this argument, so
     *                       the request itself must carry `id`.
     * @return string Always 'reload'.
     */
    public function test_api($id)
    {
        Dever::setInput('test', 1);
        $this->run($id);

        return 'reload';
    }

    /**
     * Watchdog; intended to be executed once per minute.
     *
     * Starts the `lib/api.cron` dispatcher when Dever::process reports no
     * running instance, then offers every project to add_api() so that the
     * eligible ones get queued.
     */
    public function daemon()
    {
        // Check whether the dispatcher process exists; start it if not.
        $state = Dever::process('lib/api.cron', true);
        if ($state <= 0) {
            Dever::daemon('lib/api.cron', 'spider');
        }

        // Check whether each project can start running.
        $projects = Dever::load('spider/lib/project')->getAll();
        if ($projects) {
            foreach ($projects as $project) {
                $this->add_api($project);
            }
        }
    }

    /**
     * Dispatcher loop: creates the queue and calls load() forever.
     */
    public function cron()
    {
        $this->queue = new Queue();

        while (true) {
            $this->load();
        }
    }

    /**
     * Pop one project id from the queue and start a background run for it.
     *
     * Exceptions thrown while popping or dispatching are swallowed on purpose
     * so that the cron() loop keeps running.
     *
     * @return bool Always true.
     */
    public function load()
    {
        // load() is public; make sure the queue exists even when it is not
        // reached through cron(). Kept outside the try block so construction
        // failures are not silently swallowed.
        if (!$this->queue) {
            $this->queue = new Queue();
        }

        try {
            $id = $this->queue->pop();
            if ($id) {
                $config = Dever::load('spider/lib/project')->get($id);
                if ($config) {
                    // Look at the number of runs currently in progress and
                    // wait once before dispatching when there are too many.
                    $running = Dever::process('lib/api.run', true);
                    if ($running >= self::MAX_RUNNING) {
                        sleep(self::THROTTLE_SECONDS);
                    }

                    // Push the actual run into the background.
                    Dever::daemon('lib/api.run?id=' . $id, 'spider');
                }
            }
        } catch (\Exception $e) {
            // Best effort: a failing item must not stop the dispatcher.
        }

        return true;
    }

    /**
     * Run the project whose id is given by the `id` input value.
     *
     * A site URL containing both `{` and `}` is treated as a template and
     * handed to preg(); any other URL is parsed once as page 1. The project is
     * set to status 2 afterwards.
     *
     * @return false|null False when the id is missing or the project cannot
     *                    be loaded; otherwise nothing is returned.
     */
    public function run()
    {
        $id = Dever::input('id');
        if (!$id) {
            return false;
        }

        $config = Dever::load('spider/lib/project')->get($id);
        if (!$config) {
            return false;
        }

        $col = $this->col($config['id']);
        $set = $this->set($config['id']);

        $site = $config['site'];
        $isTemplate = strpos($site, '{') !== false && strpos($site, '}') !== false;

        if ($isTemplate) {
            $this->preg($config, $col, $set);
        } else {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            $this->parse($site, $config['id'], $config['collect_rule'], $col, $set, $config['push']);
        }

        Dever::load('spider/lib/project')->set($config, 2);
    }

    /**
     * Fetch the `spider/col` rows whose pid is the given project id.
     *
     * @param int|string $project Project id.
     * @return mixed Result of getList().
     */
    private function col($project)
    {
        return Dever::db('spider/col')->getList(['where_pid' => $project]);
    }

    /**
     * Fetch the `spider/set` rows whose pid is the given project id.
     *
     * @param int|string $project Project id.
     * @return mixed Result of getList().
     */
    private function set($project)
    {
        return Dever::db('spider/set')->getList(['where_pid' => $project]);
    }

    /**
     * Build a Parse instance for one URL and return the result of its get().
     *
     * @param string     $url     URL to fetch.
     * @param int|string $project Project id.
     * @param mixed      $rule    The project's `collect_rule` value.
     * @param mixed      $col     Rows returned by col().
     * @param mixed      $set     Rows returned by set().
     * @param mixed      $push    The project's `push` value.
     * @return mixed Result of Parse::get().
     */
    private function parse($url, $project, $rule, $col, $set, $push)
    {
        $parse = new Parse($url, $project, $rule, $col, $set, $push);

        return $parse->get();
    }

    /**
     * Expand a templated site URL.
     *
     * Only the first `{...}` placeholder is used. Its content is decoded with
     * parse_str() (query-string syntax, e.g. `{page=1}`) and passed on to
     * page(). Nothing happens when the first placeholder is empty.
     *
     * @param array $config Project config.
     * @param mixed $col    Rows returned by col().
     * @param mixed $set    Rows returned by set().
     */
    private function preg($config, $col, $set)
    {
        $pat = '/{(.*?)}/i';
        preg_match_all($pat, $config['site'], $match);

        if (empty($match[1][0])) {
            return;
        }

        if ($config['page_num'] <= 0) {
            $config['page_num'] = self::DEFAULT_PAGE_NUM;
        }

        parse_str($match[1][0], $param);
        $this->page($param, $match[0][0], $config, $col, $set);
    }

    /**
     * Parse every page of a templated site URL.
     *
     * With a non-empty `page` parameter the placeholder is replaced by each
     * page number from that start value up to and including
     * `$config['page_num']`. Without it the placeholder is removed and the
     * resulting URL is parsed once as page 1.
     *
     * @param array  $param   Parameters decoded from the placeholder.
     * @param string $replace The full placeholder text, braces included.
     * @param array  $config  Project config.
     * @param mixed  $col     Rows returned by col().
     * @param mixed  $set     Rows returned by set().
     */
    private function page($param, $replace, $config, $col, $set)
    {
        if (isset($param['page']) && $param['page']) {
            for ($i = $param['page']; $i <= $config['page_num']; $i++) {
                $url = str_replace($replace, $i, $config['site']);
                Dever::load('spider/lib/project')->set($config, 4, $i);
                $this->parse($url, $config['id'], $config['collect_rule'], $col, $set, $config['push']);
            }
        } else {
            // Strip the placeholder from the site URL. The subject used to be
            // an undefined variable, which produced an empty URL.
            $url = str_replace($replace, '', $config['site']);
            Dever::load('spider/lib/project')->set($config, 4, 1);
            $this->parse($url, $config['id'], $config['collect_rule'], $col, $set, $config['push']);
        }
    }
}
|