|
- <?php
- namespace Spider\Lib;
- use Dever;
- set_time_limit(0);
/**
 * Spider job controller.
 *
 * Queues projects for crawling, keeps the background worker processes alive
 * and performs the crawl of a single project (optionally fanned out over a
 * category list and over numbered pages).
 *
 * Project status values are written as plain integers via
 * project->set($config, $status): 3 when a project is pushed to the queue,
 * 4 (with a page number) while a page is being crawled and 2 once run() has
 * finished. NOTE(review): the meaning of each value is defined by the project
 * model, which is not visible here - confirm before relying on these labels.
 */
class Api
{
    /** Running-process count at which load() pauses before starting another worker. */
    const MAX_RUNNING_PROCESSES = 1000;

    /** Seconds load() sleeps once MAX_RUNNING_PROCESSES has been reached. */
    const BUSY_WAIT_SECONDS = 60;

    /** Last page to crawl when a project's page_num is not positive. */
    const DEFAULT_PAGE_NUM = 100;

    /** @var Queue|null Queue consumed by load(); created by cron() or lazily by load(). */
    private $queue;

    /**
     * Put a project on the crawl queue.
     *
     * @param mixed $id Project id, or an already loaded project config array.
     * @return string Always 'reload'.
     */
    public function add_api($id)
    {
        $config = is_array($id) ? $id : $this->project()->get($id);

        if (!$config) {
            // NOTE(review): the code below assumes Dever::alert() stops execution - confirm.
            Dever::alert('项目不存在');
        }
        if ($config['status'] <= 2) {
            $this->project()->set($config, 3);
            Dever::load('spider/lib/queue')->push($config['id']);
        }

        return 'reload';
    }

    /**
     * Run a project immediately with the "test" input flag set, so that
     * parse() lets exceptions propagate instead of swallowing them.
     *
     * @param mixed $id Project id.
     * @return string Always 'reload'.
     */
    public function test_api($id)
    {
        Dever::setInput('test', 1);
        // run() declares no parameters and reads the id from the request input,
        // so an id handed to this method has to be published there first.
        if ($id && is_scalar($id)) {
            Dever::setInput('id', $id);
        }
        $this->run();

        return 'reload';
    }

    /**
     * Watchdog entry point; running it once per minute is sufficient.
     *
     * Starts the cron() worker when no such process is reported, then offers
     * every project to add_api(), which queues those whose status allows it.
     *
     * @return void
     */
    public function daemon()
    {
        if (Dever::process('lib/api.cron', true) <= 0) {
            Dever::daemon('lib/api.cron', 'spider');
        }

        $projects = $this->project()->getAll();
        if ($projects) {
            foreach ($projects as $project) {
                $this->add_api($project);
            }
        }
    }

    /**
     * Queue consumer: calls load() in an endless loop and never returns.
     *
     * @return void
     */
    public function cron()
    {
        $this->queue = new Queue();
        while (true) {
            $this->load();
        }
    }

    /**
     * Take one project id off the queue and start a background run() for it,
     * unless a run for the same id is already reported as active.
     *
     * NOTE(review): when pop() yields nothing this returns immediately; if
     * Queue::pop() does not block, the loop in cron() spins without pause.
     *
     * @return bool Always true, including when an exception was caught.
     */
    public function load()
    {
        try {
            // load() is public; create the queue when it is called without cron().
            if (!$this->queue) {
                $this->queue = new Queue();
            }

            $id = $this->queue->pop();
            if (!$id || !$this->project()->get($id)) {
                return true;
            }

            // Too many workers already running: wait once, then continue.
            if (Dever::process('lib/api.run', true) >= self::MAX_RUNNING_PROCESSES) {
                sleep(self::BUSY_WAIT_SECONDS);
            }

            $command = 'lib/api.run?id=' . $id;
            if (Dever::process($command, true) <= 0) {
                Dever::daemon($command, 'spider');
            }

            return true;
        } catch (\Exception $e) {
            // Best effort: failures are ignored (presumably so one bad job cannot stop cron()).
            return true;
        }
    }

    /**
     * Crawl the project whose id is given by the "id" request input.
     *
     * The project's "site" value is split with Dever::split(): the first part
     * is the start URL (prefixed with the category site when it does not
     * contain "http"), the optional second part is the page URL suffix.
     * When the category has both a collect rule and a site, the category
     * site is fetched first and task() runs once per returned entry;
     * otherwise task() runs once for the project itself.
     *
     * @return false|null false when the id, project or category is missing;
     *                    otherwise nothing is returned.
     */
    public function run()
    {
        $id = Dever::input('id');
        if (!$id) {
            return false;
        }
        $config = $this->project()->get($id);
        if (!$config) {
            return false;
        }
        $cate = Dever::db('spider/cate')->find($config['cate_id']);
        if (!$cate) {
            return false;
        }

        $col = $this->col($config['id']);
        $set = $this->set($config['id']);
        $config['curl'] = [
            'request_type' => $config['request_type'],
            'content_type' => $config['content_type'],
            'header' => $config['header'],
            'param' => $config['param'],
        ];

        $site = Dever::split($config['site']);
        $config['site'] = isset($site[0]) ? $site[0] : '';
        $config['page'] = !empty($site[1]) ? $site[1] : '';
        if (strpos($config['site'], 'http') === false) {
            $config['site'] = $cate['site'] . $config['site'];
        }

        if ($cate['collect_rule'] && $cate['site']) {
            $rule = Dever::split($cate['collect_rule']);
            // Optional second part of the rule: substring an entry must contain.
            $filter = isset($rule[1]) ? $rule[1] : '';

            $doc = Doc::getInstance($cate['site'], $rule[0]);
            $doc->log(new Log($id));
            $data = Dever::json_decode($doc->get($config['curl']));
            if ($data) {
                foreach ($data as $v) {
                    if (!$v) {
                        continue;
                    }
                    // strpos() rather than !strstr(): strstr() returns the matched tail,
                    // which is falsy when that tail is the string "0".
                    if ($filter && strpos($v, $filter) === false) {
                        continue;
                    }
                    $config['site'] = $v;
                    $this->task($config, $col, $set, $v);
                }
            }
        } else {
            $this->task($config, $col, $set);
        }

        $this->project()->set($config, 2);
    }

    /**
     * Resolve {cate=...} placeholders and crawl one start URL, page by page
     * when a {page=N} placeholder is present.
     *
     * The placeholder is looked for in the page suffix first, then in the
     * site URL, then in the request parameters; the first hit decides how
     * page() builds each request.
     *
     * @param array $config Project config prepared by run().
     * @param mixed $col    Column rows of the project.
     * @param mixed $set    Setting rows of the project.
     * @param mixed $cate   Value substituted for {cate=...}; false to use the
     *                      default written inside the placeholder.
     * @return void
     */
    private function task($config, $col, $set, $cate = false)
    {
        list($config['site'], $cate) = $this->fillCate($config['site'], $cate);
        if ($config['page']) {
            list($config['page'], $cate) = $this->fillCate($config['page'], $cate);
        }

        if ($config['page'] && strpos($config['page'], '{page=') !== false) {
            $this->page($config['page'], 1, $config, $col, $set);
        } elseif (strpos($config['site'], '{page=') !== false) {
            $this->page($config['site'], 2, $config, $col, $set);
        } elseif ($config['param'] && strpos($config['param'], '{page=') !== false) {
            $this->page($config['param'], 3, $config, $col, $set);
        } else {
            $this->project()->set($config, 4, 1);
            $this->crawl($config['site'], $config, $col, $set);
        }
    }

    /**
     * Replace the first {cate=default} placeholder in $text.
     *
     * $cate is used when truthy, otherwise the default written inside the
     * placeholder. A placeholder with an empty default is left untouched.
     * The default is compared with '' rather than tested for truthiness so
     * that {cate=0} is substituted as well.
     *
     * @param string $text URL or URL suffix that may contain the placeholder.
     * @param mixed  $cate Override value, or a falsy value for "use the default".
     * @return array Two elements: the resulting text and the category value in effect.
     */
    private function fillCate($text, $cate)
    {
        if (strpos($text, '{cate=') === false
            || !preg_match('/{cate=(.*?)}/i', $text, $match)
            || $match[1] === ''
        ) {
            return [$text, $cate];
        }

        $cate = $cate ? $cate : $match[1];

        return [str_replace($match[0], $cate, $text), $cate];
    }

    /**
     * Column rows belonging to a project.
     *
     * @param mixed $project Project id.
     * @param int   $source  Unused; kept so existing calls stay valid.
     * @return mixed Result of the spider/col getList() query.
     */
    private function col($project, $source = 1)
    {
        return Dever::db('spider/col')->getList(['where_pid' => $project]);
    }

    /**
     * Setting rows belonging to a project.
     *
     * @param mixed $project Project id.
     * @return mixed Result of the spider/set getList() query.
     */
    private function set($project)
    {
        return Dever::db('spider/set')->getList(['where_pid' => $project]);
    }

    /**
     * Fetch and parse one URL.
     *
     * With the "test" input set to 1 exceptions propagate to the caller;
     * otherwise they are swallowed and false is returned.
     *
     * @param string $url       URL to fetch.
     * @param mixed  $project   Project id.
     * @param mixed  $list_rule The project's collect_list_rule.
     * @param mixed  $rule      The project's collect_rule.
     * @param array  $param     Request options (request_type, content_type, header, param).
     * @param mixed  $col       Column rows of the project.
     * @param mixed  $set       Setting rows of the project.
     * @param mixed  $push      The project's push setting.
     * @return mixed Result of Parse::get(), or false after a swallowed exception.
     * @throws \Exception In test mode, whatever Parse throws.
     */
    private function parse($url, $project, $list_rule, $rule, $param, $col, $set, $push)
    {
        $testing = Dever::input('test') == 1;
        try {
            $parse = new Parse($url, $project, $list_rule, $rule, $param, $col, $set, $push);

            return $parse->get();
        } catch (\Exception $e) {
            if ($testing) {
                throw $e;
            }

            return false;
        }
    }

    /**
     * Crawl every page from the start number written in {page=N} up to and
     * including the project's page_num (DEFAULT_PAGE_NUM when page_num is
     * not positive).
     *
     * The start number must be numeric and is cast to int. This makes
     * {page=0} work (a truthiness test would discard it) and keeps a
     * non-numeric start from driving the loop with a string counter.
     *
     * @param string $source Text containing the {page=N} placeholder.
     * @param int    $type   Where $source came from: 1 = page suffix appended
     *                       to the site URL (page 1 uses the bare site URL),
     *                       2 = the site URL itself, anything else = request
     *                       parameters.
     * @param array  $config Project config prepared by run().
     * @param mixed  $col    Column rows of the project.
     * @param mixed  $set    Setting rows of the project.
     * @return void
     */
    private function page($source, $type, $config, $col, $set)
    {
        if (!preg_match('/{page=(.*?)}/i', $source, $match) || !is_numeric($match[1])) {
            return;
        }
        if ($config['page_num'] <= 0) {
            $config['page_num'] = self::DEFAULT_PAGE_NUM;
        }

        $site = $config['site'];
        for ($i = (int) $match[1]; $i <= $config['page_num']; $i++) {
            $site_page = str_replace($match[0], $i, $source);
            $this->project()->set($config, 4, $i);

            if ($type == 1) {
                $site = $i == 1 ? $config['site'] : $config['site'] . $site_page;
            } elseif ($type == 2) {
                $site = $site_page;
            } else {
                $config['curl']['param'] = $site_page;
            }

            $this->crawl($site, $config, $col, $set);
        }
    }

    /**
     * Call parse() for $url with the rules and request options taken from $config.
     *
     * @param string $url    URL to fetch.
     * @param array  $config Project config prepared by run().
     * @param mixed  $col    Column rows of the project.
     * @param mixed  $set    Setting rows of the project.
     * @return mixed Whatever parse() returns.
     */
    private function crawl($url, $config, $col, $set)
    {
        return $this->parse(
            $url,
            $config['id'],
            $config['collect_list_rule'],
            $config['collect_rule'],
            $config['curl'],
            $col,
            $set,
            $config['push']
        );
    }

    /**
     * Shortcut for the project library used throughout this class.
     *
     * @return mixed Whatever Dever::load('spider/lib/project') returns.
     */
    private function project()
    {
        return Dever::load('spider/lib/project');
    }
}
|