get($id); } if (!$config) { Dever::alert('项目不存在'); } if ($config['status'] <= 2) { Dever::load('spider/lib/project')->set($config, 3); Dever::load('spider/lib/queue')->push($config['id']); } return 'reload'; }

    /**
     * Run a project once with the `test` input flag set to 1.
     *
     * NOTE(review): run() declares no parameters and reads the project id from
     * Dever::input('id'), so the $id passed to it below is ignored by run().
     * Confirm that the `id` input is always populated when this is called.
     *
     * @param mixed $id Project id.
     * @return string Always 'reload'.
     */
    public function test_api($id)
    {
        Dever::setInput('test', 1);
        $this->run($id);
        return 'reload';
    }

    /**
     * Daemon entry point; meant to be triggered once per minute.
     *
     * Starts the `lib/api.cron` worker when Dever::process() reports a value
     * <= 0 for it, then passes every project returned by getAll() to add_api().
     */
    public function daemon()
    {
        // Check whether the cron worker process exists; start it if not.
        $state = Dever::process('lib/api.cron', true);
        if ($state <= 0) {
            Dever::daemon('lib/api.cron', 'spider');
        }

        // Check whether each known project can start running.
        $data = Dever::load('spider/lib/project')->getAll();
        if ($data) {
            foreach ($data as $v) {
                $this->add_api($v);
            }
        }
    }

    /**
     * Worker loop: creates the queue and calls load() forever.
     *
     * NOTE(review): if Queue::pop() does not block on an empty queue this loop
     * spins without pause — confirm against the Queue implementation.
     */
    public function cron()
    {
        $this->queue = new Queue();
        while (1) {
            $this->load();
        }
    }

    /**
     * Pop one project id from the queue and launch `lib/api.run` for it in the
     * background.
     *
     * @return bool Always true, including when an exception was caught.
     */
    public function load()
    {
        try {
            $id = $this->queue->pop();
            if ($id) {
                $config = Dever::load('spider/lib/project')->get($id);
                if ($config) {
                    // Push the job to the background. First look at how many
                    // run processes currently exist.
                    $num = Dever::process('lib/api.run', true);
                    if ($num >= 1000) {
                        // Wait a while before launching. The count is not
                        // re-checked after the sleep.
                        sleep(60);
                    }
                    Dever::daemon('lib/api.run?id=' . $id, 'spider');
                }
            }
            return true;
        } catch (\Exception $e) {
            // Deliberately swallowed so that the cron() loop keeps running.
            return true;
        }
    }

    /**
     * Execute the project whose id is given by the `id` input.
     *
     * If the site URL and/or the request param contains a `{page=N}`
     * placeholder, the project is crawled page by page through page();
     * otherwise the site is parsed once. The project is set to state 2 at the
     * end.
     *
     * @return false|null false when the id is missing or the project cannot be
     *                    loaded; otherwise no value is returned.
     */
    public function run()
    {
        $id = Dever::input('id');
        if (!$id) {
            return false;
        }

        $config = Dever::load('spider/lib/project')->get($id);
        if (!$config) {
            return false;
        }

        $col = $this->col($config['id']);
        $set = $this->set($config['id']);

        $config['curl'] = array(
            'request_type' => $config['request_type'],
            'content_type' => $config['content_type'],
            'header' => $config['header'],
            'param' => $config['param'],
        );

        // Track whether any paginated crawl ran. The single-page fallback must
        // only run when neither the site nor the param has a placeholder;
        // previously it also ran (on the raw template URL) whenever only the
        // site was paginated.
        $paged = false;

        if (strpos($config['site'], '{page=') !== false) {
            $this->page($config['site'], 1, $config, $col, $set);
            $paged = true;
        }

        if (strpos($config['param'], '{page=') !== false) {
            $this->page($config['param'], 2, $config, $col, $set);
            $paged = true;
        }

        if (!$paged) {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            $this->parse($config['site'], $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
        }

        Dever::load('spider/lib/project')->set($config, 2);
    }

    /**
     * Fetch the `spider/col` rows whose pid equals the given project.
     *
     * @param mixed $project Project id.
     * @return mixed Result of getList().
     */
    private function col($project)
    {
        return Dever::db('spider/col')->getList(['where_pid' => $project]);
    }

    /**
     * Fetch the `spider/set` rows whose pid equals the given project.
     *
     * @param mixed $project Project id.
     * @return mixed Result of getList().
     */
    private function set($project)
    {
        return Dever::db('spider/set')->getList(['where_pid' => $project]);
    }

    /**
     * Build a Parse object from the given arguments and return its get() result.
     *
     * @param string $url     URL to parse.
     * @param mixed  $project Project id.
     * @param mixed  $rule    The project's collect_rule value.
     * @param array  $param   Request options (the `curl` array built in run()).
     * @param mixed  $col     Rows returned by col().
     * @param mixed  $set     Rows returned by set().
     * @param mixed  $push    The project's push value.
     * @return mixed Whatever Parse::get() returns.
     */
    private function parse($url, $project, $rule, $param, $col, $set, $push)
    {
        $parse = new Parse($url, $project, $rule, $param, $col, $set, $push);
        return $parse->get();
    }

    /**
     * Crawl a project page by page.
     *
     * $source must contain a `{page=N}` placeholder, where N is the first page.
     * For every page from N up to $config['page_num'] (100 when that value is
     * <= 0) the placeholder is replaced by the page number, the project is set
     * to state 4 with that page number, and the result is parsed.
     *
     * @param string $source Template string containing the placeholder.
     * @param int    $type   1: $source is the site URL; anything else: $source
     *                       is the request param.
     * @param array  $config Project configuration (modified locally only).
     * @param mixed  $col    Rows returned by col().
     * @param mixed  $set    Rows returned by set().
     * @return void
     */
    private function page($source, $type, $config, $col, $set)
    {
        $pat = '/{page=(.*?)}/i';
        preg_match_all($pat, $source, $match);

        // No placeholder, or an empty / "0" start value: nothing to do.
        if (!isset($match[1][0]) || !$match[1][0]) {
            return;
        }

        if ($config['page_num'] <= 0) {
            $config['page_num'] = 100;
        }

        $placeholder = $match[0][0];
        $page = $match[1][0];

        for ($i = $page; $i <= $config['page_num']; $i++) {
            // Substitute into a fresh copy of the template every time.
            // Overwriting $source would remove the placeholder after the first
            // iteration and make every later iteration fetch the first page.
            $current = str_replace($placeholder, (string) $i, $source);

            Dever::load('spider/lib/project')->set($config, 4, $i);

            if ($type == 1) {
                $config['site'] = $current;
            } else {
                $config['curl']['param'] = $current;
            }

            $this->parse($config['site'], $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
        }
    }
}