get($id); if (!$config) { Dever::alert('项目不存在'); } Dever::load('spider/lib/project')->set($config, 3); Dever::load('spider/lib/queue')->push($config['id']); return 'yes'; }

    /**
     * Runs a project inline with the "test" input flag set to 1.
     *
     * @param mixed $id Project identifier passed straight to run().
     * @return string Always 'yes', regardless of what run() returned.
     */
    public function test_api($id)
    {
        Dever::setInput('test', 1);
        $this->run($id);

        return 'yes';
    }

    /**
     * Daemon entry point: lazily creates the queue, then calls load() forever.
     *
     * This method never returns.
     */
    public function cron()
    {
        if (!$this->queue) {
            $this->queue = new Queue();
        }
        Dever::import('task');

        while (1) {
            $this->load();
        }
    }

    /**
     * Pops one project id from the queue and, if the project is due,
     * hands it to a background process via popen().
     *
     * @return bool Always true, including when an exception was caught.
     */
    private function load()
    {
        try {
            $id = $this->queue->pop();
            if ($id) {
                $config = Dever::load('spider/lib/project')->get($id);
                # NOTE(review): presumably status <= 2 means "not currently running" and
                # sdate is the scheduled start timestamp - confirm against the project model.
                if ($config && $config['status'] <= 2 && time() >= $config['sdate']) {
                    # Push the work to a background process; first look at how many
                    # worker processes are currently running.
                    $num = $this->getNum();
                    if ($num >= 1000) {
                        # Too many workers: wait a while before spawning another.
                        # The count is not re-checked after the sleep.
                        sleep(60);
                    }
                    $this->popen($id);
                }
            }

            return true;
        } catch (\Exception $e) {
            # The exception is swallowed, so the loop in cron() keeps running.
            return true;
        }
    }

    /**
     * Crawls one project: either a single URL, or a paged series when the
     * site URL contains a "{...}" placeholder.
     *
     * @param mixed $id Project identifier.
     * @return false|null false when the project cannot be loaded; otherwise nothing is returned.
     */
    public function run($id)
    {
        $config = Dever::load('spider/lib/project')->get($id);
        if (!$config) {
            return false;
        }

        # NOTE: this work was originally intended to run inside a Dever task;
        # it currently runs inline in the calling process.
        $col = $this->col($config['id']);

        if (strpos($config['site'], '{') !== false && strpos($config['site'], '}') !== false) {
            # The URL carries a "{...}" placeholder: expand it.
            $this->preg($config, $col);
        } else {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            $this->parse($config['site'], $config['id'], $config['collect_rule'], $col);
        }

        Dever::load('spider/lib/project')->set($config, 2);
    }

    /**
     * Hands a project id to a detached child PHP process (daemon/run.php).
     *
     * Output is discarded and the command is backgrounded with "&", so this
     * call does not wait for the child.
     *
     * @param mixed $id Project identifier forwarded as "-send id=<id>".
     */
    public function popen($id)
    {
        $path = Dever::db('spider/project')->config['path'] . 'daemon/run.php';

        # Quote the id argument so shell metacharacters in it cannot alter the
        # command line; the child still receives the same "id=<id>" argv entry.
        $command = 'php ' . $path . ' -send ' . escapeshellarg('id=' . $id) . ' 1>/dev/null 2>&1 &';
        exec($command);
    }

    /**
     * Counts the currently running child processes by grepping `ps -ef`
     * for "gateway/api.task/runOne".
     *
     * @return string|false Last line of the command output (the count as printed by wc -l).
     */
    public function getNum()
    {
        $command = 'ps -ef | grep gateway/api.task/runOne | grep -v grep | wc -l';
        $num = exec($command);

        return $num;
    }

    /**
     * Fetches the spider/col rows whose pid matches the given project.
     *
     * @param mixed $project Project id used as the where_pid filter.
     */
    private function col($project)
    {
        return Dever::db('spider/col')->getList(['where_pid' => $project]);
    }

    /**
     * Builds a Parse object for one URL and returns the result of its get().
     *
     * @param string $url     URL to crawl.
     * @param mixed  $project Project id.
     * @param mixed  $rule    The project's collect_rule value.
     * @param mixed  $col     Column rows as returned by col().
     */
    private function parse($url, $project, $rule, $col)
    {
        $parse = new Parse($url, $project, $rule, $col);

        return $parse->get();
    }

    /**
     * Extracts the first "{...}" placeholder from the site URL, parses its
     * contents as a query string and delegates to page().
     *
     * Nothing is crawled when the placeholder is missing or empty.
     *
     * @param array $config Project configuration (site, page_num, id, collect_rule).
     * @param mixed $col    Column rows as returned by col().
     */
    private function preg($config, $col)
    {
        $pat = '/{(.*?)}/i';
        preg_match_all($pat, $config['site'], $match);

        if (isset($match[1][0]) && $match[1][0]) {
            # Fall back to 100 pages when no positive page count is configured.
            if ($config['page_num'] <= 0) {
                $config['page_num'] = 100;
            }
            parse_str($match[1][0], $param);
            $this->page($param, $match[0][0], $config, $col);
        }
    }

    /**
     * Crawls every page from $param['page'] up to $config['page_num'], or a
     * single URL with the placeholder removed when no start page is given.
     *
     * @param array  $param   Parameters parsed from inside the placeholder.
     * @param string $replace The full placeholder text, braces included.
     * @param array  $config  Project configuration (site, page_num, id, collect_rule).
     * @param mixed  $col     Column rows as returned by col().
     */
    private function page($param, $replace, $config, $col)
    {
        if (isset($param['page']) && $param['page']) {
            for ($i = $param['page']; $i <= $config['page_num']; $i++) {
                $url = str_replace($replace, $i, $config['site']);
                # Third argument is the page number currently being processed.
                Dever::load('spider/lib/project')->set($config, 4, $i);
                $this->parse($url, $config['id'], $config['collect_rule'], $col);
            }
        } else {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            # Strip the placeholder from the site URL. The previous code read an
            # undefined $value here, so an empty URL was handed to parse().
            $url = str_replace($replace, '', $config['site']);
            $this->parse($url, $config['id'], $config['collect_rule'], $col);
        }
    }
}