get($id);
        }

        if (!$config) {
            Dever::alert('项目不存在');
        }

        # Projects whose status is <= 2 are set to status 3 and pushed onto the queue.
        if ($config['status'] <= 2) {
            Dever::load('spider/lib/project')->set($config, 3);
            Dever::load('spider/lib/queue')->push($config['id']);
        }

        return 'reload';
    }

    /**
     * Run a project with the "test" input flag set to 1.
     *
     * NOTE(review): run() declares no parameters and reads the project id from
     * Dever::input('id'), so the $id passed below is ignored by run(). Confirm
     * that callers always provide the same id as request input.
     *
     * @param mixed $id Project id.
     * @return string Always 'reload'.
     */
    public function test_api($id)
    {
        Dever::setInput('test', 1);
        $this->run($id);

        return 'reload';
    }

    /**
     * Daemon entry point; running it once per minute is sufficient.
     *
     * @return void
     */
    public function daemon()
    {
        # Check whether the cron process exists; start it when it does not.
        $state = Dever::process('lib/api.cron', true);
        if ($state <= 0) {
            Dever::daemon('lib/api.cron', 'spider');
        }

        # Check whether each of the current projects can start running.
        $data = Dever::load('spider/lib/project')->getAll();
        if ($data) {
            foreach ($data as $v) {
                $this->add_api($v);
            }
        }
    }

    /**
     * Worker loop: creates the queue and calls load() forever.
     *
     * NOTE(review): if Queue::pop() does not block on an empty queue this loop
     * spins without any delay - confirm against the Queue implementation.
     *
     * @return void
     */
    public function cron()
    {
        $this->queue = new Queue();
        while (1) {
            $this->load();
        }
    }

    /**
     * Pop one project id from the queue and launch its run process when no
     * process for that id is reported.
     *
     * Exceptions are swallowed on purpose so the cron() loop keeps going.
     *
     * @return bool Always true.
     */
    public function load()
    {
        try {
            $id = $this->queue->pop();
            if ($id) {
                $config = Dever::load('spider/lib/project')->get($id);
                if ($config) {
                    # Push to the background.
                    # Get the number of currently executing run processes.
                    $num = Dever::process('lib/api.run', true);
                    if ($num >= 1000) {
                        # Wait a while before continuing.
                        sleep(60);
                    }

                    $num = Dever::process('lib/api.run?id=' . $id, true);
                    if ($num <= 0) {
                        Dever::daemon('lib/api.run?id=' . $id, 'spider');
                    }
                }
            }

            return true;
        } catch (\Exception $e) {
            return true;
        }
    }

    /**
     * Execute the project identified by the 'id' input.
     *
     * Builds the request settings, resolves the site/page templates, then
     * either runs one task per URL collected from the category rule or a
     * single task for the project itself. Finally sets the project to state 2.
     *
     * @return false|null false when the id, project or category is missing;
     *                    otherwise nothing is returned.
     */
    public function run()
    {
        $id = Dever::input('id');
        if (!$id) {
            return false;
        }

        $config = Dever::load('spider/lib/project')->get($id);
        if (!$config) {
            return false;
        }

        $cate = Dever::db('spider/cate')->find($config['cate_id']);
        if (!$cate) {
            return false;
        }

        $col = $this->col($config['id']);
        $set = $this->set($config['id']);

        $config['curl'] = [
            'request_type' => $config['request_type'],
            'content_type' => $config['content_type'],
            'header' => $config['header'],
            'param' => $config['param'],
        ];

        # The stored site value is split: first part is the site, optional
        # second part is the page template.
        $site = Dever::split($config['site']);
        $config['site'] = $site[0];
        $config['page'] = '';
        if (!strstr($config['site'], 'http')) {
            # No 'http' anywhere in the site: prefix it with the category site.
            $config['site'] = $cate['site'] . $config['site'];
        }
        if (isset($site[1]) && $site[1]) {
            $config['page'] = $site[1];
        }

        if ($cate['collect_rule'] && $cate['site']) {
            $rule = Dever::split($cate['collect_rule']);
            if (!isset($rule[1])) {
                $rule[1] = '';
            }

            $doc = Doc::getInstance($cate['site'], $rule[0]);
            $doc->log(new Log($id));
            $data = $doc->get($config['curl']);
            $data = Dever::json_decode($data);
            if ($data) {
                foreach ($data as $v) {
                    if (!$v) {
                        continue;
                    }
                    # Optional second rule part acts as a substring filter.
                    if ($rule[1] && !strstr($v, $rule[1])) {
                        continue;
                    }
                    $config['site'] = $v;
                    $this->task($config, $col, $set, $v);
                }
            }
        } else {
            $this->task($config, $col, $set);
        }

        Dever::load('spider/lib/project')->set($config, 2);
    }

    /**
     * Resolve {cate=...} placeholders and dispatch to paged or single parsing.
     *
     * @param array $config Project configuration as prepared by run().
     * @param mixed $col    Column definitions from col().
     * @param mixed $set    Settings from set().
     * @param mixed $cate   Value substituted for {cate=...}; when falsy the
     *                      default written inside the placeholder is used.
     * @return void
     */
    private function task($config, $col, $set, $cate = false)
    {
        # Replaces the first {cate=default} placeholder found in $text. Once a
        # value has been chosen it is reused for later calls via $cate.
        $fillCate = function ($text) use (&$cate) {
            if (strpos($text, '{cate=') === false) {
                return $text;
            }
            preg_match_all('/{cate=(.*?)}/i', $text, $match);
            if (isset($match[1][0]) && $match[1][0]) {
                $cate = $cate ? $cate : $match[1][0];
                $text = str_replace($match[0][0], $cate, $text);
            }

            return $text;
        };

        $config['site'] = $fillCate($config['site']);
        if ($config['page']) {
            $config['page'] = $fillCate($config['page']);
        }

        # The {page=...} placeholder may live in the page template (type 1),
        # the site (type 2) or the request param (type 3).
        if ($config['page'] && strpos($config['page'], '{page=') !== false) {
            $this->page($config['page'], 1, $config, $col, $set);
        } elseif (strpos($config['site'], '{page=') !== false) {
            $this->page($config['site'], 2, $config, $col, $set);
        } elseif ($config['param'] && strpos($config['param'], '{page=') !== false) {
            $this->page($config['param'], 3, $config, $col, $set);
        } else {
            Dever::load('spider/lib/project')->set($config, 4, 1);
            $this->parse($config['site'], $config['id'], $config['collect_list_rule'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
        }
    }

    /**
     * Fetch the spider/col rows whose pid is the given project.
     *
     * @param mixed $project Project id.
     * @param int   $source  Unused; kept for interface compatibility.
     * @return mixed Result of getList().
     */
    private function col($project, $source = 1)
    {
        return Dever::db('spider/col')->getList(['where_pid' => $project]);
    }

    /**
     * Fetch the spider/set rows whose pid is the given project.
     *
     * @param mixed $project Project id.
     * @return mixed Result of getList().
     */
    private function set($project)
    {
        return Dever::db('spider/set')->getList(['where_pid' => $project]);
    }

    /**
     * Build a Parse instance for one URL and return the result of its get().
     *
     * When the 'test' input equals 1 exceptions propagate to the caller;
     * otherwise an \Exception is swallowed and false is returned.
     *
     * @return mixed Result of Parse::get(), or false on a swallowed exception.
     */
    private function parse($url, $project, $list_rule, $rule, $param, $col, $set, $push)
    {
        $test = Dever::input('test');

        try {
            $parse = new Parse($url, $project, $list_rule, $rule, $param, $col, $set, $push);

            return $parse->get();
        } catch (\Exception $e) {
            if ($test == 1) {
                # Test mode: surface the failure instead of hiding it.
                throw $e;
            }

            return false;
        }
    }

    /**
     * Expand a {page=N} placeholder and parse every page from N up to
     * $config['page_num'] (100 when page_num is <= 0).
     *
     * @param string $source Text containing the {page=N} placeholder.
     * @param int    $type   1: $source is appended to the site (except for
     *                       page 1), 2: $source is the site itself,
     *                       3: $source is the request param.
     * @param array  $config Project configuration.
     * @param mixed  $col    Column definitions.
     * @param mixed  $set    Settings.
     * @return void
     */
    private function page($source, $type, $config, $col, $set)
    {
        $site = $config['site'];

        preg_match_all('/{page=(.*?)}/i', $source, $match);
        if (!isset($match[1][0]) || !$match[1][0]) {
            return;
        }

        # A non-numeric start would be incremented alphabetically by PHP and
        # compared inconsistently with page_num, so refuse it outright.
        if (!is_numeric($match[1][0])) {
            return;
        }
        $start = (int) $match[1][0];

        if ($config['page_num'] <= 0) {
            $config['page_num'] = 100;
        }

        for ($i = $start; $i <= $config['page_num']; $i++) {
            $site_page = str_replace($match[0][0], $i, $source);
            Dever::load('spider/lib/project')->set($config, 4, $i);

            if ($type == 1) {
                # Page 1 is the bare site; later pages append the page part.
                if ($i == 1) {
                    $site = $config['site'];
                } else {
                    $site = $config['site'] . $site_page;
                }
            } elseif ($type == 2) {
                $site = $site_page;
            } else {
                $config['curl']['param'] = $site_page;
            }

            $this->parse($site, $config['id'], $config['collect_list_rule'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
        }
    }
}