|
@@ -26,7 +26,7 @@ class Api
|
|
|
|
|
|
public function test_api($id)
|
|
|
{
|
|
|
- Dever::setInput('test', 1);
|
|
|
+
|
|
|
$this->run($id);
|
|
|
return 'reload';
|
|
|
}
|
|
@@ -91,6 +91,10 @@ class Api
|
|
|
if (!$config) {
|
|
|
return false;
|
|
|
}
|
|
|
+ $cate = Dever::db('spider/cate')->find($config['cate_id']);
|
|
|
+ if (!$cate) {
|
|
|
+ return false;
|
|
|
+ }
|
|
|
$col = $this->col($config['id']);
|
|
|
$set = $this->set($config['id']);
|
|
|
|
|
@@ -101,10 +105,77 @@ class Api
|
|
|
'header' => $config['header'],
|
|
|
'param' => $config['param'],
|
|
|
);
|
|
|
- if (strpos($config['site'], '{page=') !== false) {
|
|
|
- $this->page($config['site'], 1, $config, $col, $set);
|
|
|
- } if (strpos($config['param'], '{page=') !== false) {
|
|
|
- $this->page($config['param'], 2, $config, $col, $set);
|
|
|
+
|
|
|
+ $site = Dever::split($config['site']);
|
|
|
+
|
|
|
+ $config['site'] = $site[0];
|
|
|
+ $config['page'] = '';
|
|
|
+
|
|
|
+ if (!strstr($config['site'], 'http')) {
|
|
|
+ $config['site'] = $cate['site'] . $config['site'];
|
|
|
+ }
|
|
|
+
|
|
|
+ if (isset($site[1]) && $site[1]) {
|
|
|
+ $config['page'] = $site[1];
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($cate['collect_rule'] && $cate['site']) {
|
|
|
+ $rule = Dever::split($cate['collect_rule']);
|
|
|
+ if (!isset($rule[1])) {
|
|
|
+ $rule[1] = '';
|
|
|
+ }
|
|
|
+
|
|
|
+ $doc = Doc::getInstance($cate['site'], $rule[0]);
|
|
|
+
|
|
|
+ $doc->log(new Log($id));
|
|
|
+ $data = $doc->get($config['curl']);
|
|
|
+ $data = Dever::json_decode($data);
|
|
|
+ if ($data) {
|
|
|
+ foreach ($data as $k => $v) {
|
|
|
+ if (!$v) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if ($rule[1] && !strstr($v, $rule[1])) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ $config['site'] = $v;
|
|
|
+ $this->task($config, $col, $set, $v);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ $this->task($config, $col, $set);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ Dever::load('spider/lib/project')->set($config, 2);
|
|
|
+ }
|
|
|
+
|
|
|
+ private function task($config, $col, $set, $cate = false)
|
|
|
+ {
|
|
|
+ if (strpos($config['site'], '{cate=') !== false) {
|
|
|
+ $pat = '/{cate=(.*?)}/i';
|
|
|
+ preg_match_all($pat, $config['site'], $match);
|
|
|
+ if (isset($match[1][0]) && $match[1][0]) {
|
|
|
+ $cate = $cate ? $cate : $match[1][0];
|
|
|
+ $config['site'] = str_replace($match[0][0], $cate, $config['site']);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($config['page'] && strpos($config['page'], '{cate=') !== false) {
|
|
|
+ $pat = '/{cate=(.*?)}/i';
|
|
|
+ preg_match_all($pat, $config['page'], $match);
|
|
|
+ if (isset($match[1][0]) && $match[1][0]) {
|
|
|
+ $cate = $cate ? $cate : $match[1][0];
|
|
|
+ $config['page'] = str_replace($match[0][0], $cate, $config['page']);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($config['page'] && strpos($config['page'], '{page=') !== false) {
|
|
|
+ $this->page($config['page'], 1, $config, $col, $set);
|
|
|
+ } elseif (strpos($config['site'], '{page=') !== false) {
|
|
|
+ $this->page($config['site'], 2, $config, $col, $set);
|
|
|
+ } elseif ($config['param'] && strpos($config['param'], '{page=') !== false) {
|
|
|
+ $this->page($config['param'], 3, $config, $col, $set);
|
|
|
} else {
|
|
|
Dever::load('spider/lib/project')->set($config, 4, 1);
|
|
|
$this->parse($config['site'], $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
|
|
@@ -116,7 +187,6 @@ class Api
|
|
|
$this->parse($config['url'], $config['id'], $config['collect_rule'], $col);
|
|
|
});
|
|
|
*/
|
|
|
- Dever::load('spider/lib/project')->set($config, 2);
|
|
|
}
|
|
|
|
|
|
private function col($project, $source = 1)
|
|
@@ -137,6 +207,7 @@ class Api
|
|
|
|
|
|
private function page($source, $type, $config, $col, $set)
|
|
|
{
|
|
|
+ $site = $config['site'];
|
|
|
$pat = '/{page=(.*?)}/i';
|
|
|
preg_match_all($pat, $source, $match);
|
|
|
if (isset($match[1][0]) && $match[1][0]) {
|
|
@@ -145,22 +216,20 @@ class Api
|
|
|
$page = $match[1][0];
|
|
|
|
|
|
for ($i = $page; $i <= $config['page_num']; $i++) {
|
|
|
- $source = str_replace($match[0][0], $i, $source);
|
|
|
+ $site_page = str_replace($match[0][0], $i, $source);
|
|
|
Dever::load('spider/lib/project')->set($config, 4, $i);
|
|
|
if ($type == 1) {
|
|
|
- if (strstr($source, '||')) {
|
|
|
- $temp = explode('||', $source);
|
|
|
- if ($i == 1) {
|
|
|
- $source = $temp[0];
|
|
|
- } else {
|
|
|
- $source = $temp[0] . $temp[1];
|
|
|
- }
|
|
|
+ if ($i == 1) {
|
|
|
+ $site = $config['site'];
|
|
|
+ } else {
|
|
|
+ $site = $config['site'] . $site_page;
|
|
|
}
|
|
|
- $config['site'] = $source;
|
|
|
+ } elseif ($type == 2) {
|
|
|
+ $site = $site_page;
|
|
|
} else {
|
|
|
- $config['curl']['param'] = $source;
|
|
|
+ $config['curl']['param'] = $site_page;
|
|
|
}
|
|
|
- $this->parse($config['site'], $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
|
|
|
+ $this->parse($site, $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
|
|
|
}
|
|
|
}
|
|
|
}
|