|
@@ -0,0 +1,65 @@
|
|
|
+<?php
|
|
|
+
|
|
|
+namespace Spider\Lib;
|
|
|
+include(DEVER_APP_PATH . 'third/phpQuery.php');
|
|
|
+use Dever;
|
|
|
+use phpQuery;
|
|
|
+
|
|
|
/**
 * Spider runner: pops URLs from a project's queue, downloads each page,
 * normalises it to UTF-8 and extracts the configured columns with phpQuery.
 */
class Api
{
    /**
     * Names under which GB-family (Simplified Chinese) text may be reported.
     * mbstring reports its canonical names (EUC-CN for GB2312, CP936 for GBK),
     * so both the aliases and the canonical names are listed.
     *
     * @var array
     */
    private static $gbEncodings = array('GB2312', 'GBK', 'EUC-CN', 'CP936');

    /**
     * Process every URL currently available in the project's queue.
     *
     * @param mixed $id   Project identifier, passed straight to Project.
     * @param bool  $ajax Currently unused; kept for interface compatibility.
     * @return void
     */
    public function run_api($id, $ajax = false)
    {
        $project = new Project($id);
        $config = $project->get();

        // A falsy value from pop() is treated as "queue drained".
        while ($url = $config['queue']->pop()) {
            $data = $this->download($url);
            if ($data === '') {
                // Nothing usable was fetched; move on to the next URL
                // instead of parsing an empty document.
                continue;
            }
            $this->parse($data, $config);
        }
    }

    /**
     * Fetch a URL and return its body as UTF-8 with all line breaks removed.
     *
     * @param string $url
     * @return string The normalised body, or '' when nothing could be fetched.
     */
    private function download($url)
    {
        $data = Dever::curl($url);
        // NOTE(review): assumes Dever::curl() yields a non-string or an empty
        // string on failure - confirm against its implementation.
        if (!is_string($data) || $data === '') {
            return '';
        }

        // Valid UTF-8 is left untouched. Multi-byte UTF-8 is frequently also a
        // valid GBK byte stream, so running detection first would mis-transcode
        // pages that are already UTF-8.
        if (!mb_check_encoding($data, 'UTF-8')) {
            $encode = mb_detect_encoding($data, array('GB2312', 'GBK', 'UTF-8'));
            if (in_array($encode, self::$gbEncodings, true)) {
                // CP936 is GBK (not Shift-JIS) and is a superset of GB2312, so
                // one conversion covers every GB-family result.
                $converted = mb_convert_encoding($data, 'UTF-8', 'CP936');
                if (is_string($converted)) {
                    $data = $converted;
                }
            }
        }

        // Strip every line-break style, not just the host platform's PHP_EOL,
        // so CRLF pages do not keep stray "\r" characters.
        return str_replace(array("\r\n", "\r", "\n"), '', $data);
    }

    /**
     * Extract the configured columns from a downloaded page.
     *
     * The page-level rule and the first line of each column rule are PHP
     * expressions that get eval'd; the optional second line of a column rule is
     * "regex" or "regex||groupIndex" applied to the expression's result.
     *
     * NOTE(review): the rules are executed with eval(). That is only acceptable
     * if collect_rule values can never be influenced by untrusted users -
     * confirm where the project configuration comes from.
     *
     * @param string $data   Page body as returned by download().
     * @param array  $config Project configuration; reads 'collect_rule' and
     *                       'col' (each entry with 'key' and 'collect_rule').
     * @return void Does not return: dumps the extracted row and terminates.
     */
    private function parse($data, $config)
    {
        // The local names used here ($jq, $result, $data, $config, $v) are
        // visible to the eval'd rules and must not be renamed.
        $jq = phpQuery::newDocumentHTML($data);
        if (!empty($config['collect_rule'])) {
            $cmd = '$jq = ' . $config['collect_rule'] . ';';
            eval($cmd);
        }

        $data = array();
        foreach ($config['col'] as $v) {
            // Default value when the column has no expression rule.
            $result = $jq->html();
            $rule = $this->splitRule($v['collect_rule']);

            if (!empty($rule[0])) {
                $cmd = '$result = ' . $rule[0] . ';';
                eval($cmd);
            }

            if (!empty($rule[1])) {
                $result = $this->extractByRegex($rule[1], $result);
            }

            $data[$v['key']] = $result;
        }

        // NOTE(review): this looks like leftover debugging - it terminates the
        // process after the first parsed page, so run_api() never reaches the
        // second URL. Kept because no result sink is visible here; replace
        // with real persistence/return handling.
        print_r($data);
        die;
    }

    /**
     * Split a multi-line rule into its lines, tolerating CRLF line endings.
     *
     * @param string $rule
     * @return array Lines of the rule without trailing carriage returns.
     */
    private function splitRule($rule)
    {
        $lines = explode("\n", $rule);
        foreach ($lines as $i => $line) {
            // A rule saved with CRLF would otherwise carry "\r" into the regex.
            $lines[$i] = rtrim($line, "\r");
        }

        return $lines;
    }

    /**
     * Apply a "regex" or "regex||groupIndex" rule and return the first match.
     *
     * @param string $rule    Pattern body (without delimiters), optionally
     *                        followed by "||" and a capture-group index
     *                        (defaults to 1).
     * @param mixed  $subject Value to search.
     * @return string|null First match of the requested group, or null when the
     *                     pattern fails, does not match or the group is absent.
     */
    private function extractByRegex($rule, $subject)
    {
        $temp = explode('||', $rule);
        $index = isset($temp[1]) ? $temp[1] : 1;

        $found = preg_match_all('/' . $temp[0] . '/i', $subject, $match);
        if (!$found || !isset($match[$index][0])) {
            return null;
        }

        return $match[$index][0];
    }
}
|