1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- <?php
- namespace Spider\Lib;
- include(DEVER_APP_PATH . 'third/phpQuery.php');
- use Dever;
- use phpQuery;
/**
 * Spider runner: pops URLs from a project's queue, downloads each page,
 * normalises it to UTF-8 and extracts the configured columns from it.
 */
class Api
{
    /**
     * Drains the project's URL queue, downloading and parsing every page.
     *
     * NOTE(review): phpQuery is believed to keep created documents in a static
     * registry; for very long queues confirm whether documents need unloading.
     *
     * @param mixed $id   Project identifier, handed to the Project constructor.
     * @param bool  $ajax Not used by this method; kept so existing callers work.
     *
     * @return array One entry per downloaded page, each being the
     *               column-key => value map built by parse().
     */
    public function run_api($id, $ajax = false)
    {
        $project = new Project($id);
        $config = $project->get();

        $records = array();
        // A falsy value from pop() is treated as "queue exhausted".
        while ($url = $config['queue']->pop()) {
            $html = $this->download($url);
            if ($html === '') {
                // Nothing usable was fetched, so there is nothing to parse.
                continue;
            }
            $records[] = $this->parse($html, $config);
        }

        return $records;
    }

    /**
     * Fetches a URL and returns its body as single-line UTF-8 text.
     *
     * @param string $url Address to download via Dever::curl().
     *
     * @return string The page with all line breaks removed, or '' when the
     *                download produced no string content.
     */
    private function download($url)
    {
        $data = Dever::curl($url);
        if (!is_string($data) || $data === '') {
            return '';
        }

        $encode = mb_detect_encoding($data, array('GB2312', 'GBK', 'UTF-8'));

        // mbstring reports GB2312 as "EUC-CN" and GBK as "CP936". Both are
        // Simplified Chinese and GBK is a superset of GB2312, so every one of
        // these names is converted from GBK (never from Shift-JIS).
        if (in_array($encode, array('GB2312', 'GBK', 'EUC-CN', 'CP936'), true)) {
            $converted = \iconv('GBK', 'UTF-8', $data);
            if ($converted === false) {
                // iconv gives up on the first illegal byte sequence; mbstring
                // substitutes bad characters instead of discarding the page.
                $converted = mb_convert_encoding($data, 'UTF-8', 'GBK');
            }
            $data = $converted;
        }

        // Strip every kind of line break, not only the host platform's
        // PHP_EOL, so the result is the same on any OS.
        return str_replace(array("\r\n", "\r", "\n"), '', $data);
    }

    /**
     * Extracts the configured columns from one downloaded page.
     *
     * $config['collect_rule'] is a PHP expression narrowing the document; it is
     * evaluated with the phpQuery document available as $jq. Each entry of
     * $config['col'] carries a 'key' and a 'collect_rule' of up to two lines:
     *   line 1 - optional PHP expression whose value becomes $result;
     *   line 2 - optional "regex||group" pair applied to $result. The regex is
     *            wrapped in "/" delimiters with the "i" flag, so a literal "/"
     *            inside it must be escaped. The group defaults to 1.
     *
     * SECURITY: rule text is executed through eval(). It must only ever come
     * from trusted project configuration, never from crawled or user input.
     *
     * Local variable names in this method are deliberately left unchanged
     * because eval'd rules run in this scope and may refer to them.
     *
     * @param string $data   UTF-8 HTML of the page.
     * @param array  $config Project configuration ('collect_rule', 'col').
     *
     * @return array Map of column key => extracted value (null when the
     *               column's regex or capture group did not match).
     */
    private function parse($data, $config)
    {
        $jq = phpQuery::newDocumentHTML($data);

        // An empty rule would eval to "$jq = ;" (a parse error); in that case
        // keep working on the whole document.
        if (isset($config['collect_rule']) && trim($config['collect_rule']) !== '') {
            $cmd = '$jq = ' . $config['collect_rule'] . ';';
            eval($cmd);
        }

        $data = array();
        foreach ($config['col'] as $v) {
            $result = $jq->html();

            // Rules saved with CRLF line endings would otherwise leave a
            // stray "\r" glued to the regex or the group index.
            $rule = explode("\n", str_replace("\r", '', $v['collect_rule']));

            if (isset($rule[0]) && $rule[0]) {
                $cmd = '$result = ' . $rule[0] . ';';
                eval($cmd);
            }

            if (isset($rule[1]) && $rule[1]) {
                $temp = explode('||', $rule[1]);
                // Left uncast on purpose: a named capture group works as well
                // as a numeric one.
                $index = isset($temp[1]) ? trim($temp[1]) : 1;
                $found = preg_match_all('/' . $temp[0] . '/i', $result, $match);
                // Only the first occurrence is used; guard against a regex
                // that did not match or a group that does not exist.
                $result = ($found && isset($match[$index][0])) ? $match[$index][0] : null;
            }

            $data[$v['key']] = $result;
        }

        // Hand the record back instead of print_r()+die, which ended the
        // whole run after the first page.
        return $data;
    }
}
|