Api.php 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. <?php
  2. namespace Spider\Lib;
  3. include(DEVER_APP_PATH . 'third/phpQuery.php');
  4. use Dever;
  5. use phpQuery;
  /**
   * Runs a spider project: pops URLs off the project's queue, downloads each
   * page and extracts the configured columns from it.
   */
  class Api
  {
      /**
       * Process every URL currently queued for a project.
       *
       * Loads the project configuration through Project::get() and keeps
       * popping URLs from $config['queue'] until pop() returns a falsy value.
       *
       * NOTE(review): parse() currently ends with print_r() + die, so in
       * practice the script stops after the first URL has been parsed.
       *
       * @param mixed $id   Project identifier handed to the Project constructor.
       * @param bool  $ajax Not used by this method; kept for caller compatibility.
       * @return void
       */
      public function run_api($id, $ajax = false)
      {
          $project = new Project($id);
          $config = $project->get();

          while ($url = $config['queue']->pop()) {
              $data = $this->download($url);
              $this->parse($data, $config);
          }
      }

      /**
       * Fetch a URL and return its body as a single line of UTF-8 text.
       *
       * @param string $url Address passed to Dever::curl().
       * @return string Body with GB2312/GBK content converted to UTF-8 and all
       *                line breaks removed.
       */
      private function download($url)
      {
          $data = Dever::curl($url);
          $encode = mb_detect_encoding($data, array('GB2312', 'GBK', 'UTF-8'));

          // mbstring reports its own encoding names: 'EUC-CN' for GB2312 and
          // 'CP936' for GBK. CP936 is Simplified Chinese (Shift-JIS is CP932),
          // so both are decoded as GBK, which is a superset of GB2312.
          if (in_array($encode, array('GB2312', 'GBK', 'EUC-CN', 'CP936'), true)) {
              $converted = \iconv('GBK', 'UTF-8', $data);
              // iconv() returns false on an illegal byte sequence; keep the
              // undecoded body rather than replacing the page with nothing.
              if ($converted !== false) {
                  $data = $converted;
              }
          }

          // Strip every newline style. PHP_EOL only describes the host running
          // the spider, not the line endings used by the downloaded page.
          return str_replace(array("\r\n", "\r", "\n"), '', $data);
      }

      /**
       * Extract the configured columns from a downloaded page.
       *
       * The project's collect_rule is evaluated to narrow $jq, then every entry
       * of $config['col'] is resolved: line 1 of its collect_rule is a PHP
       * expression assigned to $result, line 2 is an optional
       * "pattern||groupIndex" regex (group 1 when the index is omitted) applied
       * to $result.
       *
       * SECURITY: collect_rule strings are executed with eval(). They must only
       * ever come from trusted project configuration, never from crawled data
       * or end-user input.
       *
       * Local variable names ($jq, $result, $data, $config, $v) are visible to
       * the evaluated rules, so they must not be renamed.
       *
       * @param string $data   Page HTML as returned by download().
       * @param array  $config Project configuration ('collect_rule', 'col').
       * @return void This method prints the collected row and terminates the script.
       */
      private function parse($data, $config)
      {
          $jq = phpQuery::newDocumentHTML($data);
          // Project-level rule: an expression whose value replaces $jq.
          $cmd = '$jq = ' . $config['collect_rule'] . ';';
          eval($cmd);

          $data = array();
          foreach ($config['col'] as $v) {
              $result = $jq->html();

              // Accept rules stored with CRLF line endings; a stray "\r" would
              // otherwise become part of the regex on the second line.
              $rule = preg_split('/\r?\n/', $v['collect_rule']);

              if (isset($rule[0]) && $rule[0]) {
                  $cmd = '$result = ' . $rule[0] . ';';
                  eval($cmd);
              }

              if (isset($rule[1]) && $rule[1]) {
                  $temp = explode('||', $rule[1]);
                  $index = isset($temp[1]) ? $temp[1] : 1;

                  // Reset per column so a pattern that fails to compile cannot
                  // reuse the matches of the previous column.
                  $match = array();
                  preg_match_all('/' . $temp[0] . '/i', $result, $match);

                  // No match, or a group index the pattern does not define,
                  // yields null instead of an undefined-offset warning.
                  $result = isset($match[$index][0]) ? $match[$index][0] : null;
              }

              $data[$v['key']] = $result;
          }

          // NOTE(review): dumps the row and halts the whole script, which also
          // stops the queue loop in run_api(). Looks like leftover debugging -
          // confirm before replacing it with a return or a storage call.
          print_r($data);die;
      }
  }