Api.php 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  /**
   * Queueing and execution entry points for spider projects.
   *
   * Projects are looked up through the 'spider/lib/project' library, queued
   * through 'spider/lib/queue', and crawled by handing each target URL to a
   * Parse instance. cron() is the long-running consumer that pops queued ids
   * and launches one background PHP process per project.
   */
  class Api
  {
      /**
       * Queue consumed by cron(); created lazily on the first cron() call.
       *
       * @var Queue|null
       */
      private $queue;

      /**
       * Flag a project via set($config, 3) and push its id onto the queue.
       *
       * @param mixed $id Project identifier handed to the project library.
       * @return string Always 'yes'.
       */
      public function add_api($id)
      {
          $config = Dever::load('spider/lib/project')->get($id);
          if (!$config) {
              // NOTE(review): presumably Dever::alert() halts the request; if it
              // returns, the lines below run with an empty $config — confirm.
              Dever::alert('项目不存在');
          }

          // NOTE(review): load() only launches projects whose 'status' is <= 2,
          // yet 3 is stored here before queueing — confirm what set()'s second
          // argument maps to.
          Dever::load('spider/lib/project')->set($config, 3);
          Dever::load('spider/lib/queue')->push($config['id']);

          return 'yes';
      }

      /**
       * Run a project synchronously with the 'test' input forced to 1.
       *
       * @param mixed $id Project identifier.
       * @return string Always 'yes', even when run() finds no such project.
       */
      public function test_api($id)
      {
          Dever::setInput('test', 1);
          $this->run($id);

          return 'yes';
      }

      /**
       * Consume the queue forever; this method never returns.
       *
       * NOTE(review): there is no delay between iterations, so this spins
       * unless Queue::pop() blocks while the queue is empty — confirm.
       */
      public function cron()
      {
          if (!$this->queue) {
              $this->queue = new Queue();
          }
          Dever::import('task');

          while (true) {
              $this->load();
          }
      }

      /**
       * Pop one project id and, if the project is due, launch it in the background.
       *
       * A project is launched when it exists, its 'status' is <= 2 and its
       * 'sdate' is not in the future. Failures are logged rather than
       * propagated so that one bad entry cannot stop the cron() loop.
       *
       * @return bool Always true.
       */
      private function load()
      {
          try {
              $id = $this->queue->pop();
              if ($id) {
                  $config = Dever::load('spider/lib/project')->get($id);
                  $due = $config && $config['status'] <= 2 && time() >= $config['sdate'];
                  if ($due) {
                      // Soft throttle: with 1000 or more workers already running,
                      // wait a minute before launching this one. The count is not
                      // re-checked after the wait.
                      if ($this->getNum() >= 1000) {
                          sleep(60);
                      }
                      $this->popen($id);
                  }
              }
          } catch (\Exception $e) {
              // Keep the worker alive, but leave a trace instead of failing silently.
              error_log('Spider\\Lib\\Api::load failed: ' . $e->getMessage());
          }

          return true;
      }

      /**
       * Crawl a project in the current process.
       *
       * A 'site' value containing both '{' and '}' is treated as a URL template
       * and expanded by preg(); any other value is parsed as a single URL.
       * Afterwards the project is flagged via set($config, 2).
       *
       * @param mixed $id Project identifier.
       * @return false|null False when the project does not exist, otherwise nothing.
       */
      public function run($id)
      {
          $config = Dever::load('spider/lib/project')->get($id);
          if (!$config) {
              return false;
          }

          // The crawl runs inline; an earlier draft wrapped it in Dever::task().
          $col = $this->col($config['id']);
          $site = $config['site'];
          $isTemplate = strpos($site, '{') !== false && strpos($site, '}') !== false;

          if ($isTemplate) {
              $this->preg($config, $col);
          } else {
              Dever::load('spider/lib/project')->set($config, 4, 1);
              $this->parse($site, $config['id'], $config['collect_rule'], $col);
          }

          Dever::load('spider/lib/project')->set($config, 2);
      }

      /**
       * Hand a project to a detached child PHP process (daemon/run.php).
       *
       * @param mixed $id Project identifier, passed to the script as "id=<id>".
       */
      public function popen($id)
      {
          $path = Dever::db('spider/project')->config['path'] . 'daemon/run.php';

          // The id originates from the queue / public API, so it is quoted before
          // being placed on a shell command line. Output is discarded and the
          // trailing '&' detaches the child.
          $command = 'php ' . $path . ' -send ' . escapeshellarg('id=' . $id) . ' 1>/dev/null 2>&1 &';
          exec($command);
      }

      /**
       * Count the processes whose command line mentions gateway/api.task/runOne.
       *
       * @return int Number of matching lines reported by ps; 0 when the command
       *             produces no usable output.
       */
      public function getNum()
      {
          $command = 'ps -ef | grep gateway/api.task/runOne | grep -v grep | wc -l';

          // wc may pad its output with whitespace; normalise to an integer.
          return (int) trim((string) exec($command));
      }

      /**
       * Fetch the 'spider/col' rows whose pid matches the project.
       *
       * @param mixed $project Project id.
       * @return mixed Result of getList().
       */
      private function col($project)
      {
          return Dever::db('spider/col')->getList(['where_pid' => $project]);
      }

      /**
       * Run a Parse over a single URL.
       *
       * @param string $url     Address to fetch.
       * @param mixed  $project Project id.
       * @param mixed  $rule    The project's 'collect_rule' value.
       * @param mixed  $col     Column definitions returned by col().
       * @return mixed Whatever Parse::get() returns.
       */
      private function parse($url, $project, $rule, $col)
      {
          $parser = new Parse($url, $project, $rule, $col);

          return $parser->get();
      }

      /**
       * Expand the first "{...}" placeholder of a templated site URL.
       *
       * The placeholder body is decoded as a query string (e.g. "{page=1}") and
       * forwarded to page(). A 'page_num' of 0 or less defaults to 100. Only the
       * first placeholder is considered; an empty one ("{}") does nothing.
       *
       * @param array $config Project configuration.
       * @param mixed $col    Column definitions returned by col().
       */
      private function preg($config, $col)
      {
          preg_match_all('/{(.*?)}/i', $config['site'], $match);
          if (!isset($match[1][0]) || !$match[1][0]) {
              return;
          }

          if ($config['page_num'] <= 0) {
              $config['page_num'] = 100;
          }

          parse_str($match[1][0], $param);
          $this->page($param, $match[0][0], $config, $col);
      }

      /**
       * Parse every page of a templated site URL.
       *
       * With a truthy 'page' parameter the placeholder is replaced by each page
       * number from that value up to 'page_num' inclusive; otherwise the
       * placeholder is stripped and the resulting URL is parsed once.
       *
       * @param array  $param   Parameters decoded from the placeholder body.
       * @param string $replace The full placeholder text, braces included.
       * @param array  $config  Project configuration.
       * @param mixed  $col     Column definitions returned by col().
       */
      private function page($param, $replace, $config, $col)
      {
          if (isset($param['page']) && $param['page']) {
              for ($i = $param['page']; $i <= $config['page_num']; $i++) {
                  $url = str_replace($replace, $i, $config['site']);
                  Dever::load('spider/lib/project')->set($config, 4, $i);
                  $this->parse($url, $config['id'], $config['collect_rule'], $col);
              }

              return;
          }

          Dever::load('spider/lib/project')->set($config, 4, 1);
          // Strip the placeholder from the configured site URL. This previously
          // read an undefined variable, so an empty URL was parsed instead.
          $url = str_replace($replace, '', $config['site']);
          $this->parse($url, $config['id'], $config['collect_rule'], $col);
      }
  }