Api.php 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  /**
   * Scheduler and runner for spider projects.
   *
   * Projects are put on a queue (add_api), a long-running consumer (cron/load)
   * starts one background process per queued project, and that process executes
   * run(), which fetches and parses the project's page(s) through Parse.
   */
  class Api
  {
      /** @var Queue|null Queue consumed by load(); created by cron(). */
      private $queue;

      /**
       * Put a project on the run queue.
       *
       * @param int|array $id Project id, or an already loaded project config array.
       * @return string Always 'reload'.
       */
      public function add_api($id)
      {
          $config = is_array($id) ? $id : Dever::load('spider/lib/project')->get($id);

          if (!$config) {
              // NOTE(review): presumably Dever::alert() aborts the request; if it
              // does not, the status check below runs on an empty config — confirm.
              Dever::alert('项目不存在');
          }

          // Only projects with status <= 2 are queued; the status is moved to 3
          // before the id is pushed.
          if ($config['status'] <= 2) {
              Dever::load('spider/lib/project')->set($config, 3);
              Dever::load('spider/lib/queue')->push($config['id']);
          }

          return 'reload';
      }

      /**
       * Run a project immediately in test mode.
       *
       * run() takes no arguments and reads the project id from the request
       * input, so the id is published there instead of being passed along
       * (where it would be silently ignored).
       *
       * @param int $id Project id.
       * @return string Always 'reload'.
       */
      public function test_api($id)
      {
          Dever::setInput('test', 1);
          Dever::setInput('id', $id);
          $this->run();

          return 'reload';
      }

      /**
       * Daemon entry point; running it once a minute is sufficient.
       *
       * Starts the queue consumer when no such process is reported, then offers
       * every project returned by the project library to add_api().
       */
      public function daemon()
      {
          // Check whether the consumer process exists.
          if (Dever::process('lib/api.cron', true) <= 0) {
              Dever::daemon('lib/api.cron', 'spider');
          }

          // Check whether any of the current projects can start running.
          $projects = Dever::load('spider/lib/project')->getAll();
          if ($projects) {
              foreach ($projects as $project) {
                  $this->add_api($project);
              }
          }
      }

      /**
       * Queue consumer: creates the queue and calls load() forever.
       */
      public function cron()
      {
          $this->queue = new Queue();
          while (true) {
              $this->load();
          }
      }

      /**
       * Pop one project id from the queue and start it in a background process.
       *
       * Failures are logged and otherwise ignored so that the consumer loop in
       * cron() keeps running.
       *
       * @return bool Always true.
       */
      public function load()
      {
          try {
              $id = $this->queue->pop();
              if ($id && Dever::load('spider/lib/project')->get($id)) {
                  // When 1000 or more run processes are reported, wait a minute
                  // before starting another one.
                  if (Dever::process('lib/api.run', true) >= 1000) {
                      sleep(60);
                  }
                  Dever::daemon('lib/api.run?id=' . $id, 'spider');
              }
          } catch (\Exception $e) {
              // Best effort: keep the loop alive, but leave a trace of the failure.
              error_log('Spider Api::load failed: ' . $e->getMessage());
          }

          return true;
      }

      /**
       * Execute the project whose id is given by the "id" request input.
       *
       * A "{page=N}" placeholder in the site URL and/or in the request
       * parameters triggers paginated collection; without any placeholder the
       * site is fetched exactly once.
       *
       * @return false|null false when the id or the project is missing, otherwise nothing.
       */
      public function run()
      {
          $id = Dever::input('id');
          if (!$id) {
              return false;
          }

          $config = Dever::load('spider/lib/project')->get($id);
          if (!$config) {
              return false;
          }

          $col = $this->col($config['id']);
          $set = $this->set($config['id']);
          $config['curl'] = [
              'request_type' => $config['request_type'],
              'content_type' => $config['content_type'],
              'header' => $config['header'],
              'param' => $config['param'],
          ];

          $paged = false;
          if (strpos($config['site'], '{page=') !== false) {
              $this->page($config['site'], 1, $config, $col, $set);
              $paged = true;
          }
          if (strpos($config['param'], '{page=') !== false) {
              $this->page($config['param'], 2, $config, $col, $set);
              $paged = true;
          }

          // The single-request path must only run when nothing was paginated;
          // otherwise the raw template URL (still containing the placeholder)
          // would be requested as well.
          if (!$paged) {
              Dever::load('spider/lib/project')->set($config, 4, 1);
              $this->parse(
                  $config['site'],
                  $config['id'],
                  $config['collect_rule'],
                  $config['curl'],
                  $col,
                  $set,
                  $config['push']
              );
          }

          // NOTE(review): status 2 presumably marks the project as finished — confirm.
          Dever::load('spider/lib/project')->set($config, 2);
      }

      /**
       * Load the "spider/col" rows of a project.
       *
       * @param int $project Project id.
       * @param int $source  Unused; kept for backward compatibility.
       * @return mixed Result of the getList() query.
       */
      private function col($project, $source = 1)
      {
          return Dever::db('spider/col')->getList(['where_pid' => $project]);
      }

      /**
       * Load the "spider/set" rows of a project.
       *
       * @param int $project Project id.
       * @return mixed Result of the getList() query.
       */
      private function set($project)
      {
          return Dever::db('spider/set')->getList(['where_pid' => $project]);
      }

      /**
       * Fetch and parse a single URL through Parse.
       *
       * @return mixed Whatever Parse::get() returns.
       */
      private function parse($url, $project, $rule, $param, $col, $set, $push)
      {
          $parser = new Parse($url, $project, $rule, $param, $col, $set, $push);

          return $parser->get();
      }

      /**
       * Collect every page of a paginated source.
       *
       * $source contains a "{page=N}" placeholder, N being the first page
       * number. Pages N..page_num (100 when page_num is not positive) are
       * requested one after another. For site URLs an optional "||" splits the
       * URL into a base and a suffix: page 1 uses the base only, later pages use
       * base + suffix.
       *
       * @param string $source Template containing the placeholder.
       * @param int    $type   1 = $source is the site URL, otherwise the request parameters.
       * @param array  $config Project configuration (including the 'curl' entry).
       * @param mixed  $col    Column definitions handed to Parse.
       * @param mixed  $set    Settings handed to Parse.
       */
      private function page($source, $type, $config, $col, $set)
      {
          // Nothing to do without a placeholder or with an empty/zero start page.
          if (!preg_match('/{page=(.*?)}/i', $source, $match) || !$match[1]) {
              return;
          }

          if ($config['page_num'] <= 0) {
              $config['page_num'] = 100;
          }

          // Keep the template untouched: every page is rendered from it, so the
          // placeholder (and the "||" suffix) is still there on later iterations.
          $template = $source;
          $placeholder = $match[0];

          for ($i = (int) $match[1]; $i <= $config['page_num']; $i++) {
              $current = str_replace($placeholder, (string) $i, $template);
              Dever::load('spider/lib/project')->set($config, 4, $i);

              if ($type == 1) {
                  if (strpos($current, '||') !== false) {
                      $parts = explode('||', $current);
                      $current = ($i == 1) ? $parts[0] : $parts[0] . $parts[1];
                  }
                  $config['site'] = $current;
              } else {
                  $config['curl']['param'] = $current;
              }

              $this->parse(
                  $config['site'],
                  $config['id'],
                  $config['collect_rule'],
                  $config['curl'],
                  $col,
                  $set,
                  $config['push']
              );
          }
      }
  }