Api.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  /**
   * Queueing and execution entry points for spider projects.
   *
   * Flow visible in this class:
   *  - daemon() makes sure the long-running cron() worker exists and queues projects.
   *  - cron() loops forever, calling load() to pop project ids off the queue.
   *  - load() spawns a background "lib/api.run" process per popped project.
   *  - run() crawls one project, either a single URL or a "{page=N}" templated URL.
   *
   * NOTE(review): the numeric project states used below (2, 3, 4) are defined by
   * spider/lib/project; their labels in the comments here are inferred from usage
   * and should be confirmed against that class.
   */
  class Api
  {
      /**
       * Queue handle used by load(); created by cron() or lazily by load().
       *
       * @var Queue|null
       */
      private $queue;

      /**
       * Put a project on the crawl queue when its status allows it.
       *
       * @param mixed $id Either a project id, or an already loaded project config array.
       * @return string Always 'reload'.
       */
      public function add_api($id)
      {
          // Accept a ready-made config array (as daemon() passes) or look the project up by id.
          if (is_array($id)) {
              $config = $id;
          } else {
              $config = Dever::load('spider/lib/project')->get($id);
          }

          if (!$config) {
              // NOTE(review): presumably Dever::alert() aborts the request; if it does not,
              // the status check below runs against an empty config - confirm.
              Dever::alert('项目不存在');
          }

          // Only projects with status <= 2 are (re)queued; they are moved to status 3 first.
          if ($config['status'] <= 2) {
              Dever::load('spider/lib/project')->set($config, 3);
              Dever::load('spider/lib/queue')->push($config['id']);
          }

          return 'reload';
      }

      /**
       * Run a single project immediately with the "test" input flag set.
       *
       * @param mixed $id Project id to run.
       * @return string Always 'reload'.
       */
      public function test_api($id)
      {
          Dever::setInput('test', 1);
          $this->run($id);

          return 'reload';
      }

      /**
       * Daemon entry point; the original note says running it once per minute is enough.
       *
       * Starts the cron() worker when no such process is reported, then offers every
       * project to add_api() so that eligible ones get queued.
       *
       * @return void
       */
      public function daemon()
      {
          // Check whether the worker process already exists.
          $state = Dever::process('lib/api.cron', true);
          if ($state <= 0) {
              Dever::daemon('lib/api.cron', 'spider');
          }

          // Check every project to see whether it can start running.
          $data = Dever::load('spider/lib/project')->getAll();
          if ($data) {
              foreach ($data as $project) {
                  $this->add_api($project);
              }
          }
      }

      /**
       * Worker loop: creates the queue handle and calls load() forever.
       *
       * This method never returns.
       *
       * @return void
       */
      public function cron()
      {
          $this->queue = new Queue();
          while (1) {
              $this->load();
          }
      }

      /**
       * Pop one project id from the queue and launch it as a background process.
       *
       * @return bool Always true, including when an exception was caught.
       */
      public function load()
      {
          try {
              // load() is public, so make sure the queue exists even when cron() was not the caller.
              if (!$this->queue) {
                  $this->queue = new Queue();
              }

              $id = $this->queue->pop();
              if ($id) {
                  $config = Dever::load('spider/lib/project')->get($id);
                  if ($config) {
                      // Push the job to the background; first look at how many runners are active.
                      $num = Dever::process('lib/api.run', true);
                      if ($num >= 1000) {
                          // Too many runners: wait a while before launching this one.
                          // The count is not re-checked after the pause.
                          sleep(60);
                      }
                      Dever::daemon('lib/api.run?id=' . $id, 'spider');
                  }
              }

              return true;
          } catch (\Exception $e) {
              // Deliberately swallowed so that one failing item does not stop the cron() loop.
              return true;
          }
      }

      /**
       * Crawl one project.
       *
       * The project id is taken from the argument when a non-empty scalar is given
       * (test_api() passes one) and from the request input "id" otherwise, which is
       * how the background process started by load() supplies it.
       *
       * @param mixed $id Optional project id; defaults to Dever::input('id').
       * @return false|null False when the id or project is missing, otherwise no value.
       */
      public function run($id = null)
      {
          if (!is_scalar($id) || !$id) {
              $id = Dever::input('id');
          }
          if (!$id) {
              return false;
          }

          $config = Dever::load('spider/lib/project')->get($id);
          if (!$config) {
              return false;
          }

          $col = $this->col($config['id']);
          $set = $this->set($config['id']);

          // A site URL containing both "{" and "}" is treated as a template (see preg()).
          if (strpos($config['site'], '{') !== false && strpos($config['site'], '}') !== false) {
              $this->preg($config, $col, $set);
          } else {
              // Plain URL: mark the project with status 4 / page 1, then parse it once.
              Dever::load('spider/lib/project')->set($config, 4, 1);
              $this->parse($config['site'], $config['id'], $config['collect_rule'], $col, $set, $config['push']);
          }

          // Crawl finished: put the project back to status 2.
          Dever::load('spider/lib/project')->set($config, 2);
      }

      /**
       * Fetch the spider/col rows whose pid is the given project.
       *
       * @param mixed $project Project id.
       * @return mixed Result of Dever::db('spider/col')->getList().
       */
      private function col($project)
      {
          return Dever::db('spider/col')->getList(['where_pid' => $project]);
      }

      /**
       * Fetch the spider/set rows whose pid is the given project.
       *
       * @param mixed $project Project id.
       * @return mixed Result of Dever::db('spider/set')->getList().
       */
      private function set($project)
      {
          return Dever::db('spider/set')->getList(['where_pid' => $project]);
      }

      /**
       * Build a Parse object for one URL and return the result of its get() call.
       *
       * @param string $url     URL to crawl.
       * @param mixed  $project Project id.
       * @param mixed  $rule    The project's collect_rule value.
       * @param mixed  $col     Rows returned by col().
       * @param mixed  $set     Rows returned by set().
       * @param mixed  $push    The project's push value.
       * @return mixed Whatever Parse::get() returns.
       */
      private function parse($url, $project, $rule, $col, $set, $push)
      {
          $parse = new Parse($url, $project, $rule, $col, $set, $push);

          return $parse->get();
      }

      /**
       * Handle a templated site URL.
       *
       * Only the first "{...}" placeholder is used: its content is parsed as a query
       * string (for example "page=1") and handed to page() together with the full
       * placeholder text. A page_num of zero or less is replaced by 100.
       *
       * @param array $config Project config.
       * @param mixed $col    Rows returned by col().
       * @param mixed $set    Rows returned by set().
       * @return void
       */
      private function preg($config, $col, $set)
      {
          $pat = '/{(.*?)}/i';
          preg_match_all($pat, $config['site'], $match);

          if (isset($match[1][0]) && $match[1][0]) {
              if ($config['page_num'] <= 0) {
                  $config['page_num'] = 100;
              }
              parse_str($match[1][0], $param);
              $this->page($param, $match[0][0], $config, $col, $set);
          }
      }

      /**
       * Crawl every page of a templated URL.
       *
       * With a truthy "page" parameter the placeholder is replaced by each page number
       * from that start value up to and including page_num. Without it the placeholder
       * is simply removed and the resulting URL is crawled once.
       *
       * @param array  $param   Parameters parsed from the placeholder content.
       * @param string $replace The full placeholder text, braces included.
       * @param array  $config  Project config.
       * @param mixed  $col     Rows returned by col().
       * @param mixed  $set     Rows returned by set().
       * @return void
       */
      private function page($param, $replace, $config, $col, $set)
      {
          if (isset($param['page']) && $param['page']) {
              for ($i = $param['page']; $i <= $config['page_num']; $i++) {
                  $url = str_replace($replace, $i, $config['site']);
                  // Record the page currently being crawled (status 4).
                  Dever::load('spider/lib/project')->set($config, 4, $i);
                  $this->parse($url, $config['id'], $config['collect_rule'], $col, $set, $config['push']);
              }
          } else {
              Dever::load('spider/lib/project')->set($config, 4, 1);
              // Strip the placeholder from the site URL. The previous code read an
              // undefined variable here, so an empty URL was crawled.
              $url = str_replace($replace, '', $config['site']);
              $this->parse($url, $config['id'], $config['collect_rule'], $col, $set, $config['push']);
          }
      }
  }