Api.php 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  /**
   * Entry points for queueing and running spider (crawler) projects.
   *
   * The public methods are invoked by the Dever framework, either directly or
   * as background processes started through Dever::daemon() (see daemon(),
   * cron() and load() for the routes used).
   */
  class Api
  {
      /**
       * Queue instance consumed by load(); created in cron().
       */
      private $queue;

      /**
       * Put a project on the crawl queue.
       *
       * @param mixed $id Project id, or an already loaded project config array.
       * @return string Always 'reload'.
       */
      public function add_api($id)
      {
          // Accept either a ready-made config array or an id to look up.
          $config = is_array($id) ? $id : Dever::load('spider/lib/project')->get($id);

          if (!$config) {
              Dever::alert('项目不存在');
              // Stop here even if alert() returns, so a missing project is
              // never passed to set() or pushed onto the queue with an empty id.
              return 'reload';
          }

          // Only projects with status <= 2 are queued; the project is handed to
          // set() with the value 3 before its id is pushed.
          // NOTE(review): the meaning of the status values is defined by
          // spider/lib/project, which is not visible here.
          if ($config['status'] <= 2) {
              Dever::load('spider/lib/project')->set($config, 3);
              Dever::load('spider/lib/queue')->push($config['id']);
          }

          return 'reload';
      }

      /**
       * Run a project immediately in the current process.
       *
       * NOTE(review): run() declares no parameters and reads the project id
       * from Dever::input('id'), so the $id forwarded here is not used by
       * run() itself - confirm the request input always carries the same id.
       *
       * @param mixed $id Project id (see note above).
       * @return string Always 'reload'.
       */
      public function test_api($id)
      {
          $this->run($id);
          return 'reload';
      }

      /**
       * Watchdog, intended to be executed once per minute.
       *
       * Starts the 'lib/api.cron' background process when Dever::process()
       * reports that none is running, then offers every project returned by
       * getAll() to add_api().
       *
       * @return void
       */
      public function daemon()
      {
          // Check whether the queue consumer process exists.
          $state = Dever::process('lib/api.cron', true);
          if ($state <= 0) {
              Dever::daemon('lib/api.cron', 'spider');
          }

          // Check every project to see whether it can start running.
          $data = Dever::load('spider/lib/project')->getAll();
          if ($data) {
              foreach ($data as $project) {
                  $this->add_api($project);
              }
          }
      }

      /**
       * Queue consumer: creates the queue and calls load() forever.
       *
       * This method never returns.
       *
       * @return void
       */
      public function cron()
      {
          $this->queue = new Queue();
          while (1) {
              $this->load();
          }
      }

      /**
       * Pop one project id from the queue and start a background run for it.
       *
       * Requires $this->queue to have been set (cron() does this).
       *
       * @return bool Always true, including when an exception was caught.
       */
      public function load()
      {
          try {
              $id = $this->queue->pop();
              if ($id) {
                  $config = Dever::load('spider/lib/project')->get($id);
                  if ($config) {
                      // Look up how many 'lib/api.run' processes are running.
                      $num = Dever::process('lib/api.run', true);
                      if ($num >= 1000) {
                          // Too many runners: wait a minute before starting
                          // this one. The count is not re-checked afterwards.
                          sleep(60);
                      }
                      // Push the actual crawl to a background process.
                      Dever::daemon('lib/api.run?id=' . $id, 'spider');
                  }
              }
              return true;
          } catch (\Exception $e) {
              // Best effort: a failure must not stop the consumer loop.
              return true;
          }
      }

      /**
       * Crawl one project; the project id is read from Dever::input('id').
       *
       * When the project's category has both a collect rule and a site, the
       * category site is fetched first and one task is started per entry of
       * the decoded result; otherwise a single task is started for the
       * project. Finally the project is handed to set() with the value 2.
       *
       * @return bool|null false when the id, the project or its category is
       *                   missing; otherwise nothing is returned.
       */
      public function run()
      {
          $id = Dever::input('id');
          if (!$id) {
              return false;
          }

          $config = Dever::load('spider/lib/project')->get($id);
          if (!$config) {
              return false;
          }

          $cate = Dever::db('spider/cate')->find($config['cate_id']);
          if (!$cate) {
              return false;
          }

          $col = $this->col($config['id']);
          $set = $this->set($config['id']);

          // Request options forwarded to Doc::get() and to Parse.
          $config['curl'] = [
              'request_type' => $config['request_type'],
              'content_type' => $config['content_type'],
              'header' => $config['header'],
              'param' => $config['param'],
          ];

          // The stored site value is split by Dever::split(): element 0 is the
          // site, an optional element 1 is kept as the page pattern.
          $site = Dever::split($config['site']);
          $config['site'] = $site[0];
          $config['page'] = '';

          // A site that does not contain 'http' is prefixed with the category
          // site.
          // NOTE(review): this matches 'http' anywhere in the string, not only
          // at the start - confirm that is intended.
          if (strpos($config['site'], 'http') === false) {
              $config['site'] = $cate['site'] . $config['site'];
          }
          if (isset($site[1]) && $site[1]) {
              $config['page'] = $site[1];
          }

          if ($cate['collect_rule'] && $cate['site']) {
              // Element 0 of the rule is given to Doc; an optional element 1
              // is a substring every collected entry must contain.
              $rule = Dever::split($cate['collect_rule']);
              if (!isset($rule[1])) {
                  $rule[1] = '';
              }

              $doc = Doc::getInstance($cate['site'], $rule[0]);
              $doc->log(new Log($id));
              $data = Dever::json_decode($doc->get($config['curl']));

              if ($data) {
                  foreach ($data as $entry) {
                      if (!$entry) {
                          continue;
                      }
                      if ($rule[1] && strpos($entry, $rule[1]) === false) {
                          continue;
                      }
                      // Each accepted entry becomes both the site to crawl and
                      // the category value handed to task().
                      $config['site'] = $entry;
                      $this->task($config, $col, $set, $entry);
                  }
              }
          } else {
              $this->task($config, $col, $set);
          }

          Dever::load('spider/lib/project')->set($config, 2);
      }

      /**
       * Resolve "{cate=...}" placeholders, then crawl with or without paging.
       *
       * The first "{page=...}" placeholder found in the page pattern, the
       * site or the request param (checked in that order) selects paged
       * crawling through page(); without one the site is parsed once.
       *
       * @param array $config Project config as prepared by run().
       * @param mixed $col    Column definitions from col().
       * @param mixed $set    Settings from set().
       * @param mixed $cate   Category value for "{cate=...}"; when falsy the
       *                      default written inside the placeholder is used.
       * @return void
       */
      private function task($config, $col, $set, $cate = false)
      {
          // A default picked up from the site placeholder is carried over to
          // the page pattern, so both are resolved with the same value.
          $filled = $this->fillCate($config['site'], $cate);
          $config['site'] = $filled[0];
          $cate = $filled[1];

          if ($config['page']) {
              $filled = $this->fillCate($config['page'], $cate);
              $config['page'] = $filled[0];
              $cate = $filled[1];
          }

          if ($config['page'] && strpos($config['page'], '{page=') !== false) {
              $this->page($config['page'], 1, $config, $col, $set);
          } elseif (strpos($config['site'], '{page=') !== false) {
              $this->page($config['site'], 2, $config, $col, $set);
          } elseif ($config['param'] && strpos($config['param'], '{page=') !== false) {
              $this->page($config['param'], 3, $config, $col, $set);
          } else {
              Dever::load('spider/lib/project')->set($config, 4, 1);
              $this->parse($config['site'], $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
          }
      }

      /**
       * Substitute the first "{cate=default}" placeholder in a string.
       *
       * Every occurrence of that exact placeholder text is replaced. When
       * $cate is falsy the non-empty default from the placeholder is used and
       * returned, so the caller can reuse it.
       *
       * @param string $subject Text that may contain the placeholder.
       * @param mixed  $cate    Category value, or a falsy value for the default.
       * @return array Two elements: the resulting text and the category value.
       */
      private function fillCate($subject, $cate)
      {
          if (strpos($subject, '{cate=') === false) {
              return [$subject, $cate];
          }

          preg_match_all('/{cate=(.*?)}/i', $subject, $match);
          if (isset($match[1][0]) && $match[1][0]) {
              $cate = $cate ? $cate : $match[1][0];
              $subject = str_replace($match[0][0], $cate, $subject);
          }

          return [$subject, $cate];
      }

      /**
       * Load the spider/col rows of a project.
       *
       * @param mixed $project Project id.
       * @param int   $source  Unused; kept so existing callers keep working.
       * @return mixed Result of getList().
       */
      private function col($project, $source = 1)
      {
          return Dever::db('spider/col')->getList(['where_pid' => $project]);
      }

      /**
       * Load the spider/set rows of a project.
       *
       * @param mixed $project Project id.
       * @return mixed Result of getList().
       */
      private function set($project)
      {
          return Dever::db('spider/set')->getList(['where_pid' => $project]);
      }

      /**
       * Build a Parse object for one URL and return the result of its get().
       *
       * All arguments are passed to the Parse constructor unchanged and in
       * this order.
       *
       * @return mixed Result of Parse::get().
       */
      private function parse($url, $project, $rule, $param, $col, $set, $push)
      {
          $parse = new Parse($url, $project, $rule, $param, $col, $set, $push);
          return $parse->get();
      }

      /**
       * Crawl every page from the start page in "{page=N}" up to page_num.
       *
       * @param string $source Text containing the "{page=N}" placeholder.
       * @param int    $type   1: $source is a page pattern appended to the site
       *                       (page 1 uses the bare site); 2: $source is the
       *                       site itself; otherwise $source is the request
       *                       param and the site stays unchanged.
       * @param array  $config Project config; a page_num <= 0 becomes 100.
       * @param mixed  $col    Column definitions from col().
       * @param mixed  $set    Settings from set().
       * @return void
       */
      private function page($source, $type, $config, $col, $set)
      {
          preg_match_all('/{page=(.*?)}/i', $source, $match);

          // Test for a numeric value instead of truthiness: the string "0" is
          // falsy in PHP, which made a zero-based "{page=0}" crawl nothing.
          // Non-numeric start values are rejected because they cannot be
          // counted up to page_num.
          if (!isset($match[1][0]) || !is_numeric($match[1][0])) {
              return;
          }

          if ($config['page_num'] <= 0) {
              $config['page_num'] = 100;
          }

          $placeholder = $match[0][0];
          $first = (int) $match[1][0];
          $site = $config['site'];

          for ($i = $first; $i <= $config['page_num']; $i++) {
              $site_page = str_replace($placeholder, (string) $i, $source);
              Dever::load('spider/lib/project')->set($config, 4, $i);

              if ($type == 1) {
                  // Page 1 is the plain site; other pages append the pattern.
                  $site = ($i == 1) ? $config['site'] : $config['site'] . $site_page;
              } elseif ($type == 2) {
                  $site = $site_page;
              } else {
                  $config['curl']['param'] = $site_page;
              }

              $this->parse($site, $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
          }
      }
  }