Api.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  /**
   * Scheduling and execution entry points for spider (crawler) projects.
   *
   * Flow as visible in this class:
   *  - daemon()  keeps the cron worker alive and offers every project to add_api().
   *  - add_api() flags an eligible project and pushes its id onto the queue.
   *  - cron()    loops forever over load(), which pops ids and spawns one
   *              background `lib/api.run?id=…` process per project.
   *  - run()     resolves the project's URLs (optionally one per category entry)
   *              and hands them to task()/page()/parse().
   *
   * NOTE(review): project status codes are only known from their use here:
   * a project with status <= 2 may be queued, 3 is written when it is queued,
   * 4 (plus a page number) is written right before a page is parsed, and 2 is
   * written when run() finishes. Confirm the exact meaning against
   * spider/lib/project.
   */
  class Api
  {
      /** Number of `lib/api.run` processes at or above which load() pauses before spawning another. */
      const MAX_RUNNING = 1000;

      /** Seconds load() sleeps when MAX_RUNNING is reached. */
      const THROTTLE_SECONDS = 60;

      /** Last page number used when the project's page_num is not positive. */
      const DEFAULT_PAGE_NUM = 100;

      /** Queue instance the worker pops project ids from; created by cron() or lazily by load(). */
      private $queue;

      /**
       * Put a project on the crawl queue.
       *
       * @param mixed $id Project id, or an already loaded project config array.
       * @return string Always 'reload'.
       */
      public function add_api($id)
      {
          if (is_array($id)) {
              $config = $id;
          } else {
              $config = Dever::load('spider/lib/project')->get($id);
          }

          if (!$config) {
              // NOTE(review): presumably Dever::alert() aborts the request; if it
              // ever returns, the status check below runs on an empty config.
              Dever::alert('项目不存在');
          }

          // Only projects with status <= 2 are queued; they are marked 3 first.
          if ($config['status'] <= 2) {
              Dever::load('spider/lib/project')->set($config, 3);
              Dever::load('spider/lib/queue')->push($config['id']);
          }

          return 'reload';
      }

      /**
       * Run a project immediately with the `test` input flag set to 1.
       *
       * @param mixed $id Project id.
       * @return string Always 'reload'.
       */
      public function test_api($id)
      {
          Dever::setInput('test', 1);

          // run() declares no parameters and reads the id from the request input,
          // so an id received only as an argument must be published there or it
          // would be silently ignored.
          if ($id && is_scalar($id)) {
              Dever::setInput('id', $id);
          }

          $this->run();

          return 'reload';
      }

      /**
       * Watchdog, meant to be invoked about once a minute.
       *
       * Starts the `lib/api.cron` worker when Dever::process() reports none,
       * then offers every project returned by getAll() to add_api().
       *
       * @return void
       */
      public function daemon()
      {
          // Is the cron worker already running?
          $running = Dever::process('lib/api.cron', true);
          if ($running <= 0) {
              Dever::daemon('lib/api.cron', 'spider');
          }

          // Offer every project for queueing; add_api() decides by status.
          $projects = Dever::load('spider/lib/project')->getAll();
          if ($projects) {
              foreach ($projects as $project) {
                  $this->add_api($project);
              }
          }
      }

      /**
       * Worker loop: creates a fresh queue and calls load() forever.
       *
       * NOTE(review): there is no pause between iterations here; whether this
       * busy-loops on an empty queue depends on Queue::pop() blocking — confirm.
       *
       * @return void Never returns.
       */
      public function cron()
      {
          $this->queue = new Queue();

          while (1) {
              $this->load();
          }
      }

      /**
       * Pop one project id from the queue and start its background run.
       *
       * @return bool Always true, including when an exception was swallowed.
       */
      public function load()
      {
          try {
              // load() is public: without this, calling it outside cron() would
              // dereference null, which raises an \Error that the catch below
              // does not handle.
              if (!$this->queue) {
                  $this->queue = new Queue();
              }

              $id = $this->queue->pop();
              if (!$id) {
                  return true;
              }

              $config = Dever::load('spider/lib/project')->get($id);
              if (!$config) {
                  return true;
              }

              // Too many runners: wait once, then carry on with this project anyway.
              if (Dever::process('lib/api.run', true) >= self::MAX_RUNNING) {
                  sleep(self::THROTTLE_SECONDS);
              }

              // Spawn a runner only if this project does not already have one.
              $route = 'lib/api.run?id=' . $id;
              if (Dever::process($route, true) <= 0) {
                  Dever::daemon($route, 'spider');
              }

              return true;
          } catch (\Exception $e) {
              // Deliberate best effort: a failing project must not stop the worker loop.
              return true;
          }
      }

      /**
       * Crawl the project whose id is given by the `id` request input.
       *
       * Loads the project, its category (spider/cate), and its col/set rows,
       * builds the curl options and splits `site` into a base URL and an optional
       * page pattern. When the category has both `collect_rule` and `site`, the
       * category site is fetched through Doc, the result is JSON-decoded, and
       * task() runs once per usable entry; otherwise task() runs once. Finally
       * the project is written back with status 2.
       *
       * @return false|null false when the id, project or category is missing;
       *                    null otherwise.
       */
      public function run()
      {
          $id = Dever::input('id');
          if (!$id) {
              return false;
          }

          $config = Dever::load('spider/lib/project')->get($id);
          if (!$config) {
              return false;
          }

          $cate = Dever::db('spider/cate')->find($config['cate_id']);
          if (!$cate) {
              return false;
          }

          $col = $this->col($config['id']);
          $set = $this->set($config['id']);

          $config['curl'] = [
              'request_type' => $config['request_type'],
              'content_type' => $config['content_type'],
              'header' => $config['header'],
              'param' => $config['param'],
          ];

          // `site` may carry a second part that is used as the page pattern.
          $site = Dever::split($config['site']);
          $config['site'] = $site[0];
          $config['page'] = '';

          // A site without 'http' anywhere in it is treated as relative to the category site.
          if (!strstr($config['site'], 'http')) {
              $config['site'] = $cate['site'] . $config['site'];
          }
          if (isset($site[1]) && $site[1]) {
              $config['page'] = $site[1];
          }

          if ($cate['collect_rule'] && $cate['site']) {
              // First part is the rule given to Doc; the optional second part is a
              // substring every collected entry must contain.
              $rule = Dever::split($cate['collect_rule']);
              $filter = isset($rule[1]) ? $rule[1] : '';

              $doc = Doc::getInstance($cate['site'], $rule[0]);
              $doc->log(new Log($id));
              $data = Dever::json_decode($doc->get($config['curl']));

              // Guard against a decoded scalar or nested entries, which would make
              // the loop or the substring test below fail.
              if ($data && is_array($data)) {
                  foreach ($data as $v) {
                      if (!$v || !is_scalar($v)) {
                          continue;
                      }
                      if ($filter && strpos((string) $v, $filter) === false) {
                          continue;
                      }
                      $config['site'] = $v;
                      $this->task($config, $col, $set, $v);
                  }
              }
          } else {
              $this->task($config, $col, $set);
          }

          Dever::load('spider/lib/project')->set($config, 2);
      }

      /**
       * Resolve `{cate=…}` placeholders, then parse the project page by page or once.
       *
       * The `{page=…}` placeholder is looked for in the page pattern, then the
       * site, then the request param; the first hit decides how page() paginates.
       * Without any placeholder the site is parsed once with status 4 / page 1.
       *
       * @param array $config Project config as prepared by run().
       * @param mixed $col    Rows from spider/col for the project.
       * @param mixed $set    Rows from spider/set for the project.
       * @param mixed $cate   Category value for `{cate=…}`; false uses the
       *                      placeholder's own default.
       * @return void
       */
      private function task($config, $col, $set, $cate = false)
      {
          list($config['site'], $cate) = $this->fillCate($config['site'], $cate);
          if ($config['page']) {
              // The category resolved for the site is reused for the page pattern.
              list($config['page'], $cate) = $this->fillCate($config['page'], $cate);
          }

          if ($config['page'] && strpos($config['page'], '{page=') !== false) {
              $this->page($config['page'], 1, $config, $col, $set);
          } elseif (strpos($config['site'], '{page=') !== false) {
              $this->page($config['site'], 2, $config, $col, $set);
          } elseif ($config['param'] && strpos($config['param'], '{page=') !== false) {
              $this->page($config['param'], 3, $config, $col, $set);
          } else {
              Dever::load('spider/lib/project')->set($config, 4, 1);
              $this->parse($config['site'], $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
          }
      }

      /**
       * Substitute the first `{cate=default}` placeholder found in $text.
       *
       * Every occurrence of that exact placeholder text is replaced by $cate, or
       * by the placeholder's default when $cate is empty. An empty default leaves
       * the text untouched.
       *
       * @param string $text Text that may contain the placeholder.
       * @param mixed  $cate Category value, or a falsy value to use the default.
       * @return array Two elements: the resulting text and the category value in effect.
       */
      private function fillCate($text, $cate)
      {
          if (strpos($text, '{cate=') !== false
              && preg_match('/{cate=(.*?)}/i', $text, $match)
              && $match[1]
          ) {
              $cate = $cate ? $cate : $match[1];
              $text = str_replace($match[0], $cate, $text);
          }

          return [$text, $cate];
      }

      /**
       * Fetch the spider/col rows of a project.
       *
       * @param mixed $project Project id.
       * @param int   $source  Unused; kept so existing calls stay valid.
       * @return mixed Result of getList().
       */
      private function col($project, $source = 1)
      {
          return Dever::db('spider/col')->getList(['where_pid' => $project]);
      }

      /**
       * Fetch the spider/set rows of a project.
       *
       * @param mixed $project Project id.
       * @return mixed Result of getList().
       */
      private function set($project)
      {
          return Dever::db('spider/set')->getList(['where_pid' => $project]);
      }

      /**
       * Build a Parse object for one URL and return the result of its get().
       *
       * @param string $url     URL to fetch.
       * @param mixed  $project Project id.
       * @param mixed  $rule    The project's collect_rule.
       * @param array  $param   Curl options (request_type, content_type, header, param).
       * @param mixed  $col     Rows from spider/col.
       * @param mixed  $set     Rows from spider/set.
       * @param mixed  $push    The project's push setting.
       * @return mixed Whatever Parse::get() returns.
       */
      private function parse($url, $project, $rule, $param, $col, $set, $push)
      {
          $parser = new Parse($url, $project, $rule, $param, $col, $set, $push);

          return $parser->get();
      }

      /**
       * Parse every page from the `{page=N}` start value up to the project's page_num.
       *
       * @param string $source Text containing the `{page=N}` placeholder.
       * @param int    $type   1: $source is a page pattern appended to the site
       *                       (page 1 uses the bare site); 2: $source is the site
       *                       itself; anything else: $source is the request param.
       * @param array  $config Project config as prepared by run()/task().
       * @param mixed  $col    Rows from spider/col.
       * @param mixed  $set    Rows from spider/set.
       * @return void
       */
      private function page($source, $type, $config, $col, $set)
      {
          $site = $config['site'];

          // Nothing to do without a placeholder carrying a non-empty start value.
          if (!preg_match('/{page=(.*?)}/i', $source, $match) || !$match[1]) {
              return;
          }

          if ($config['page_num'] <= 0) {
              $config['page_num'] = self::DEFAULT_PAGE_NUM;
          }

          for ($i = $match[1]; $i <= $config['page_num']; $i++) {
              $site_page = str_replace($match[0], $i, $source);

              // Record the page about to be processed (status 4).
              Dever::load('spider/lib/project')->set($config, 4, $i);

              if ($type == 1) {
                  $site = ($i == 1) ? $config['site'] : $config['site'] . $site_page;
              } elseif ($type == 2) {
                  $site = $site_page;
              } else {
                  $config['curl']['param'] = $site_page;
              }

              $this->parse($site, $config['id'], $config['collect_rule'], $config['curl'], $col, $set, $config['push']);
          }
      }
  }