Api.php 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  4. set_time_limit(0);
  5. class Api
  6. {
  7. private $queue;
  /**
   * Put a project on the crawl queue.
   *
   * @param mixed $id Project id, or an already loaded project config array
   *                  (daemon() passes the rows returned by getAll()).
   * @return string Always 'reload'.
   */
  public function add_api($id)
  {
    # An array argument is used as the config directly; anything else is looked up.
    $project = is_array($id) ? $id : Dever::load('spider/lib/project')->get($id);
    if (!$project) {
      # The message text means "project does not exist".
      # NOTE(review): presumably Dever::alert() ends the request - confirm.
      Dever::alert('项目不存在');
    }

    # Only projects with status <= 2 are queued: set($project, 3) is called
    # first (presumably marking the project as queued), then the id is pushed.
    if ($project['status'] <= 2) {
      Dever::load('spider/lib/project')->set($project, 3);
      Dever::load('spider/lib/queue')->push($project['id']);
    }

    return 'reload';
  }
  /**
   * Run a project immediately with the 'test' input flag switched on.
   *
   * @param mixed $id Id of the project to run.
   * @return string Always 'reload'.
   */
  public function test_api($id)
  {
    Dever::setInput('test', 1);
    # run() declares no parameters and reads the project id from
    # Dever::input('id'), so an argument handed to it is silently ignored.
    # Publish $id as input instead so the requested project is the one run.
    if ($id && !is_array($id)) {
      Dever::setInput('id', $id);
    }
    $this->run();
    return 'reload';
  }
  /**
   * Daemon entry point; running it once per minute is sufficient.
   *
   * Starts the 'lib/api.cron' worker when Dever::process() reports none,
   * then hands every project to add_api().
   *
   * @return void
   */
  public function daemon()
  {
    # Check whether the cron worker process already exists.
    if (Dever::process('lib/api.cron', true) <= 0) {
      Dever::daemon('lib/api.cron', 'spider');
    }

    # Offer all projects to add_api(), which decides whether each one is queued.
    $projects = Dever::load('spider/lib/project')->getAll();
    if (!$projects) {
      return;
    }
    foreach ($projects as $project) {
      $this->add_api($project);
    }
  }
  /**
   * Worker loop: creates the queue and calls load() forever.
   *
   * This method never returns.
   */
  public function cron()
  {
    $this->queue = new Queue();
    while (true) {
      $this->load();
    }
  }
  /**
   * Pop one project id from the queue and start its runner in the background.
   *
   * Exceptions are swallowed on purpose so the cron() loop keeps going.
   *
   * @return bool Always true.
   */
  public function load()
  {
    try {
      $id = $this->queue->pop();
      if (!$id) {
        return true;
      }
      if (!Dever::load('spider/lib/project')->get($id)) {
        return true;
      }

      # Look at the number of runner processes currently reported; when it
      # is 1000 or more, pause for a minute. The count is not re-checked.
      if (Dever::process('lib/api.run', true) >= 1000) {
        sleep(60);
      }

      # Start a runner for this project unless one is already reported.
      $job = 'lib/api.run?id=' . $id;
      if (Dever::process($job, true) <= 0) {
        Dever::daemon($job, 'spider');
      }
      return true;
    } catch (\Exception $e) {
      return true;
    }
  }
  /**
   * Crawl the project whose id is given by the 'id' input value.
   *
   * Builds the request settings and the site/page templates, then either
   * runs one task per entry collected from the category site (when the
   * category has both 'collect_rule' and 'site') or a single task for the
   * project itself. Finishes by calling set($config, 2) on the project.
   *
   * @return false|null false when the id, the project or its category is
   *                    missing; otherwise nothing is returned.
   */
  public function run()
  {
    $id = Dever::input('id');
    if (!$id) {
      return false;
    }
    $config = Dever::load('spider/lib/project')->get($id);
    if (!$config) {
      return false;
    }
    $cate = Dever::db('spider/cate')->find($config['cate_id']);
    if (!$cate) {
      return false;
    }

    $col = $this->col($config['id']);
    $set = $this->set($config['id']);

    # Request settings that are passed on to the document and parse layers.
    $config['curl'] = [
      'request_type' => $config['request_type'],
      'content_type' => $config['content_type'],
      'header' => $config['header'],
      'param' => $config['param'],
    ];

    # 'site' is split by Dever::split(): part 0 is the site, part 1 (optional)
    # the page template.
    $parts = Dever::split($config['site']);
    $config['site'] = $parts[0];
    if (!strstr($config['site'], 'http')) {
      # No 'http' anywhere in the site: prefix it with the category site.
      $config['site'] = $cate['site'] . $config['site'];
    }
    $config['page'] = !empty($parts[1]) ? $parts[1] : '';

    if ($cate['collect_rule'] && $cate['site']) {
      # 'collect_rule' is split too: part 0 goes to Doc, part 1 (optional) is
      # a substring every collected entry must contain.
      $rule = Dever::split($cate['collect_rule']);
      $filter = isset($rule[1]) ? $rule[1] : '';

      $doc = Doc::getInstance($cate['site'], $rule[0]);
      $doc->log(new Log($id));
      $entries = Dever::json_decode($doc->get($config['curl']));
      if ($entries) {
        foreach ($entries as $entry) {
          if (!$entry || ($filter && !strstr($entry, $filter))) {
            continue;
          }
          # Each collected entry replaces the site and is also handed to
          # task() as the category value.
          $config['site'] = $entry;
          $this->task($config, $col, $set, $entry);
        }
      }
    } else {
      $this->task($config, $col, $set);
    }

    Dever::load('spider/lib/project')->set($config, 2);
  }
  /**
   * Resolve the {cate=...} placeholders and dispatch the crawl.
   *
   * When a {page=...} placeholder is found (checked in the page template,
   * then the site, then the request param) the work goes to page();
   * otherwise the site is parsed once.
   *
   * @param array $config Project config as prepared by run().
   * @param mixed $col    Result of col() for the project.
   * @param mixed $set    Result of set() for the project.
   * @param mixed $cate   Category value to insert; when falsy, the default
   *                      written inside the first placeholder is used.
   * @return void
   */
  private function task($config, $col, $set, $cate = false)
  {
    # The site is handled before the page, and $cate is carried over: a
    # default picked up from the site placeholder is reused for the page.
    foreach (['site', 'page'] as $field) {
      if ($field == 'page' && !$config['page']) {
        continue;
      }
      if (strpos($config[$field], '{cate=') === false) {
        continue;
      }
      preg_match_all('/{cate=(.*?)}/i', $config[$field], $match);
      if (empty($match[1][0])) {
        continue;
      }
      if (!$cate) {
        $cate = $match[1][0];
      }
      # Every occurrence of the first matched placeholder text is replaced.
      $config[$field] = str_replace($match[0][0], $cate, $config[$field]);
    }

    # The second argument of page() tells it where the placeholder lives:
    # 1 = page template, 2 = site, 3 = request param.
    if ($config['page'] && strpos($config['page'], '{page=') !== false) {
      $this->page($config['page'], 1, $config, $col, $set);
      return;
    }
    if (strpos($config['site'], '{page=') !== false) {
      $this->page($config['site'], 2, $config, $col, $set);
      return;
    }
    if ($config['param'] && strpos($config['param'], '{page=') !== false) {
      $this->page($config['param'], 3, $config, $col, $set);
      return;
    }

    # No pagination: call set($config, 4, 1) and parse the site once.
    Dever::load('spider/lib/project')->set($config, 4, 1);
    $this->parse(
      $config['site'],
      $config['id'],
      $config['collect_list_rule'],
      $config['collect_rule'],
      $config['curl'],
      $col,
      $set,
      $config['push']
    );
  }
  /**
   * Fetch the 'spider/col' list for a project.
   *
   * @param mixed $project Project id, passed as 'where_pid'.
   * @param int   $source  Not used by this method.
   * @return mixed Whatever getList() returns.
   */
  private function col($project, $source = 1)
  {
    $where = ['where_pid' => $project];
    return Dever::db('spider/col')->getList($where);
  }
  /**
   * Fetch the 'spider/set' list for a project.
   *
   * @param mixed $project Project id, passed as 'where_pid'.
   * @return mixed Whatever getList() returns.
   */
  private function set($project)
  {
    $where = ['where_pid' => $project];
    return Dever::db('spider/set')->getList($where);
  }
  /**
   * Build a Parse object for one url and return the result of its get().
   *
   * When the 'test' input equals 1, exceptions are allowed to propagate;
   * otherwise an \Exception is swallowed and false is returned.
   *
   * All eight parameters are forwarded unchanged to the Parse constructor.
   *
   * @return mixed Result of Parse::get(), or false after a swallowed exception.
   */
  private function parse($url, $project, $list_rule, $rule, $param, $col, $set, $push)
  {
    $testing = Dever::input('test') == 1;
    try {
      $parser = new Parse($url, $project, $list_rule, $rule, $param, $col, $set, $push);
      return $parser->get();
    } catch (\Exception $e) {
      if ($testing) {
        # Test runs surface the failure to the caller.
        throw $e;
      }
      return false;
    }
  }
  /**
   * Expand a {page=N} placeholder and parse every page from N up to
   * $config['page_num'] (100 when page_num is not positive).
   *
   * @param string $source Text containing the {page=N} placeholder.
   * @param int    $type   Where $source came from: 1 = page template that is
   *                       appended to the site (page 1 uses the bare site),
   *                       2 = the site itself, anything else = request param.
   * @param array  $config Project config as prepared by run()/task().
   * @param mixed  $col    Result of col() for the project.
   * @param mixed  $set    Result of set() for the project.
   * @return void
   */
  private function page($source, $type, $config, $col, $set)
  {
    # $match[0] is the whole placeholder, $match[1] the start page inside it.
    if (!preg_match('/{page=(.*?)}/i', $source, $match)) {
      return;
    }
    # The start page has to be numeric. A plain truthiness test would drop
    # "{page=0}" and skip the whole crawl, and a non-numeric start used as
    # the loop counter would be string-incremented ("a" -> "b" ...), so the
    # value is validated and turned into an integer here.
    if (!is_numeric($match[1])) {
      return;
    }
    $first = (int) $match[1];

    if ($config['page_num'] <= 0) {
      $config['page_num'] = 100;
    }

    $site = $config['site'];
    for ($i = $first; $i <= $config['page_num']; $i++) {
      $filled = str_replace($match[0], $i, $source);
      Dever::load('spider/lib/project')->set($config, 4, $i);

      if ($type == 1) {
        # Page 1 is the site itself; later pages append the filled template.
        $site = $i == 1 ? $config['site'] : $config['site'] . $filled;
      } elseif ($type == 2) {
        $site = $filled;
      } else {
        # The placeholder lives in the request param; the site stays as is.
        $config['curl']['param'] = $filled;
      }

      $this->parse(
        $site,
        $config['id'],
        $config['collect_list_rule'],
        $config['collect_rule'],
        $config['curl'],
        $col,
        $set,
        $config['push']
      );
    }
  }
  223. }