Parse.php 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  4. use Spider\Lib\Doc\Dom;
  5. class Parse
  6. {
  7. private $url = '';
  8. private $host = '';
  9. private $log;
  10. private $doc = array();
  11. private $data = array();
  12. public function __construct($url, $project, $list_rule, $rule, $param, $col, $set, $push)
  13. {
  14. $doc = Doc::getInstance($url, $rule);
  15. $doc->log(new Log($project));
  16. $data = $doc->get($param);
  17. if ($data) {
  18. if (!is_array($data) && !is_object($data)) {
  19. $state = Dever::json_decode($data);
  20. if ($state) {
  21. $data = $state;
  22. }
  23. }
  24. if ($data) {
  25. if (is_array($data)) {
  26. $domain = parse_url($url);
  27. $host = $domain['scheme'] . '://' . $domain['host'] . '/';
  28. foreach ($data as $k => $v) {
  29. if (is_string($v) && !strstr($v, 'http')) {
  30. $v = $host . ltrim($v, '/');
  31. }
  32. $this->data[$k] = $this->load($doc, $k, $v, $col, $set, $push, $project, $v, $list_rule);
  33. }
  34. } else {
  35. $this->data = $this->load($doc, 0, $data, $col, $set, $push, $project, $url, $list_rule);
  36. }
  37. }
  38. }
  39. $doc->saveLog();
  40. }
  41. public function get()
  42. {
  43. return $this->data;
  44. }
  45. private function load($doc, $index, $data, $col, $set, $push, $project, $source, $list_rule)
  46. {
  47. if (!$col) {
  48. if (Dever::input('test') == 1) {
  49. $doc->outLog();
  50. echo 'error';die;
  51. }
  52. return false;
  53. }
  54. $result = $table = array();
  55. if (isset($col[1])) {
  56. $data = $doc->init($data);
  57. $this->getCol($doc, $col[1], $data, $result, $table, $source);
  58. }
  59. if (isset($col[2])) {
  60. $data = $doc->getCur();
  61. $data = $doc->find($data, $list_rule . '.eq('.$index.')');
  62. $this->getCol($doc, $col[2], $data, $result, $table, $source);
  63. }
  64. if ($set) {
  65. foreach ($set as $v) {
  66. $value = $this->set($index, $v, $project);
  67. $result[$v['key']] = $value;
  68. if (Dever::input('test') == 1) {
  69. $table[$v['name']] = $value;
  70. }
  71. }
  72. }
  73. if ($push) {
  74. $result['test'] = Dever::input('test');
  75. $this->push($push, $result, $project);
  76. }
  77. if (Dever::input('test') == 1) {
  78. $doc->outLog();
  79. echo Dever::table($table);die;
  80. }
  81. $this->update($result, $project, $source);
  82. return $result;
  83. }
  84. private function getCol($doc, $col, $data, &$result, &$table, $source = false)
  85. {
  86. foreach ($col as $v) {
  87. $callback = false;
  88. if (strpos($v['key'], '.') !== false) {
  89. $temp = explode('.', $v['key']);
  90. $v['key'] = $temp[1];
  91. $callback = $temp[0];
  92. }
  93. if ($source && strstr($v['collect_rule'], '{link}')) {
  94. $v['collect_rule'] = str_replace('{link}', $source, $v['collect_rule']);
  95. }
  96. $value = $doc->rule($data, $col, $v);
  97. if ($value == 'error') {
  98. break;
  99. }
  100. if ($callback) {
  101. if (function_exists($callback)) {
  102. $value = $callback($value);
  103. } else {
  104. $value = Dever::{$callback}($value);
  105. }
  106. }
  107. if ($v['local'] == 1) {
  108. $this->res = $v['res_key'];
  109. $value = $this->local($value, $v['type']);
  110. } elseif ($v['type'] == 2) {
  111. if (is_string($value) && strstr($value, '[')) {
  112. $temp = Dever::json_decode($value);
  113. if ($temp) {
  114. $value = implode(',', $temp);
  115. }
  116. }
  117. }
  118. if ($v['collect_filter_link'] == 1) {
  119. $value = $this->filter($value);
  120. }
  121. if ($value) {
  122. $result[$v['key']] = $value;
  123. if (Dever::input('test') == 1) {
  124. $table[$v['name']] = $value;
  125. }
  126. }
  127. }
  128. }
  129. private function push($push, $data, $project)
  130. {
  131. $push = Dever::split($push);
  132. $data['project_id'] = $project;
  133. foreach ($push as $k => $v) {
  134. if (strstr($v, 'http')) {
  135. Dever::curl($v, $data, 'post');
  136. } else {
  137. Dever::load($v, $data);
  138. }
  139. }
  140. }
  141. private function set($index, $data, $project)
  142. {
  143. if ($data['type'] == 1) {
  144. return $data['value'];
  145. } elseif ($data['type'] == 2) {
  146. $old = 0;
  147. $info = Dever::db('spider/data')->getOne(array('pid' => $project));
  148. if($info) {
  149. $value = json_decode($info['value'], true);
  150. if (isset($value[$data['key']])) {
  151. $old = $value[$data['key']];
  152. }
  153. }
  154. return $data['value'] + $index + $old;
  155. } elseif ($data['type'] == 3) {
  156. $eval = '$value = ' . $data['value'] . ';';
  157. eval($eval);
  158. return $value;
  159. } elseif ($data['type'] == 4) {
  160. $temp = Dever::split($data['value']);
  161. return mt_rand($temp[0], $temp[1]);
  162. } elseif ($data['type'] == 5) {
  163. $temp = Dever::split($data['value']);
  164. $temp[0] = Dever::maketime($temp[0]);
  165. $temp[1] = Dever::maketime($temp[1]);
  166. return mt_rand($temp[0], $temp[1]);
  167. }
  168. }
  169. private function filter($content)
  170. {
  171. $rule = '<(a).+href="(.*?)"(.*?)>(.*?)<\/a>';
  172. $content = preg_replace_callback('/' . $rule . '/i', array($this, 'filter_replace'), $content);
  173. return $content;
  174. }
  175. private function filter_replace($result)
  176. {
  177. if (isset($result[4]) && $result[4]) {
  178. return $result[4];
  179. }
  180. }
  181. private function local($content, $type = 1)
  182. {
  183. if ($type == 1) {
  184. $doc = Dom::init($content);
  185. $pic = Dom::find($doc, '$("img").each().attr("src")');
  186. if ($pic) {
  187. $content = $this->local_replace($pic, $content);
  188. }
  189. $video = Dom::find($doc, '$("video").each().attr("src")');
  190. if ($video) {
  191. $content = $this->local_replace($video, $content);
  192. }
  193. $audio = Dom::find($doc, '$("audio").each().attr("src")');
  194. if ($audio) {
  195. $content = $this->local_replace($audio, $content);
  196. }
  197. /*
  198. $rule = '<(img|video|audio).+src=\"?(.+\.(jpg|gif|bmp|bnp|png))\"?.+>';
  199. $content = preg_replace_callback('/' . $rule . '/i', array($this, 'local_rule_replace'), $content);
  200. */
  201. } else {
  202. $content = $this->copy($content);
  203. }
  204. return $content;
  205. }
  206. private function local_replace($file, $content)
  207. {
  208. if (is_string($file) && strstr($file, '[')) {
  209. $file = Dever::json_decode($file);
  210. }
  211. if (is_array($file)) {
  212. foreach ($file as $k => $v) {
  213. $content = $this->local_replace($v, $content);
  214. }
  215. } else {
  216. $result = $this->copy($file);
  217. if ($result) {
  218. $content = str_replace($file, $result, $content);
  219. }
  220. }
  221. return $content;
  222. }
  223. private function local_rule_replace($result)
  224. {
  225. if (isset($result[2]) && $result[2]) {
  226. $file = $this->copy($result[2]);
  227. if ($file) {
  228. $result[0] = str_replace($result[2], $file, $result[0]);
  229. return $result[0];
  230. }
  231. }
  232. }
  233. private function copy($file)
  234. {
  235. if (is_string($file) && strstr($file, '[')) {
  236. $temp = Dever::json_decode($file);
  237. if ($temp) {
  238. $file = array();
  239. foreach($temp as $k => $v) {
  240. $f = $this->copy($v);
  241. if ($f) {
  242. $file[] = $f;
  243. }
  244. }
  245. $file = implode(',', $file);
  246. return $file;
  247. }
  248. }
  249. if (strstr($file, ',')) {
  250. $temp = explode(',', $file);
  251. $file = array();
  252. foreach($temp as $k => $v) {
  253. $f = $this->copy($v);
  254. if ($f) {
  255. $file[] = $f;
  256. }
  257. }
  258. $file = implode(',', $file);
  259. return $file;
  260. }
  261. if (!strstr($file, 'http') && strstr($file, '//')) {
  262. $file = 'https:' . $file;
  263. }
  264. $data = Dever::load('upload/save.copy?file=' . $file . '&key=' . $this->res . '&state=1');
  265. if (isset($data['status']) && $data['status'] == -1) {
  266. return '';
  267. } elseif (isset($data['url'])) {
  268. return $data['url'];
  269. } else {
  270. return '';
  271. }
  272. }
  273. private function update($data, $project, $source)
  274. {
  275. if (!$data) {
  276. return;
  277. }
  278. $param['pid'] = $project;
  279. $param['source'] = $source;
  280. $info = Dever::db('spider/data')->one($param);
  281. $param['value'] = json_encode($data, JSON_UNESCAPED_UNICODE);
  282. if ($info) {
  283. $update = $param;
  284. $id = $update['where_id'] = $info['id'];
  285. Dever::db('spider/data')->update($update);
  286. } else {
  287. $update = $param;
  288. $id = Dever::db('spider/data')->insert($update);
  289. }
  290. echo $id;
  291. echo "\r\n";
  292. }
  293. }