Parse.php 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  4. class Parse
  5. {
  6. private $url = '';
  7. private $host = '';
  8. private $log;
  9. private $doc = array();
  10. private $data = array();
  11. public function __construct($url, $project, $rule, $param, $col, $set, $push)
  12. {
  13. $doc = Doc::getInstance($url, $rule);
  14. $doc->log(new Log($project));
  15. $data = $doc->get($param);
  16. if ($data) {
  17. if (!is_array($data) && !is_object($data)) {
  18. $state = Dever::json_decode($data);
  19. if ($state) {
  20. $data = $state;
  21. }
  22. }
  23. if ($data) {
  24. if (is_array($data)) {
  25. $domain = parse_url($url);
  26. $host = $domain['scheme'] . '://' . $domain['host'] . '/';
  27. foreach ($data as $k => $v) {
  28. if (is_string($v) && !strstr($v, 'http')) {
  29. $v = $host . ltrim($v, '/');
  30. }
  31. $this->data[$k] = $this->load($doc, $k, $v, $col, $set, $push, $project);
  32. }
  33. } else {
  34. $this->data = $this->load($doc, 0, $data, $col, $set, $push, $project);
  35. }
  36. }
  37. }
  38. $doc->saveLog();
  39. }
  40. public function get()
  41. {
  42. return $this->data;
  43. }
  44. private function load($doc, $index, $data, $col, $set, $push, $project)
  45. {
  46. if (!$col) {
  47. if (Dever::input('test') == 1) {
  48. $doc->outLog();
  49. echo 'error';die;
  50. }
  51. return false;
  52. }
  53. $result = $table = array();
  54. if (isset($col[1])) {
  55. $data = $doc->init($data);
  56. $this->getCol($doc, $col[1], $data, $result, $table);
  57. }
  58. if (isset($col[2])) {
  59. $data = $doc->getCur();
  60. $this->getCol($doc, $col[2], $data, $result, $table);
  61. }
  62. if ($set) {
  63. foreach ($set as $v) {
  64. $value = $this->set($index, $v, $project);
  65. $result[$v['key']] = $value;
  66. if (Dever::input('test') == 1) {
  67. $table[$v['name']] = $value;
  68. }
  69. }
  70. }
  71. if ($push) {
  72. $result['test'] = Dever::input('test');
  73. $this->push($push, $result, $project);
  74. }
  75. if (Dever::input('test') == 1) {
  76. $doc->outLog();
  77. echo Dever::table($table);die;
  78. }
  79. $this->update($result, $project);
  80. return $result;
  81. }
  82. private function getCol($doc, $col, $data, &$result, &$table)
  83. {
  84. foreach ($col as $v) {
  85. $callback = false;
  86. if (strpos($v['key'], '.') !== false) {
  87. $temp = explode('.', $v['key']);
  88. $v['key'] = $temp[1];
  89. $callback = $temp[0];
  90. }
  91. $value = $doc->rule($data, $col, $v);
  92. if ($value == 'error') {
  93. break;
  94. }
  95. if ($callback) {
  96. if (function_exists($callback)) {
  97. $value = $callback($value);
  98. } else {
  99. $value = Dever::{$callback}($value);
  100. }
  101. }
  102. if ($v['local'] == 1) {
  103. $this->res = $v['res_key'];
  104. $value = $this->local($value, $v['type']);
  105. }
  106. if ($v['collect_filter_link'] == 1) {
  107. $value = $this->filter($value);
  108. }
  109. if ($value) {
  110. $result[$v['key']] = $value;
  111. if (Dever::input('test') == 1) {
  112. $table[$v['name']] = $value;
  113. }
  114. }
  115. }
  116. }
  117. private function push($push, $data, $project)
  118. {
  119. $push = explode("\n", str_replace("\r", '', $push));
  120. $data['project_id'] = $project;
  121. foreach ($push as $k => $v) {
  122. if (strstr($v, 'http')) {
  123. Dever::curl($v, $data, 'post');
  124. } else {
  125. Dever::load($v, $data);
  126. }
  127. }
  128. }
  129. private function set($index, $data, $project)
  130. {
  131. if ($data['type'] == 1) {
  132. return $data['value'];
  133. } elseif ($data['type'] == 2) {
  134. $old = 0;
  135. $info = Dever::db('spider/data')->getOne(array('pid' => $project));
  136. if($info) {
  137. $value = json_decode($info['value'], true);
  138. if (isset($value[$data['key']])) {
  139. $old = $value[$data['key']];
  140. }
  141. }
  142. return $data['value'] + $index + $old;
  143. } elseif ($data['type'] == 3) {
  144. $eval = '$value = ' . $data['value'] . ';';
  145. eval($eval);
  146. return $value;
  147. } elseif ($data['type'] == 4) {
  148. $temp = explode("\n", str_replace("\r", '', $data['value']));
  149. return mt_rand($temp[0], $temp[1]);
  150. } elseif ($data['type'] == 5) {
  151. $temp = explode("\n", str_replace("\r", '', $data['value']));
  152. $temp[0] = Dever::maketime($temp[0]);
  153. $temp[1] = Dever::maketime($temp[1]);
  154. return mt_rand($temp[0], $temp[1]);
  155. }
  156. }
  157. private function filter($content)
  158. {
  159. $rule = '<(a).+href="(.*?)"(.*?)>(.*?)<\/a>';
  160. $content = preg_replace_callback('/' . $rule . '/i', array($this, 'filter_replace'), $content);
  161. return $content;
  162. }
  163. private function filter_replace($result)
  164. {
  165. if (isset($result[4]) && $result[4]) {
  166. return $result[4];
  167. }
  168. }
  169. private function local($content, $type = 1)
  170. {
  171. if ($type == 1) {
  172. $rule = '<(img|video|audio).+src=\"?(.+\.(jpg|gif|bmp|bnp|png))\"?.+>';
  173. $content = preg_replace_callback('/' . $rule . '/i', array($this, 'local_replace'), $content);
  174. } else {
  175. $content = $this->copy($content);
  176. }
  177. return $content;
  178. }
  179. private function local_replace($result)
  180. {
  181. if (isset($result[2]) && $result[2]) {
  182. $file = $this->copy($result[2]);
  183. if ($file) {
  184. $result[0] = str_replace($result[2], $file, $result[0]);
  185. return $result[0];
  186. }
  187. }
  188. }
  189. private function copy($file)
  190. {
  191. if (strstr($file, ',')) {
  192. $temp = explode(',', $file);
  193. $file = array();
  194. foreach($temp as $k => $v) {
  195. $f = $this->copy($v);
  196. if ($f) {
  197. $file[] = $f;
  198. }
  199. }
  200. $file = implode(',', $file);
  201. return $file;
  202. }
  203. $data = Dever::load('upload/save.copy?file=' . $file . '&key=' . $this->res . '&state=1');
  204. if (isset($data['url'])) {
  205. return $data['url'];
  206. } else {
  207. return '';
  208. }
  209. }
  210. private function update($data, $project)
  211. {
  212. if (!$data) {
  213. return;
  214. }
  215. $param['option_pid'] = $project;
  216. $param['option_value'] = json_encode($data, JSON_UNESCAPED_UNICODE);
  217. $info = Dever::db('spider/data')->one($param);
  218. if ($info) {
  219. $update = array();
  220. foreach ($param as $i => $j) {
  221. $i = str_replace('option_', 'set_', $i);
  222. $update[$i] = $j;
  223. }
  224. $id = $update['where_id'] = $info['id'];
  225. Dever::db('spider/data')->update($update);
  226. } else {
  227. $update = array();
  228. foreach ($param as $i => $j) {
  229. $i = str_replace('option_', 'add_', $i);
  230. $update[$i] = $j;
  231. }
  232. $id = Dever::db('spider/data')->insert($update);
  233. }
  234. }
  235. }