Parse.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. <?php
  2. namespace Spider\Lib;
  3. use Dever;
  /**
   * Collects data for a spider project from a URL.
   *
   * The constructor fetches the document through Doc, applies every column
   * rule in $col to it (or to each link the document yields), stores each
   * collected row through Dever::db('spider/data') and keeps the rows
   * available through get().
   */
  class Parse
  {
      /** @var string Not referenced by any method in this class. */
      private $url = '';

      /** @var string Not referenced by any method in this class. */
      private $host = '';

      /** @var mixed Not referenced by any method in this class. */
      private $log;

      /** @var array Not referenced by any method in this class. */
      private $doc = array();

      /** @var array|false Collected rows; shape mirrors what Doc::get() returned. */
      private $data = array();

      /**
       * Resource key handed to the upload service by copy().
       *
       * Declared explicitly: it used to be created on the fly inside load(),
       * which is a deprecated dynamic property on PHP 8.2+.
       *
       * @var mixed
       */
      private $res = '';

      /**
       * Fetches $url and runs the collection for it.
       *
       * @param string $url     Page to collect from.
       * @param mixed  $project Project identifier; passed to Log and stored with every row.
       * @param mixed  $rule    Passed to Doc::getInstance() unchanged.
       * @param array  $col     Column definitions, see load().
       */
      public function __construct($url, $project, $rule, $col)
      {
          $doc = Doc::getInstance($url, $rule);
          $doc->log(new Log($project));
          $data = $doc->get();

          // A scalar payload may be JSON; keep the raw value when it does not decode.
          if ($data && !is_array($data) && !is_object($data)) {
              $state = Dever::json_decode($data);
              if ($state) {
                  $data = $state;
              }
          }

          if ($data) {
              if (is_array($data)) {
                  // Each entry is treated as a link that is loaded on its own.
                  $base = $this->baseUrl($url);
                  foreach ($data as $k => $v) {
                      $this->data[$k] = $this->load($doc, $this->absolute($v, $base), $col, $project);
                  }
              } else {
                  $this->data = $this->load($doc, $data, $col, $project);
              }
          }

          $doc->saveLog();
      }

      /**
       * Returns whatever the constructor collected.
       *
       * @return array|false
       */
      public function get()
      {
          return $this->data;
      }

      /**
       * Builds "scheme://host[:port]" for a page URL.
       *
       * @param string $url
       * @return string Empty string when the URL has no host to build from.
       */
      private function baseUrl($url)
      {
          $parts = parse_url($url);
          if (!is_array($parts) || empty($parts['host'])) {
              return '';
          }

          $scheme = isset($parts['scheme']) ? $parts['scheme'] : 'http';
          $base = $scheme . '://' . $parts['host'];
          if (isset($parts['port'])) {
              // Keep a non-default port, otherwise relative links point at the wrong server.
              $base .= ':' . $parts['port'];
          }

          return $base;
      }

      /**
       * Turns a site-relative link into an absolute one.
       *
       * Only a leading "http://" or "https://" counts as absolute; "http"
       * appearing later in a path (e.g. "/docs/http-guide") no longer does.
       * Relative links are resolved against the host root, not against the
       * directory of the page.
       *
       * @param mixed  $link Link as extracted; non-strings are returned untouched.
       * @param string $base Result of baseUrl().
       * @return mixed
       */
      private function absolute($link, $base)
      {
          if (!is_string($link) || $base === '') {
              return $link;
          }
          if (preg_match('#^https?://#i', $link)) {
              return $link;
          }
          if (strpos($link, '//') === 0) {
              // Protocol-relative link: borrow the scheme of the page.
              return substr($base, 0, strpos($base, ':') + 1) . $link;
          }

          return $base . '/' . ltrim($link, '/');
      }

      /**
       * Applies every column rule to one document and stores the resulting row.
       *
       * Each column is an array read with the keys 'key' and 'name' and the
       * optional switches 'local' (with 'res_key' and 'type') and
       * 'collect_filter_link'. A key written as "callback.key" post-processes
       * the value with the function, or Dever static method, named before the dot.
       *
       * When the request input "test" equals 1 the method prints a debug
       * table (or "error") and terminates the script instead of storing.
       *
       * @param object $doc     Document object created in the constructor.
       * @param mixed  $data    Document payload or link to initialise $doc with.
       * @param array  $col     Column definitions.
       * @param mixed  $project Project identifier the row is stored under.
       * @return array|false Collected row keyed by column key, false without columns.
       */
      private function load($doc, $data, $col, $project)
      {
          $result = $table = array();
          $data = $doc->init($data);
          $test = Dever::input('test') == 1;

          if (!$col) {
              if ($test) {
                  $doc->outLog();
                  echo 'error';die;
              }
              return false;
          }

          foreach ($col as $v) {
              $callback = false;
              if (strpos($v['key'], '.') !== false) {
                  $temp = explode('.', $v['key']);
                  $v['key'] = $temp[1];
                  $callback = $temp[0];
              }

              $value = $doc->rule($data, $col, $v);
              // Strict check: a loose == treats true (and 0 before PHP 8) as 'error'.
              if ($value === 'error') {
                  break;
              }

              if ($callback) {
                  // NOTE(review): the callback name comes straight from the column
                  // definition and may name any PHP function; confirm that column
                  // definitions can only be edited by trusted users.
                  if (function_exists($callback)) {
                      $value = $callback($value);
                  } else {
                      $value = Dever::{$callback}($value);
                  }
              }

              if (isset($v['local']) && $v['local'] == 1) {
                  $this->res = isset($v['res_key']) ? $v['res_key'] : '';
                  $value = $this->local($value, isset($v['type']) ? $v['type'] : 1);
              }

              if (isset($v['collect_filter_link']) && $v['collect_filter_link'] == 1) {
                  $value = $this->filter($value);
              }

              $result[$v['key']] = $value;
              if ($test) {
                  $table[$v['name']] = $value;
              }
          }

          if ($test) {
              $doc->outLog();
              echo Dever::table($table);die;
          }

          $this->update($result, $project);
          return $result;
      }

      /**
       * Replaces every <a href="...">text</a> with its inner text.
       *
       * Each anchor is matched on its own ([^>]* cannot run past the end of a
       * tag), so several links on one line are all unwrapped; the former
       * greedy ".+" swallowed everything up to the last link of the line.
       *
       * @param string $content
       * @return string
       */
      private function filter($content)
      {
          $rule = '<a\s[^>]*?href="[^"]*"[^>]*>(.*?)<\/a>';
          $clean = preg_replace_callback('/' . $rule . '/is', array($this, 'filter_replace'), $content);

          // preg_replace_callback() yields null on a PCRE failure; keep the content then.
          return $clean === null ? $content : $clean;
      }

      /**
       * Callback of filter(): keeps the link text, including a literal "0".
       *
       * @param array $result Match groups; [1] is the link text.
       * @return string
       */
      private function filter_replace($result)
      {
          return isset($result[1]) ? $result[1] : '';
      }

      /**
       * Localises remote files referenced by the collected value.
       *
       * @param string $content HTML fragment (type 1) or a single file reference.
       * @param int    $type    1 rewrites the src of img/video/audio tags inside
       *                        $content, anything else copies $content itself.
       * @return string
       */
      private function local($content, $type = 1)
      {
          if ($type == 1) {
              // Tag-bounded pattern so that every media tag is handled, not just
              // the last one of a line.
              $rule = '<(img|video|audio)[^>]+src=\"?([^\"\'>\s]+\.(jpg|gif|bmp|bnp|png))\"?[^>]*>';
              $local = preg_replace_callback('/' . $rule . '/i', array($this, 'local_replace'), $content);
              if ($local !== null) {
                  $content = $local;
              }
          } else {
              $content = $this->copy($content);
          }

          return $content;
      }

      /**
       * Callback of local(): swaps the matched src for its local copy.
       *
       * @param array $result Match groups; [0] is the whole tag, [2] the src value.
       * @return string The rewritten tag, or the untouched tag when nothing was copied.
       */
      private function local_replace($result)
      {
          if (empty($result[2])) {
              return $result[0];
          }

          $file = $this->copy($result[2]);
          if (!$file) {
              // Returning nothing here would delete the tag from the content.
              return $result[0];
          }

          return str_replace($result[2], $file, $result[0]);
      }

      /**
       * Asks the upload service to copy a file and returns its new address.
       *
       * @param string $file Remote file reference.
       * @return string The service's 'url' when it reports a non-empty one, otherwise $file.
       */
      private function copy($file)
      {
          // NOTE(review): $file and the resource key are concatenated unescaped; a
          // file URL containing "&" may be cut short. Confirm how Dever::load()
          // parses this string before adding urlencode().
          $data = Dever::load('upload/save.copy?file=' . $file . '&key=' . $this->res . '&state=1');
          if (!empty($data['url'])) {
              return $data['url'];
          }

          return $file;
      }

      /**
       * Stores one collected row for the project.
       *
       * Looks for an existing spider/data record with the same project and the
       * same encoded value; updates that record when found, inserts a new one
       * otherwise.
       *
       * @param array $data    Collected row.
       * @param mixed $project Project identifier.
       * @return void
       */
      private function update($data, $project)
      {
          $json = json_encode($data, JSON_UNESCAPED_UNICODE);
          if ($json === false) {
              // e.g. malformed UTF-8 in scraped text; do not store boolean false as the value.
              trigger_error('Parse::update could not encode the collected data: ' . json_last_error_msg(), E_USER_WARNING);
              return;
          }

          $info = Dever::db('spider/data')->one(array(
              'option_pid' => $project,
              'option_value' => $json,
          ));

          if ($info) {
              Dever::db('spider/data')->update(array(
                  'set_pid' => $project,
                  'set_value' => $json,
                  'where_id' => $info['id'],
              ));
          } else {
              Dever::db('spider/data')->insert(array(
                  'add_pid' => $project,
                  'add_value' => $json,
              ));
          }
      }
  }