url($url);
        $this->rule = $rule;
        if (strpos($this->rule, '$json') !== false) {
            $this->type = 'json';
        } else {
            $this->type = 'dom';
        }
    }

    /**
     * Remember the page URL and derive the bases used to resolve relative links.
     *
     * Sets $this->url, $this->host ("scheme://host[:port]") and $this->path
     * (host plus the directory part of the URL path, always ending in "/").
     * A falsy $url is ignored, so the previously stored URL stays in effect.
     *
     * @param string|false $url Absolute URL of the page, or false to keep the current one.
     */
    private function url($url = false)
    {
        if (!$url) {
            return;
        }
        $this->url = $url;

        // parse_url() returns false for seriously malformed URLs and omits
        // components that are absent, so every part is read defensively.
        $parts = parse_url($this->url);
        if (!is_array($parts)) {
            $parts = array();
        }

        $origin = ($parts['scheme'] ?? '') . '://' . ($parts['host'] ?? '');
        if (isset($parts['port'])) {
            // Keep an explicit port; without it relative links would be
            // resolved against a different origin than the page came from.
            $origin .= ':' . $parts['port'];
        }
        $this->path = $this->host = $origin;

        if (isset($parts['path']) && $parts['path']) {
            // Drop the last path segment (the file name) to get the directory.
            $segments = explode('/', $parts['path']);
            array_pop($segments);
            $this->path .= implode('/', $segments);
        }
        $this->path .= '/';
    }

    /**
     * Download the current URL and, when a rule is configured, apply it.
     *
     * The first document fetched is also kept as the "current" document
     * (see getCur()).
     *
     * @param mixed $param Forwarded to doc().
     * @return mixed The parsed document, or the result of find() when $this->rule is set.
     */
    public function get($param = array())
    {
        $doc = $this->doc(false, $param);
        if (!$this->cur) {
            $this->cur = $doc;
        }
        if ($this->rule) {
            $doc = $this->find($doc, $this->rule);
        }
        return $doc;
    }

    /**
     * Download a URL and parse the response with the type-specific document class.
     *
     * @param string|false $url   URL to fetch; false re-uses the stored URL.
     * @param mixed        $param Forwarded to download().
     * @return mixed Whatever the document class' init() returns.
     */
    public function doc($url = false, $param = array())
    {
        $this->url($url);
        // NOTE(review): $param is passed as download()'s second argument, which
        // that method names $header - confirm this is the intended slot.
        $html = $this->download($this->url, $param);
        return ($this->getClass())::init($html);
    }

    /**
     * Return the document stored by the first call to get().
     *
     * @return mixed
     */
    public function getCur()
    {
        return $this->cur;
    }

    /**
     * Fetch a URL through the Download helper, logging before and after.
     *
     * @param string $url
     * @param mixed  $header Passed to the Download constructor.
     * @param mixed  $param  Passed to the Download constructor.
     * @return mixed Result of Download::get() for the current document type.
     */
    private function download($url, $header = '', $param = '')
    {
        $this->addLog($url . '下载中...');
        $download = new Download($url, $header, $param);
        // NOTE(review): the "finished" message is logged before get() is called;
        // whether the transfer happens in the constructor or in get() is not
        // visible from here.
        $this->addLog($url . '下载完成');
        return $download->get($this->type);
    }

    /**
     * Validate and clean a collected value.
     *
     * Each of $include, $exclude, $filter and $attr is a list in the format
     * understood by Dever::split(); every entry is used as a case-insensitive
     * regular expression body (wrapped in "/.../i").
     *
     * @param string $data    The collected text.
     * @param mixed  $include At least one pattern must match, otherwise 'error'.
     * @param mixed  $exclude No pattern may match, otherwise 'error'.
     * @param mixed  $filter  Patterns to remove, or "pattern=>replacement" pairs.
     * @param mixed  $attr    Attribute names whose name="value" occurrences are removed.
     * @return string The cleaned data, or the literal string 'error' when a check fails.
     */
    private function collect($data, $include, $exclude, $filter, $attr)
    {
        if ($include) {
            // Start from "no match" so an empty pattern list is rejected
            // instead of reading an undefined variable.
            $matched = false;
            foreach (Dever::split($include) as $pattern) {
                if (preg_match('/' . $pattern . '/i', $data)) {
                    $matched = true;
                    break;
                }
            }
            if (!$matched) {
                return 'error';
            }
        }

        if ($exclude) {
            foreach (Dever::split($exclude) as $pattern) {
                // The data is rejected when an excluded pattern IS present.
                // (The check used to be inverted and rejected data that did
                // not contain the pattern.)
                if (preg_match('/' . $pattern . '/i', $data)) {
                    return 'error';
                }
            }
        }

        if ($filter) {
            foreach (Dever::split($filter) as $pattern) {
                $replacement = '';
                if (strpos($pattern, '=>') !== false) {
                    // "pattern=>replacement": substitute instead of deleting.
                    $pair = explode('=>', $pattern);
                    $pattern = $pair[0];
                    $replacement = $pair[1];
                }
                $data = preg_replace('/' . $pattern . '/i', $replacement, $data);
            }
        }

        if ($attr) {
            foreach (Dever::split($attr) as $attribute) {
                // Strip every double-quoted occurrence of attribute="...".
                $data = preg_replace('/' . $attribute . '="(.*?)"' . '/i', '', $data);
            }
        }

        return $data;
    }

    /**
     * Fully-qualified name of the document class for the current type
     * ('json' or 'dom', capitalised).
     *
     * @return string
     */
    private function getClass()
    {
        return 'Spider\\Lib\\Doc\\' . ucfirst($this->type);
    }

    /**
     * Apply a rule to a parsed document via the type-specific document class.
     *
     * @param mixed $doc
     * @param mixed $rule
     * @return mixed
     */
    public function find($doc, $rule)
    {
        return ($this->getClass())::find($doc, $rule);
    }

    /**
     * Turn $data into a parsed document.
     *
     * A string that validates as a URL is downloaded first; anything else is
     * handed directly to the document class' init().
     *
     * @param mixed $data URL or raw content.
     * @return mixed
     */
    public function init($data)
    {
        if (is_string($data) && filter_var($data, FILTER_VALIDATE_URL) !== false) {
            return $this->doc($data);
        }
        return ($this->getClass())::init($data);
    }

    /**
     * Extract one configured field from a document, optionally following
     * "next page" links, then validate and clean the result.
     *
     * Keys read from $config: name, key, collect_rule, and optionally
     * collect_url, collect_include, collect_exclude, collect_filter,
     * collect_attr.
     *
     * @param mixed $data   Parsed document.
     * @param mixed $col    Passed through to the document class' rule().
     * @param array $config Field configuration.
     * @return mixed Cleaned value, or 'error' when an include/exclude check fails.
     */
    public function rule($data, $col, $config)
    {
        $name = '字段[' . $config['name'] . '('.$config['key'].')]' . '"';
        $this->addLog($name . '正在按照规则['.$config['collect_rule'].']进行解析');

        $result = $this->getRule($data, $col, $config['collect_rule'], $config['key']);

        if (!empty($config['collect_url'])) {
            // collect_url is "<rule for the next-page link>[, <required substring>]".
            $collect_url = Dever::split($config['collect_url']);
            if (!isset($collect_url[1])) {
                $collect_url[1] = '';
            }
            $pages = array($result);
            $this->getNext($pages, $data, $col, $collect_url[0], $collect_url[1], $config['collect_rule'], $config['key']);
            $result = implode(',', $pages);
        }

        $this->addLog($name . '解析完成');

        // The cleaning options are optional, just like collect_url.
        return $this->collect(
            $result,
            $config['collect_include'] ?? '',
            $config['collect_exclude'] ?? '',
            $config['collect_filter'] ?? '',
            $config['collect_attr'] ?? ''
        );
    }

    /**
     * Follow "next page" links and append the value collected from each page.
     *
     * Walking stops when no link is found, the link lacks $collect_include,
     * the page cannot be loaded, the page yields no value, or the link was
     * already visited during this walk (which would otherwise loop forever,
     * e.g. on a last page whose "next" link points to itself).
     *
     * @param array  $result          Collected values; appended to in place.
     * @param mixed  $data            Parsed document to start from.
     * @param mixed  $col             Passed through to the document class' rule().
     * @param mixed  $collect_url     Rule that extracts the next-page link.
     * @param string $collect_include Substring the link must contain ('' disables the check).
     * @param mixed  $collect_rule    Rule that extracts the value on each page.
     * @param mixed  $key             Passed through to the document class' rule().
     */
    public function getNext(&$result, $data, $col, $collect_url, $collect_include, $collect_rule, $key)
    {
        $visited = array();

        while (true) {
            $url = $this->getUrl($data, $col, $collect_url, $key);
            if (!$url || isset($visited[$url])) {
                return;
            }
            if ($collect_include && !strstr($url, $collect_include)) {
                return;
            }
            $visited[$url] = true;

            // init() downloads the page, which also updates the stored
            // url/host/path used to resolve the following link.
            $data = $this->init($url);
            if (!$data) {
                return;
            }

            $value = $this->getRule($data, $col, $collect_rule, $key);
            if (!$value) {
                return;
            }
            $result[] = $value;
        }
    }

    /**
     * Extract a link with the given rule and make it absolute.
     *
     * Resolution of non-absolute links:
     *   - "//host/..."  : current scheme is prepended
     *   - "/..."        : appended to $this->host
     *   - contains "."  : appended to $this->path (current directory)
     *   - anything else : appended to $this->url
     *
     * @param mixed $data         Parsed document.
     * @param mixed $col          Passed through to the document class' rule().
     * @param mixed $collect_rule Rule that extracts the link.
     * @param mixed $key          Passed through to the document class' rule().
     * @return string Absolute URL, or '' when the rule yields nothing.
     */
    public function getUrl($data, $col, $collect_rule, $key)
    {
        $url = $this->getRule($data, $col, $collect_rule, $key);
        if (!$url) {
            return '';
        }

        // Extracted attribute values may carry surrounding whitespace.
        $url = trim((string) $url);
        if ($url === '') {
            return '';
        }

        // Only a leading scheme makes the link absolute; "http" appearing
        // later (e.g. in a query string) must not count.
        if (preg_match('#^https?://#i', $url)) {
            return $url;
        }

        if (strpos($url, '//') === 0) {
            // Protocol-relative link: re-use the scheme of the current page.
            $scheme = parse_url((string) $this->host, PHP_URL_SCHEME);
            return ($scheme ? $scheme : 'http') . ':' . $url;
        }

        if ($url[0] == '/') {
            return $this->host . $url;
        }
        if (strstr($url, '.')) {
            return $this->path . $url;
        }
        return $this->url . $url;
    }

    /**
     * Delegate rule evaluation to the type-specific document class.
     *
     * @param mixed $data
     * @param mixed $col
     * @param mixed $collect_rule
     * @param mixed $key
     * @return mixed
     */
    public function getRule($data, $col, $collect_rule, $key)
    {
        return ($this->getClass())::rule($this, $data, $col, $collect_rule, $key);
    }

    /**
     * Append a message to the attached log, if any.
     *
     * @param string $string
     */
    public function addLog($string)
    {
        if ($this->log) {
            $this->log->add($string);
        }
    }

    /**
     * Call save() on the attached log, if any.
     */
    public function saveLog()
    {
        if ($this->log) {
            $this->log->save();
        }
    }

    /**
     * Call out() on the attached log, if any.
     */
    public function outLog()
    {
        if ($this->log) {
            $this->log->out();
        }
    }

    /**
     * Attach the log that addLog(), saveLog() and outLog() forward to.
     *
     * @param Log $log
     */
    public function log(Log $log)
    {
        $this->log = $log;
    }
}