url($url); $this->rule = $rule; if (strpos($this->rule, '$json') !== false) { $this->type = 'json'; } else { $this->type = 'dom'; } }

    /**
     * Store the page URL and derive the base strings used to resolve relative links.
     *
     * Sets $this->url, $this->host ("scheme://host[:port]") and $this->path
     * (host plus the directory part of the URL path, always ending in "/").
     * A falsy $url leaves the previously stored values untouched.
     *
     * @param string|false $url Absolute URL of the page, or false to keep the current one.
     * @return void
     */
    private function url($url = false)
    {
        if (!$url) {
            return;
        }
        $this->url = $url;
        $value = parse_url($this->url);
        $this->host = $value['scheme'] . '://' . $value['host'];
        if (isset($value['port'])) {
            // Keep an explicit port; without it relative links resolve against the wrong origin.
            $this->host .= ':' . $value['port'];
        }
        $this->path = $this->host;
        if (isset($value['path']) && $value['path']) {
            $segments = explode('/', $value['path']);
            // Drop the last segment so only the directory part of the path remains.
            array_pop($segments);
            $this->path .= implode('/', $segments);
        }
        $this->path .= '/';
    }

    /**
     * Download and parse the current URL, then optionally narrow it with the stored rule.
     *
     * The first parsed document is remembered in $this->cur.
     *
     * @param mixed $param Forwarded to doc().
     * @return mixed The parsed document, or the result of find() when a rule is set.
     */
    public function get($param = array())
    {
        $doc = $this->doc(false, $param);
        if (!$this->cur) {
            $this->cur = $doc;
        }
        if ($this->rule) {
            $doc = $this->find($doc, $this->rule);
        }
        return $doc;
    }

    /**
     * Download a URL and hand the body to the parser class selected by $this->type.
     *
     * @param string|false $url   URL to fetch; false re-uses the stored URL.
     * @param mixed        $param Forwarded to download().
     * @return mixed Whatever the parser's static init() returns.
     */
    public function doc($url = false, $param = array())
    {
        $this->url($url);
        // NOTE(review): $param lands in download()'s $header slot - confirm this is intended.
        $html = $this->download($this->url, $param);
        return ($this->getClass())::init($html);
    }

    /**
     * Return the document remembered by the first get() call.
     *
     * @return mixed
     */
    public function getCur()
    {
        return $this->cur;
    }

    /**
     * Fetch a URL through the Download helper and log the progress.
     *
     * @param string $url    URL to fetch.
     * @param mixed  $header Passed as the second Download constructor argument.
     * @param mixed  $param  Passed as the third Download constructor argument.
     * @return mixed Result of Download::get() for the current type.
     */
    private function download($url, $header = '', $param = '')
    {
        $this->addLog($url . '下载中...');
        $download = new Download($url, $header, $param);
        // NOTE(review): the "complete" message is logged before get() runs - presumably
        // the constructor performs the transfer; verify against Download.
        $this->addLog($url . '下载完成');
        return $download->get($this->type);
    }

    /**
     * Validate and clean an extracted value with the field's include/exclude/filter rules.
     *
     * Each rule set is a newline-separated list of regular expressions (without
     * delimiters; matched case-insensitively).
     *  - include: at least one pattern must match, otherwise 'error' is returned.
     *  - exclude: no pattern may match, otherwise 'error' is returned.
     *  - filter : every pattern is replaced; "pattern=>replacement" sets the
     *             replacement text, a bare pattern removes the match.
     *
     * @param string $data    Extracted value.
     * @param string $include Newline-separated include patterns (may be empty).
     * @param string $exclude Newline-separated exclude patterns (may be empty).
     * @param string $filter  Newline-separated filter rules (may be empty).
     * @return string The filtered value, or the literal string 'error' on rejection.
     */
    private function collect($data, $include, $exclude, $filter)
    {
        if ($include) {
            $matched = false;
            foreach (explode("\n", str_replace("\r", '', $include)) as $pattern) {
                if (preg_match('/' . $pattern . '/i', $data)) {
                    $matched = true;
                    break;
                }
            }
            if (!$matched) {
                return 'error';
            }
        }
        if ($exclude) {
            foreach (explode("\n", str_replace("\r", '', $exclude)) as $pattern) {
                if ($pattern === '') {
                    // A blank line would compile to an empty regex that matches everything.
                    continue;
                }
                // Reject the value as soon as any excluded pattern is present.
                if (preg_match('/' . $pattern . '/i', $data)) {
                    return 'error';
                }
            }
        }
        if ($filter) {
            foreach (explode("\n", str_replace("\r", '', $filter)) as $pattern) {
                $replacement = '';
                if (strstr($pattern, '=>')) {
                    $temp = explode('=>', $pattern);
                    $pattern = $temp[0];
                    $replacement = $temp[1];
                }
                $replaced = preg_replace('/' . $pattern . '/i', $replacement, $data);
                // preg_replace() yields null on a bad pattern; keep the value instead of wiping it.
                if ($replaced !== null) {
                    $data = $replaced;
                }
            }
        }
        return $data;
    }

    /**
     * Name of the parser class for the current type ('json' -> 'Json', 'dom' -> 'Dom').
     *
     * @return string
     */
    private function getClass()
    {
        return ucfirst($this->type);
    }

    /**
     * Apply a selection rule to a parsed document via the parser class.
     *
     * @param mixed  $doc  Parsed document.
     * @param string $rule Selection rule.
     * @return mixed Result of the parser's static find().
     */
    public function find($doc, $rule)
    {
        return ($this->getClass())::find($doc, $rule);
    }

    /**
     * Turn raw input into a parsed document.
     *
     * A string that validates as a URL is downloaded first; anything else is
     * passed straight to the parser's static init().
     *
     * @param mixed $data URL or raw content.
     * @return mixed Parsed document.
     */
    public function init($data)
    {
        if (is_string($data) && filter_var($data, FILTER_VALIDATE_URL) !== false) {
            $data = $this->doc($data);
        } else {
            $data = ($this->getClass())::init($data);
        }
        return $data;
    }

    /**
     * Extract one field from a document according to its configuration.
     *
     * Reads 'name', 'key' and 'collect_rule' from $config. When 'collect_url' is
     * set, its first line is the rule locating the next page and its optional
     * second line is a substring that URL must contain; values from all followed
     * pages are joined with ','. The result is finally passed through collect().
     *
     * @param mixed $data   Parsed document.
     * @param mixed $col    Forwarded unchanged to the parser's rule().
     * @param array $config Field configuration.
     * @return string Filtered value, or 'error' when collect() rejects it.
     */
    public function rule($data, $col, $config)
    {
        $name = '字段[' . $config['name'] . '('.$config['key'].')]' . '"';
        $this->addLog($name . '正在按照规则['.$config['collect_rule'].']进行解析');
        $result = $this->getRule($data, $col, $config['collect_rule'], $config['key']);
        if (isset($config['collect_url']) && $config['collect_url']) {
            $collect_url = explode("\n", str_replace("\r", '', $config['collect_url']));
            if (!isset($collect_url[1])) {
                $collect_url[1] = '';
            }
            $temp = array();
            $temp[] = $result;
            $this->getNext($temp, $data, $col, $collect_url[0], $collect_url[1], $config['collect_rule'], $config['key']);
            $result = implode(',', $temp);
        }
        $this->addLog($name . '解析完成');
        // The three rule sets are optional; default them so a sparse config raises no warnings.
        return $this->collect(
            $result,
            $config['collect_include'] ?? '',
            $config['collect_exclude'] ?? '',
            $config['collect_filter'] ?? ''
        );
    }

    /**
     * Follow "next page" links recursively and append each page's extracted value.
     *
     * Stops when no URL is found, the URL lacks $collect_include, the URL points
     * back at the page that was just fetched, the page cannot be loaded, or the
     * page yields no value.
     *
     * @param array  $result          Accumulator, extended in place.
     * @param mixed  $data            Parsed document of the current page.
     * @param mixed  $col             Forwarded unchanged to the parser's rule().
     * @param string $collect_url     Rule locating the next-page URL.
     * @param string $collect_include Substring the URL must contain ('' disables the check).
     * @param string $collect_rule    Rule extracting the field value.
     * @param mixed  $key             Field key forwarded to the parser's rule().
     * @return void
     */
    public function getNext(&$result, $data, $col, $collect_url, $collect_include, $collect_rule, $key)
    {
        $url = $this->getUrl($data, $col, $collect_url, $key);
        if (!$url) {
            return;
        }
        if ($url === $this->url) {
            // A "next" link pointing at the page just downloaded would recurse forever.
            return;
        }
        if ($collect_include && !strstr($url, $collect_include)) {
            return;
        }
        $data = $this->init($url);
        if ($data) {
            $temp = $this->getRule($data, $col, $collect_rule, $key);
            if ($temp) {
                $result[] = $temp;
                $this->getNext($result, $data, $col, $collect_url, $collect_include, $collect_rule, $key);
            }
        }
    }

    /**
     * Extract a link with the given rule and resolve it to an absolute URL.
     *
     * Resolution order: already absolute (http/https) -> returned as is;
     * protocol-relative ("//host/x") -> current scheme is prepended;
     * root-relative ("/x") -> $this->host is prepended;
     * contains a dot -> $this->path is prepended;
     * anything else -> appended to $this->url.
     *
     * @param mixed  $data         Parsed document.
     * @param mixed  $col          Forwarded unchanged to the parser's rule().
     * @param string $collect_rule Rule locating the link.
     * @param mixed  $key          Field key forwarded to the parser's rule().
     * @return string Absolute URL, or '' when the rule yields nothing.
     */
    public function getUrl($data, $col, $collect_rule, $key)
    {
        $url = $this->getRule($data, $col, $collect_rule, $key);
        if (!$url) {
            return '';
        }
        // Only a leading scheme makes the link absolute; "http" later in the string does not.
        if (preg_match('#^https?://#i', $url)) {
            return $url;
        }
        if (strncmp($url, '//', 2) === 0) {
            // Protocol-relative link: reuse the scheme of the current page.
            $scheme = strstr((string) $this->host, '://', true);
            return ($scheme ?: 'http') . ':' . $url;
        }
        if ($url[0] == '/') {
            $url = $this->host . $url;
        } elseif (strstr($url, '.')) {
            $url = $this->path . $url;
        } else {
            $url = $this->url . $url;
        }
        return $url;
    }

    /**
     * Delegate field extraction to the parser class for the current type.
     *
     * @param mixed  $data         Parsed document.
     * @param mixed  $col          Forwarded unchanged.
     * @param string $collect_rule Extraction rule.
     * @param mixed  $key          Field key.
     * @return mixed Result of the parser's static rule().
     */
    public function getRule($data, $col, $collect_rule, $key)
    {
        return ($this->getClass())::rule($this, $data, $col, $collect_rule, $key);
    }

    /**
     * Append a message to the attached log; does nothing when no log is attached.
     *
     * @param string $string Message text.
     * @return void
     */
    public function addLog($string)
    {
        if ($this->log) {
            $this->log->add($string);
        }
    }

    /**
     * Call save() on the attached log, if any.
     *
     * @return void
     */
    public function saveLog()
    {
        if ($this->log) {
            $this->log->save();
        }
    }

    /**
     * Call out() on the attached log, if any.
     *
     * @return void
     */
    public function outLog()
    {
        if ($this->log) {
            $this->log->out();
        }
    }

    /**
     * Attach the log object used by addLog(), saveLog() and outLog().
     *
     * @param Log $log Log instance.
     * @return void
     */
    public function log(Log $log)
    {
        $this->log = $log;
    }
}