rabin 7 jaren geleden
bovenliggende
commit
4549848fe1
4 gewijzigde bestanden met toevoegingen van 138 en 1 verwijderingen
  1. 2 1
      database/col.php
  2. 65 0
      lib/Api.php
  3. 50 0
      lib/Project.php
  4. 21 0
      lib/Queue.php

+ 2 - 1
database/col.php

@@ -74,7 +74,8 @@ return array
 			'desc' 		=> '采集规则',
 			'match' 	=> 'is_string',
 			'update'	=> 'textarea',
-			//'edit'		=> true,
+			'list'		=> true,
+			'edit'		=> 'textarea',
 		),
 
 		'collect_include'		=> array

+ 65 - 0
lib/Api.php

@@ -0,0 +1,65 @@
+<?php
+
+namespace Spider\Lib;
+include(DEVER_APP_PATH . 'third/phpQuery.php');
+use Dever;
+use phpQuery;
+
+/**
+ * Spider runner: drains a project's URL queue, downloads each page and
+ * extracts the configured columns from it.
+ */
+class Api
+{
+	/**
+	 * Processes the URLs queued for a project, in queue order.
+	 *
+	 * @param mixed $id   Project id, handed to the Project constructor.
+	 * @param bool  $ajax Currently unused; kept so existing callers keep working.
+	 */
+	public function run_api($id, $ajax = false)
+	{
+		$project = new Project($id);
+		$config = $project->get();
+		while (true) {
+			$url = $config['queue']->pop();
+			// Any falsy value (drained queue, empty string) ends the run.
+			if (!$url) {
+				break;
+			}
+			$data = $this->download($url);
+			$this->parse($data, $config);
+		}
+	}
+
+	/**
+	 * Fetches a page through Dever::curl and normalizes it for parsing.
+	 *
+	 * Content detected as a Simplified Chinese encoding is converted to UTF-8,
+	 * then every PHP_EOL sequence is stripped.
+	 *
+	 * @param string $url Address to fetch.
+	 * @return mixed Page body with PHP_EOL removed.
+	 */
+	private function download($url)
+	{
+		$data = Dever::curl($url);
+		$encode = mb_detect_encoding($data, array('GB2312', 'GBK', 'UTF-8'));
+		// mbstring appears to report canonical names ('EUC-CN' for GB2312,
+		// 'CP936' for GBK), so all four spellings are accepted. CP936 is the
+		// GBK code page, not Shift-JIS, and GBK is a superset of GB2312, so a
+		// single GBK -> UTF-8 conversion covers every case.
+		if (in_array($encode, array('GB2312', 'GBK', 'EUC-CN', 'CP936'), true)) {
+			$converted = \iconv('GBK', 'UTF-8', $data);
+			// iconv returns false on an illegal byte sequence; keep the raw
+			// bytes in that case rather than discarding the whole page.
+			if ($converted !== false) {
+				$data = $converted;
+			}
+		}
+		$data = str_replace(PHP_EOL, '', $data);
+		return $data;
+	}
+
+	/**
+	 * Applies the project rule and every column rule to a downloaded page.
+	 *
+	 * A column's collect_rule holds up to two lines:
+	 *   line 1 - a PHP expression whose value becomes $result;
+	 *   line 2 - "regex" or "regex||group"; the first hit of that capture
+	 *            group (default group 1) replaces $result.
+	 *
+	 * @param string $data   Page body.
+	 * @param array  $config Project configuration; reads 'collect_rule' and 'col'.
+	 */
+	private function parse($data, $config)
+	{
+		$jq = phpQuery::newDocumentHTML($data);
+		// NOTE(review): the stored rules are executed with eval(); they must
+		// only ever come from trusted administrators.
+		$cmd = '$jq = ' . $config['collect_rule'] . ';';
+		eval($cmd);
+		$data = array();
+		foreach ($config['col'] as $v) {
+			$result = $jq->html();
+			// Normalize CRLF first so a rule saved with Windows line endings
+			// does not leave a stray "\r" inside the regex or the group index.
+			$rule = explode("\n", str_replace("\r\n", "\n", $v['collect_rule']));
+			if (isset($rule[0]) && $rule[0]) {
+				$cmd = '$result = ' . $rule[0] . ';';
+				eval($cmd);
+			}
+
+			if (isset($rule[1]) && $rule[1]) {
+				$temp = explode('||', $rule[1]);
+				$index = isset($temp[1]) ? $temp[1] : 1;
+				// The pattern is wrapped in '/' delimiters, so the stored
+				// regex must escape any literal '/'.
+				preg_match_all('/' . $temp[0] . '/i', $result, $match);
+				// No hit (or no such group) yields null without a notice.
+				$result = isset($match[$index][0]) ? $match[$index][0] : null;
+			}
+
+			$data[$v['key']] = $result;
+		}
+		// NOTE(review): debug output - this terminates the script after the
+		// first parsed page, so run_api never reaches a second URL.
+		print_r($data);die;
+	}
+}

+ 50 - 0
lib/Project.php

@@ -0,0 +1,50 @@
+<?php
+
+namespace Spider\Lib;
+
+use Dever;
+
+/**
+ * Loads a spider project row, marks it as running (status 2) and prepares
+ * its column rules and URL queue.
+ */
+class Project
+{
+	/** Project row from 'spider/project', extended by setting() with 'col', 'url' and 'queue'. */
+	private $config;
+
+	/**
+	 * @param mixed $id Project id; a falsy id triggers Dever::alert.
+	 */
+	public function __construct($id)
+	{
+		if (!$id) {
+			// NOTE(review): presumably Dever::alert aborts the request - confirm.
+			Dever::alert('id不存在');
+		}
+		$this->config = Dever::db('spider/project')->one($id);
+		$this->check();
+		$this->runing();
+	}
+
+	/**
+	 * Returns the prepared project configuration.
+	 *
+	 * @return mixed The project row plus the 'col', 'url' and 'queue' entries.
+	 */
+	public function get()
+	{
+		return $this->config;
+	}
+
+	/**
+	 * Validates the loaded project; alerts when no row was found.
+	 */
+	private function check()
+	{
+		if (!$this->config) {
+			Dever::alert('项目未定义');
+		}
+
+		if ($this->config['status'] != 1) {
+			// Status guard intentionally disabled: the alert for a project
+			// that is not in the idle state is not raised.
+		}
+	}
+
+	/**
+	 * Flags the project as running (status 2) and builds its runtime settings.
+	 */
+	private function runing()
+	{
+		Dever::db('spider/project')->update(['status' => 2, 'where_id' => $this->config['id']]);
+		$this->setting();
+	}
+
+	/**
+	 * Loads the column rules and fills the queue with the project's start URLs.
+	 *
+	 * The 'site' field holds one URL per line. Any line-ending style is
+	 * accepted, and blank lines are dropped: the consumer stops at the first
+	 * falsy URL, so an empty entry would otherwise end the crawl early.
+	 */
+	private function setting()
+	{
+		$this->config['col'] = Dever::db('spider/col')->all(['project_id' => $this->config['id']]);
+
+		$lines = preg_split('/\r\n|\r|\n/', (string) $this->config['site']);
+		$this->config['url'] = array_values(array_filter(array_map('trim', $lines)));
+
+		$this->config['queue'] = new Queue();
+		foreach ($this->config['url'] as $url) {
+			$this->config['queue']->push($url);
+		}
+	}
+}

+ 21 - 0
lib/Queue.php

@@ -0,0 +1,21 @@
+<?php
+
+namespace Spider\Lib;
+
+use Dever;
+
+/**
+ * Minimal in-memory queue. Despite the name, pop() takes from the front,
+ * so values come out in the order they were pushed (first in, first out).
+ */
+class Queue
+{
+	/** Pending values, oldest first. */
+	private $data = [];
+
+	/**
+	 * Appends a value to the end of the queue.
+	 *
+	 * @param mixed $value Value to enqueue.
+	 * @return bool Always true.
+	 */
+	public function push($value)
+	{
+		$this->data[] = $value;
+
+		return true;
+	}
+
+	/**
+	 * Removes and returns the oldest value.
+	 *
+	 * @return mixed The value at the front, or null when the queue is empty.
+	 */
+	public function pop()
+	{
+		$head = array_shift($this->data);
+
+		return $head;
+	}
+}