rabin 7 gadi atpakaļ
vecāks
revīzija
13c1a40fa0
11 mainīti faili ar 311 papildinājumiem un 914 dzēšanām
  1. 35 4
      database/col.php
  2. 22 83
      database/data.php
  3. 5 15
      database/project.php
  4. 13 43
      lib/Api.php
  5. 41 0
      lib/Dom.php
  6. 31 0
      lib/Download.php
  7. 149 0
      lib/Parse.php
  8. 1 1
      lib/Project.php
  9. 0 134
      src/Api.php
  10. 1 1
      src/Cate.php
  11. 13 633
      src/Data.php

+ 35 - 4
database/col.php

@@ -15,7 +15,7 @@ return array
 	'menu' => false,
 	# 后台菜单排序
 	'order' => 6,
-	'desc' => 'dom解析:类似jquery的解析方式,如$("#id")->find("a")->attr("class")',
+	'desc' => '字段唯一标识符:如果用逗号隔开,则前面的是dever公共函数,如maketime.date,则会调用Dever::maketime()来处理date的数据',
 	# 数据结构
 	'struct' => array
 	(
@@ -42,6 +42,7 @@ return array
 			'update'	=> 'text',
 			'search'	=> 'fulltext',
 			'list'		=> true,
+			'edit'		=> true,
 		),
 
 		'key'		=> array
@@ -54,16 +55,18 @@ return array
 			'update'	=> 'text',
 			'search'	=> 'fulltext',
 			'list'		=> true,
+			'edit'		=> true,
 		),
 
 		'project_id'		=> array
 		(
 			'type' 		=> 'int-11',
 			'name' 		=> '项目id',
-			'default' 	=> Dever::input('option_project_id', -1),
+			'default' 	=> Dever::input('search_option_project_id', -1),
 			'desc' 		=> '请选择项目id',
 			'match' 	=> 'is_numeric',
 			'search'	=> 'order',
+			'update'	=> 'hidden',
 		),
 		
 		'collect_rule'		=> array
@@ -81,7 +84,7 @@ return array
 		'collect_include'		=> array
 		(
 			'type' 		=> 'varchar-500',
-			'name' 		=> '包含字符-如果填写该项,则进一步过滤,只保留包含有该字符的,支持dom解析、正则',
+			'name' 		=> '包含字符-如果填写该项,则进一步过滤,只保留包含有该字符的',
 			'default' 	=> '',
 			'desc' 		=> '采集规则',
 			'match' 	=> 'option',
@@ -91,7 +94,17 @@ return array
 		'collect_exclude'		=> array
 		(
 			'type' 		=> 'varchar-500',
-			'name' 		=> '不包含字符-如果填写该项,则进一步过滤,只保留不包含有该字符的,支持dom解析、正则',
+			'name' 		=> '不包含字符-如果填写该项,则进一步过滤,只保留不包含有该字符的',
+			'default' 	=> '',
+			'desc' 		=> '采集规则',
+			'match' 	=> 'option',
+			'update'	=> 'textarea',
+		),
+
+		'collect_filter'		=> array
+		(
+			'type' 		=> 'varchar-500',
+			'name' 		=> '过滤规则-如果填写该项,则过滤掉符合该规则的字符,仅支持字符串和正则',
 			'default' 	=> '',
 			'desc' 		=> '采集规则',
 			'match' 	=> 'option',
@@ -118,4 +131,22 @@ return array
 			'list'		=> 'date("Y-m-d H:i:s", {cdate})',
 		),
 	),
+
+	'request' => array
+	(
+		'getList' => array
+		(
+			'where' => array
+			(
+				'project_id' => 'yes',
+				'state' => 1,
+			),
+			'type' => 'all',
+			'order' => array
+			(
+				'id' => 'desc',
+			),
+			'col' => '*|key',
+		),
+	)
 );

+ 22 - 83
database/data.php

@@ -26,103 +26,44 @@ return array
 			'desc' 		=> '',
 			'match' 	=> 'is_numeric',
 			'search'	=> 'order',
-			'list'		=> true,
-			'order'		=> 'desc',
-		),
-
-		'source_link'		=> array
-		(
-			'type' 		=> 'varchar-300',
-			'name' 		=> '来源网址',
-			'default' 	=> '',
-			'desc' 		=> '来源网址',
-			'match' 	=> 'is_string',
-			//'update'	=> 'text',
-			//'search'	=> 'fulltext',
-			'list'		=> true,
-		),
-
-		'source_list'		=> array
-		(
-			'type' 		=> 'varchar-300',
-			'name' 		=> '来源网址所属列表页',
-			'default' 	=> '',
-			'desc' 		=> '来源网址所属列表页',
-			'match' 	=> 'is_string',
-			//'update'	=> 'text',
-			//'search'	=> 'fulltext',
 			//'list'		=> true,
-		),
-		
-		'name'		=> array
-		(
-			'type' 		=> 'varchar-300',
-			'name' 		=> '标题',
-			'default' 	=> '',
-			'desc' 		=> '请输入标题',
-			'match' 	=> 'is_string',
-			'update'	=> 'text',
-			'search'	=> 'fulltext',
-			'list'		=> true,
-			'edit'		=> 'textarea',
-		),
-
-		'cate_id'		=> array
-		(
-			'type' 		=> 'int-11',
-			'name' 		=> '分类id',
-			'default' 	=> '1',
-			'desc' 		=> '请选择分类id',
-			'match' 	=> 'is_numeric',
-			'search'	=> 'order',
-			'list'		=> '{cate_id} > 0 ? Dever::load("collect/cate-one#name", {cate_id}) : "无"',
-			# 开启显示控制,可以控制下边的表单
-			//'show'		=> 'cate_id',
+			'order'		=> 'desc',
 		),
 
-		'config_id'		=> array
+		'project_id'		=> array
 		(
 			'type' 		=> 'int-11',
-			'name' 		=> '配置id',
+			'name' 		=> '项目',
 			'default' 	=> '1',
-			'desc' 		=> '请选择配置id',
+			'desc' 		=> '请选择项目id',
 			'match' 	=> 'is_numeric',
-			'search'	=> 'order',
-			//'list'		=> '{config_id} > 0 ? Dever::load("collect/config-one#name", {config_id}) : "无"',
+			'list'		=> '{project_id} > 0 ? Dever::load("spider/project-one#name", {project_id}) : "无"',
 			# 开启显示控制,可以控制下边的表单
 			//'show'		=> 'cate_id',
 		),
-		
-		'info'		=> array
-		(
-			'type' 		=> 'varchar-500',
-			'name' 		=> '摘要',
-			'default' 	=> '',
-			'desc' 		=> '请输入摘要',
-			'match' 	=> 'is_string',
-			'update'	=> 'textarea',
-			//'edit'		=> true,
-		),
 
-		'content'		=> array
+		'value'		=> array
 		(
 			'type' 		=> 'text-1000',
-			'name' 		=> '内容',
+			'name' 		=> '数据',
 			'default' 	=> '',
-			'desc' 		=> '请输入内容',
+			'desc' 		=> '数据',
 			'match' 	=> 'is_string',
-			'update'	=> 'editor',
-			'key'		=> 1,
+			//'update'	=> 'editor',
+			'search'	=> 'fulltext',
+			'list'		=> 'Dever::load("spider/data.value", {id})',
+			'modal'		=> '查看详情',
 		),
 
-		'extend'		=> array
+		'log_id'		=> array
 		(
-			'type' 		=> 'text-1000',
-			'name' 		=> '扩展信息',
-			'default' 	=> '',
-			'desc' 		=> '请扩展信息',
-			'match' 	=> 'is_string',
-			'update'	=> 'textarea',
+			'type' 		=> 'int-11',
+			'name' 		=> '日志id',
+			'default' 	=> '1',
+			'desc' 		=> '请输入日志id',
+			'match' 	=> 'is_numeric',
+			//'update'	=> 'text',
+			'search'	=> 'fulltext',
 			//'list'		=> true,
 		),
 
@@ -133,8 +74,6 @@ return array
 			'default' 	=> '1',
 			'desc' 		=> '请选择状态',
 			'match' 	=> 'is_numeric',
-			'option' 	=> $option,
-			//'update'	=> 'radio',
 		),
 		
 		'cdate'		=> array
@@ -152,9 +91,9 @@ return array
 	'manage' => array
 	(
 		# 开启批量管理
-		'mul' => true,
+		//'mul' => true,
 		'insert' => false,
-		'list_button' => array(6 => '删除'),
+		'delete' => false,
 	),
 
 	

+ 5 - 15
database/project.php

@@ -133,7 +133,8 @@ return array
 			'desc' 		=> '采集规则',
 			'match' 	=> 'is_string',
 			'update'	=> 'textarea',
-			//'edit'		=> true,
+			'edit'		=> true,
+			'list'		=> true,
 		),
 
 		'page_rule'		=> array
@@ -190,7 +191,7 @@ return array
 			'callback'	=> 'maketime',
 		),
 
-		'interval'		=> array
+		'intervals'		=> array
 		(
 			'type' 		=> 'int-11',
 			'name' 		=> '抓取间隔秒数-填写开始时间之后的间隔抓取的秒数,为0则只抓取一次',
@@ -214,17 +215,6 @@ return array
 			'order'		=> 'desc',
 		),
 
-		'local'		=> array
-		(
-			'type' 		=> 'tinyint-1',
-			'name' 		=> '图片本地化-默认关闭,将图片抓取到本站来,开启后抓取效率会降低',
-			'default' 	=> '1',
-			'desc' 		=> '图片本地化',
-			'match' 	=> 'is_numeric',
-			'option' 	=> $local,
-			'update'	=> 'radio',
-		),
-
 		'state'		=> array
 		(
 			'type' 		=> 'tinyint-1',
@@ -265,8 +255,8 @@ return array
 		# 可以删除
 		'list_button' => array
 		(
-			'list_col' => array('字段设置', '"col&option_project_id={id}"'),
-			'list_data' => array('数据列表', '"data&option_project_id={id}&oper_save_jump=project&oper_parent=project"'),
+			'list_col' => array('字段设置', '"col&search_option_project_id={id}"'),
+			'list_data' => array('数据列表', '"data&search_option_project_id={id}&oper_save_jump=project&oper_parent=project"'),
 			//'delete' => array('采集数据', 'Dever::url("spider/data.daemon?id={id}&")'),
 		),
 		

+ 13 - 43
lib/Api.php

@@ -1,9 +1,6 @@
 <?php
-
 namespace Spider\Lib;
-include(DEVER_APP_PATH . 'third/phpQuery.php');
 use Dever;
-use phpQuery;
 
 class Api
 {
@@ -13,53 +10,26 @@ class Api
 		$config = $project->get();
 		$state = true;
 		while ($state) {
-			$url = $config['queue']->pop();
-			if ($url) {
-				$data = $this->download($url);
-				$this->parse($data, $config);
-			} else {
-				$state = false;	
-			}
+			$state = $this->queue($config);
 		}
+		return 'yes';
 	}
 
-	private function download($url)
+	private function queue($config)
 	{
-		$data = Dever::curl($url);
-		$encode = mb_detect_encoding($data, array('GB2312','GBK','UTF-8'));
-		if ($encode == 'GB2312' || $encode == 'GBK' || $encode == 'EUC-CN') {
-			$data = \iconv('GBK', 'UTF-8', $data);
-		}
-		if ($encode == 'CP936') {
-			$data = \iconv('SJIS', 'UTF-8', $data);
+		$url = $config['queue']->pop();
+		if ($url) {
+			$this->parse($url, $config['id'], $config['collect_rule'], $config['col']);
+			$state = true;
+		} else {
+			$state = false;
 		}
-		$data = str_replace(PHP_EOL, '', $data); 
-		return $data;
+		return $state;
 	}
 
-	private function parse($data, $config)
+	private function parse($url, $project, $rule, $col)
 	{
-		$jq = phpQuery::newDocumentHTML($data);
-		$cmd = '$jq = ' . $config['collect_rule'] . ';';
-		eval($cmd);
-		$data = array();
-		foreach ($config['col'] as $v) {
-			$result = $jq->html();
-			$rule = explode("\n", $v['collect_rule']);
-			if (isset($rule[0]) && $rule[0]) {
-				$cmd = '$result = ' . $rule[0] . ';';
-				eval($cmd);
-			}
-			
-			if (isset($rule[1]) && $rule[1]) {
-				$temp = explode('||', $rule[1]);
-				$index = isset($temp[1]) ? $temp[1] : 1;
-				preg_match_all('/' . $temp[0] . '/i', $result, $match);
-				$result = $match[$index][0];
-			}
-			
-			$data[$v['key']] = $result;
-		}
-		print_r($data);die;
+		$parse = new Parse($url, $project, $rule, $col);
+		return $parse->get();
 	}
 }

+ 41 - 0
lib/Dom.php

@@ -0,0 +1,41 @@
+<?php
+namespace Spider\Lib;
+include(DEVER_APP_PATH . 'third/phpQuery.php');
+use Dever;
+use phpQuery;
+
+/**
+ * Downloads a page and exposes it as a phpQuery object, optionally narrowed
+ * down by a jQuery-like selection rule.
+ */
+class Dom
+{
+	/** @var mixed phpQuery document, or whatever the rule evaluated to. */
+	private $query;
+
+	/**
+	 * @param string $url  Page to download.
+	 * @param string $rule Optional selection rule; when empty the whole document is kept.
+	 */
+	public function __construct($url, $rule = '')
+	{
+		$document = phpQuery::newDocumentHTML($this->download($url));
+		$this->query = $rule ? $this->find($document, $rule) : $document;
+	}
+
+	/**
+	 * Returns the object built by the constructor.
+	 */
+	public function get()
+	{
+		return $this->query;
+	}
+
+	/**
+	 * Evaluates $rule against $dom and returns the result.
+	 *
+	 * Every "$" in the rule is rewritten to "$dom->find", so a rule such as
+	 * $("#id") becomes $dom->find("#id") before it is evaluated.
+	 *
+	 * NOTE(review): the rule is executed with eval(); it must only ever come
+	 * from trusted configuration, never from scraped or user-supplied data.
+	 */
+	public function find($dom, $rule)
+	{
+		// The evaluated code refers to the local variable $dom by name.
+		eval('$dom = ' . str_replace('$', '$dom->find', $rule) . ';');
+		return $dom;
+	}
+
+	/**
+	 * Fetches the raw HTML of $url through the Download helper.
+	 */
+	private function download($url)
+	{
+		return (new Download($url))->get();
+	}
+}

+ 31 - 0
lib/Download.php

@@ -0,0 +1,31 @@
+<?php
+namespace Spider\Lib;
+use Dever;
+
+/**
+ * Fetches a URL once and hands back its body normalised to a single UTF-8 line.
+ */
+class Download
+{
+	/** @var mixed Raw response as returned by Dever::curl(). */
+	private $data;
+
+	/**
+	 * @param string $url Address to fetch; the request is made immediately.
+	 */
+	public function __construct($url)
+	{
+		$this->data = Dever::curl($url);
+	}
+
+	/**
+	 * Returns the downloaded body after charset conversion and newline removal.
+	 */
+	public function get()
+	{
+		return $this->filter($this->data);
+	}
+
+	/**
+	 * Converts $string to UTF-8 based on the detected charset and strips
+	 * every PHP_EOL sequence from it.
+	 */
+	private function filter($string)
+	{
+		switch (mb_detect_encoding($string, array('GB2312','GBK','UTF-8'))) {
+			case 'GB2312':
+			case 'GBK':
+			case 'EUC-CN':
+				$string = \iconv('GBK', 'UTF-8', $string);
+				break;
+			case 'CP936':
+				// NOTE(review): CP936 is normally a GBK code page, yet the data is
+				// decoded as SJIS here - presumably deliberate for some source sites; confirm.
+				$string = \iconv('SJIS', 'UTF-8', $string);
+				break;
+		}
+
+		return str_replace(PHP_EOL, '', $string);
+	}
+}

+ 149 - 0
lib/Parse.php

@@ -0,0 +1,149 @@
+<?php
+namespace Spider\Lib;
+use Dever;
+
+/**
+ * Parses one page of a spider project: selects the repeated elements with the
+ * project rule, extracts every configured column from each element and stores
+ * each resulting row through Dever::db('spider/data').
+ *
+ * The string 'error' is used internally as a sentinel meaning "this value was
+ * rejected by an include/exclude filter"; a row containing it is dropped.
+ */
+class Parse
+{
+	/** @var string URL of the page being parsed. */
+	private $url = '';
+	/** @var string scheme://host[:port] of $url, prepended to root-relative links. */
+	private $host = '';
+	/** @var array Cache of Dom results, keyed by URL and rule. */
+	private $dom = array();
+	/** @var array Accepted rows: element index => (column key => value). */
+	private $data = array();
+
+	/**
+	 * @param string $url     Page to parse.
+	 * @param mixed  $project Project id the rows are stored under.
+	 * @param string $rule    Project-level rule selecting the repeated elements.
+	 * @param array  $col     Column configurations (collect_rule, collect_include, ...).
+	 */
+	public function __construct($url, $project, $rule, $col)
+	{
+		$this->url($url);
+		$dom = $this->dom($rule);
+		foreach ($dom as $k => $v) {
+			$this->handle(pq($v), $k, $col, $project);
+		}
+	}
+
+	/**
+	 * Returns the rows that were accepted and stored.
+	 */
+	public function get()
+	{
+		return $this->data;
+	}
+
+	/**
+	 * Remembers the page URL and derives the host prefix from it.
+	 */
+	private function url($url)
+	{
+		$this->url = $url;
+		$value = parse_url($this->url);
+		// parse_url() returns false or omits keys for malformed / relative URLs.
+		if (is_array($value) && isset($value['scheme'], $value['host'])) {
+			$this->host = $value['scheme'] . '://' . $value['host'];
+			if (isset($value['port'])) {
+				$this->host .= ':' . $value['port'];
+			}
+		}
+	}
+
+	/**
+	 * Returns the (cached) Dom result for $url narrowed by $rule.
+	 * An empty $url means the page this parser was created for.
+	 */
+	private function dom($rule, $url = '')
+	{
+		$url = $url ? $url : $this->url;
+		// The rule is part of the key: the same URL narrowed by different
+		// rules must not share one cache entry.
+		$key = $url . "\n" . $rule;
+		if (empty($this->dom[$key])) {
+			$dom = new Dom($url, $rule);
+			$this->dom[$key] = $dom->get();
+		}
+
+		return $this->dom[$key];
+	}
+
+	/**
+	 * Extracts all columns from one element and stores the row.
+	 *
+	 * A column key of the form "callback.key" passes the value through
+	 * Dever::callback() and stores it under "key". If any column is rejected
+	 * the whole row is skipped and nothing is stored.
+	 */
+	private function handle($dom, $index, $col, $project)
+	{
+		$row = array();
+		foreach ($col as $v) {
+			$callback = false;
+			if (strpos($v['key'], '.') !== false) {
+				$temp = explode('.', $v['key']);
+				$v['key'] = $temp[1];
+				$callback = $temp[0];
+			}
+			$value = $this->load($dom, $col, $v);
+			// Strict comparison: 0 or true must not be mistaken for the sentinel.
+			if ($value === 'error') {
+				return;
+			}
+			if ($callback) {
+				$value = Dever::{$callback}($value);
+			}
+
+			$row[$v['key']] = $value;
+		}
+		if (!$row) {
+			// No columns configured: nothing to store.
+			return;
+		}
+		$this->data[$index] = $row;
+		$this->update($row, $project);
+	}
+
+	/**
+	 * Stores one row as JSON: updates the record with the same project id and
+	 * value when one exists, otherwise inserts a new record.
+	 */
+	private function update($data, $project)
+	{
+		$param = array(
+			'option_project_id' => $project,
+			'option_value' => json_encode($data),
+		);
+		$info = Dever::db('spider/data')->one($param);
+
+		// The same fields are sent as set_* for an update and add_* for an insert.
+		$prefix = $info ? 'set_' : 'add_';
+		$update = array();
+		foreach ($param as $i => $j) {
+			$update[str_replace('option_', $prefix, $i)] = $j;
+		}
+
+		if ($info) {
+			$update['where_id'] = $info['id'];
+			Dever::db('spider/data')->update($update);
+		} else {
+			Dever::db('spider/data')->insert($update);
+		}
+	}
+
+	/**
+	 * Extracts one column value and applies its include/exclude/filter settings.
+	 *
+	 * @return mixed The value, or the string 'error' when it was rejected.
+	 */
+	private function load($dom, $col, $config)
+	{
+		$data = $this->rule($dom, $col, $config['collect_rule'], $config['collect_include'], $config['collect_exclude']);
+		if ($data === 'error') {
+			// A column this one links through was rejected.
+			return 'error';
+		}
+		if ($config['collect_include'] && strpos($data, $config['collect_include']) === false) {
+			return 'error';
+		}
+		if ($config['collect_exclude'] && strpos($data, $config['collect_exclude']) !== false) {
+			return 'error';
+		}
+		if (!empty($config['collect_filter'])) {
+			$data = preg_replace($this->pattern($config['collect_filter']), '', $data);
+		}
+		return $data;
+	}
+
+	/**
+	 * Evaluates a column rule against an element.
+	 *
+	 * The rule is split into lines. If the first line is the key of another
+	 * column, that column's value is treated as a link, the linked page is
+	 * loaded and the remaining lines are applied to it. The next line is a
+	 * jQuery-like expression, the one after it an optional "regex||group".
+	 */
+	private function rule($dom, $col, $rule, $include, $exclude)
+	{
+		$result = $dom->html();
+		// Split on LF or CRLF so textarea input leaves no stray "\r" on a line.
+		$rule = preg_split('/\r?\n/', $rule);
+		if (isset($rule[0]) && $rule[0]) {
+			if (isset($col[$rule[0]])) {
+				$url = $this->getUrl($dom, $col, $col[$rule[0]]);
+				if ($url === 'error') {
+					return 'error';
+				}
+				$dom = $this->dom('', $url);
+				array_shift($rule);
+			}
+			// After the shift there may be no expression left to evaluate.
+			if (isset($rule[0]) && $rule[0]) {
+				$result = $this->find($dom, $rule[0], $result);
+			}
+		}
+		if (isset($rule[1]) && $rule[1]) $result = $this->match($rule[1], $result);
+		return $result;
+	}
+
+	/**
+	 * Evaluates a jQuery-like expression: "$" becomes "$dom->find" and ")."
+	 * becomes ")->", e.g. $("a").attr("href") => $dom->find("a")->attr("href").
+	 *
+	 * NOTE(review): the expression is executed with eval(); it must only come
+	 * from trusted configuration, never from scraped data.
+	 */
+	private function find($dom, $string, $result)
+	{
+		$string = str_replace(array('$', ').'), array('$dom->find', ')->'), $string);
+		// The evaluated code refers to the locals $dom and $result by name.
+		$cmd = '$result = ' . $string . ';';
+		eval($cmd);
+		return $result;
+	}
+
+	/**
+	 * Loads a link column and turns its value into an absolute URL.
+	 *
+	 * @return string Absolute URL, or 'error' when the link column was rejected.
+	 */
+	private function getUrl($dom, $col, $config)
+	{
+		$url = $this->load($dom, $col, $config);
+		if ($url === 'error') {
+			return 'error';
+		}
+		$url = trim((string) $url);
+		if ($url === '') {
+			// Nothing extracted: fall back to the current page.
+			return $this->url;
+		}
+		// Only a leading scheme makes a URL absolute; "http" elsewhere in the
+		// string (e.g. in a query parameter) does not.
+		if (preg_match('#^https?://#i', $url)) {
+			return $url;
+		}
+		if (strpos($url, '//') === 0) {
+			// Protocol-relative link: reuse the scheme of the current page.
+			$scheme = parse_url($this->url, PHP_URL_SCHEME);
+			return ($scheme ? $scheme : 'http') . ':' . $url;
+		}
+		if ($url[0] == '/') {
+			return $this->host . $url;
+		}
+		// NOTE(review): plain concatenation - presumably the page URL is expected
+		// to end with "/"; confirm before relying on document-relative links.
+		return $this->url . $url;
+	}
+
+	/**
+	 * Applies "regex||group" to $string and returns the first match of the
+	 * given capture group (group 1 when omitted), or null when nothing matched.
+	 */
+	private function match($pattern, $string)
+	{
+		$temp = explode('||', $pattern);
+		$index = isset($temp[1]) ? trim($temp[1]) : 1;
+		preg_match_all($this->pattern($temp[0]), $string, $match);
+		return isset($match[$index][0]) ? $match[$index][0] : null;
+	}
+
+	/**
+	 * Wraps a user-written rule into a case-insensitive "/.../i" pattern.
+	 * Slashes are normalised so the rule works whether or not its author
+	 * escaped the "/" delimiter.
+	 */
+	private function pattern($rule)
+	{
+		$rule = str_replace('\\/', '/', $rule);
+		$rule = str_replace('/', '\\/', $rule);
+		return '/' . $rule . '/i';
+	}
+}

+ 1 - 1
lib/Project.php

@@ -42,7 +42,7 @@ class Project
 
 	private function setting()
 	{
-		$this->config['col'] = Dever::db('spider/col')->state(['project_id' => $this->config['id']]);
+		$this->config['col'] = Dever::db('spider/col')->getList(['where_project_id' => $this->config['id']]);
 		$this->config['url'] = explode("\r\n", $this->config['site']);
 		$this->config['queue'] = new Queue();
 		array_walk($this->config['url'], [$this->config['queue'], 'push']);

+ 0 - 134
src/Api.php

@@ -1,134 +0,0 @@
-<?php
-
-namespace DeverApp\Collect;
-
-use Dever;
-
-class Show
-{
-	# 显示数据
-	public function view()
-	{
-		$html = '<link rel="stylesheet" type="text/css" href="http://cdn-cosme.net/css/general/common.css?9c12b1c5396e84d693bea9d2dab205c6f2157da9" media="all">
-				<link rel="stylesheet" type="text/css" href="http://cdn-cosme.net/css/cnt/keyword/keyword.css?9c12b1c5396e84d693bea9d2dab205c6f2157da9" media="all">
-				<link rel="stylesheet" type="text/css" href="http://cdn-cosme.net/css/general/navi.css?9c12b1c5396e84d693bea9d2dab205c6f2157da9" media="all">';
-
-
-		$html .= '<style>
-
-		#keyword-ranking-header
-		{
-			margin-top:10px;
-		}
-
-		#main-col
-		{
-			margin:0 auto;
-			float:none;
-			width:100%;
-		}
-
-		.keyword-ranking-item .summary
-		{
-			width:40%;
-		}
-		.item
-		{
-			width:250px;
-		}
-		#keyword-ranking-header p
-		{
-			text-align:center;
-		}
-		body{    overflow: hidden;}
-
-		#keyword-ranking-title{
-    font-size: 20px!important;
-}
-#keyword-ranking-header{
-    font-size: 16px!important;
-}
-#keyword-ranking-header li, #keyword-ranking-footer li{
-    font-size: 16px!important;
-    width: 100px!important;
-}
-#keyword-ranking-header li.on{
-    background-size: 100px;
-}
-.keyword-ranking-item .summary .item, .keyword-pr .summary .item{
-    font-size: 24px!important;
-    width: 400px!important;
-}
-.keyword-ranking-item .summary .brand{
-    font-size: 18px;
-}
-.keyword-ranking-item .summary .icon-cmn-tieup{
-    font-size: 14px;
-}
-.keyword-ranking-item .summary .category, .keyword-pr .summary .category{
-    font-size: 18px!important;
-}
-.keyword-ranking-item .summary .point, .keyword-ranking-item .summary .votes, .keyword-ranking-item .summary .price, .keyword-ranking-item .summary .onsale, .keyword-pr .summary .point, .keyword-pr .summary .votes, .keyword-pr .summary .price, .keyword-pr .summary .onsale{
-    font-size: 16px!important;
-}
-.reviewer-average{
-    font-size: 15px!important;
-    line-height: inherit!important;
-}
-.keyword-ranking-item .pic, .keyword-pr .pic{
-    width: 95px!important;
-    text-align: center;
-}
-.keyword-ranking-item .pic img, .keyword-pr .pic img{
-    height: 95px;
-}
-.keyword-ranking-item dt{
-    width: 60px!important;
-}
-.keyword-ranking-item dt .rank-num img{
-    width: 60px!important;
-}
-		</style>';
-
-		$config = Dever::input('id', 2);
-
-		$info = Dever::load('collect/config-one', $config);
-
-		$site = explode("\r\n", $info['site']);
-
-		$temp = explode('/', $site[0]);
-		$num = $temp[5];
-
-		$key = Dever::input('key', 0);
-
-		$param['option_config_id'] = $config;
-
-		$data = Dever::load('collect/data-getAll', $param);
-
-		$i = 0;
-		foreach($data as $k => $v)
-		{
-			if($key == $i)
-			{
-				$html .= '<div id="main-col">';
-				$html .= $this->content($v['content'], $key, $config, $num);
-				$html .= '</div>';
-			}
-			$i++;
-		}
-		
-		echo $html;die;
-
-		return $html;
-	}
-
-	private function content($content, $key, $config, $num)
-	{
-		$content = str_replace('http://www.cosme.net/item/item_id/'.$num.'/ranking/page/', Dever::url('show.view?id=' . $config . '&key='), $content);
-
-		$content = str_replace('http://www.cosme.net/item/item_id/'.$num.'/ranking', Dever::url('show.view?id=' . $config . '&key=0'), $content);
-
-		$content = preg_replace('/"http:\/\/www.cosme.net\/(.*?)"/i', 'javascript:;', $content);
-		return $content;
-	}
-}

+ 1 - 1
src/Cate.php

@@ -1,6 +1,6 @@
 <?php
 
-namespace Forum\Src;
+namespace Spider\Src;
 
 use Dever;
 

+ 13 - 633
src/Data.php

@@ -1,643 +1,23 @@
 <?php
 
-namespace DeverApp\Collect;
+namespace Spider\Src;
 
 use Dever;
-ini_set('max_execution_time', '0');
-set_time_limit(0);
+
 class Data
 {
-	# 保存数据
-	protected $_data;
-	
-	# 停止采集
-	public function stop()
-	{
-		$id	= Dever::input('id');
-		Dever::load('collect/config-update', array('set_state' => 1, 'where_id' => $id));
-	}
-	
-	# 生成配置文件
-	public function create()
-	{
-		$id	= Dever::input('id');
-		if($id > 0)
-		{
-			$data = Dever::load('collect/config-check', $id);
-			if($data)
-			{
-				foreach($data as $k => $v)
-				{
-					if(is_string($k) && $k != 'id')
-					{
-						$result[$k] = $v;
-					}
-				}
-				
-				$result = base64_encode(serialize($result));
-				$html = '<div style="width:100%; table-layout:fixed; word-break: break-all; overflow:hidden;">'.$result.'</div>';
-				echo $html;die;
-			}
-		}
-		echo '没有配置数据';die;
-	}
-	
-	# 开始后台采集数据
-	public function daemon()
-	{
-		$id	= Dever::input('id');
-		if($id > 0)
-		{
-			$data = Dever::load('collect/config-check', $id);
-
-			//Dever::daemon('data.run?id=' . $id, 'collect');
-
-			# 加入cron
-			Dever::cron($data['name'], $data['sdate'], 'data.run?id=' . $id, $data['time'], 'collect');
-			
-			Dever::abert('已经开始自动采集数据,关闭本窗口即可!');
-		}
-	}
-	
-	# 开始实时采集数据
-	public function run()
-	{
-		$id	= Dever::input('id');
-		
-		$where = array();
-		# 启动
-		if($id > 0)
-		{
-			$param['option_id'] = $id;
-
-			$config = Dever::load('collect/config-check', $id);
-
-			$this->_run($config);
-		}
-	}
-	
-	private function _run($config)
-	{
-		# 运行中
-		Dever::load('collect/config-update', array('set_status' => 2, 'where_id' => $config['id']));
-
-		//while(true);
-		if($config)
-		{
-			//$config = Dever::collect_decode($config);
-			if(strstr($config['site'], '|g|'))
-			{
-				$temp = explode('|g|', $config['site']);
-				$config['url'] = $temp[0] . $temp[1];
-				$config['site'] = $temp[0];
-
-				$this->_create($config);
-			}
-			else
-			{
-				$config['url'] = $config['site'];
-
-				$url = explode("\r\n", $config['url']);
-				foreach($url as $k => $v)
-				{
-					if($v)
-					{
-						$config['url'] = $v;
-						$this->_create($config);
-					}
-				}
-			}
-			
-			if(Dever::input('test') == 1)
-			{
-				print_r($this->_data);die;
-			}
-			
-			# 继续
-			$status = 4;
-			if($config['time'] <= 0)
-			{
-				# 完成状态
-				$status = 4;
-			}
-			Dever::load('collect/config-update', array('set_status' => $status, 'set_num' => $config['num'] + 1, 'set_sdate' => time(), 'where_id' => $config['id']));
-
-			die;
-		}
-	}
-	
-	private function _create($config, $page = 1)
-	{
-		$result = array();
-		# 分析整个网页
-		if($config['site_rule_content'] == 1)
-		{
-			if(Dever::input('ajax') == 1 && Dever::input('url'))
-			{
-				$config['url'] = Dever::input('url');
-			}
-			list($temp, $result) = $this->_match($config['url'], $config['site_rule'], true);
-
-			if(Dever::input('ajax') == 1)
-			{
-				$data['txt'] = $this->_ajax('网址' . $config['url'] . '分析中,已得到内容链接,分析内容中...');
-				$data['url'] = implode(',', $result);
-				$data['page'] = $config['page_rule'];
-				$data['site'] = $config['site'];
-
-				echo json_encode($data);die;
-			}
-		}
-		else
-		{
-			$result = explode("\r\n", $config['url']);
-		}
-
-		if($result)
-		{
-			foreach($result as $k => $v)
-			{
-				$this->_createOne($v, $config);
-			}
-			
-			if($config['page_rule'])
-			{
-				$config_page = $config['page_rule'];
-				//sleep(2);
-				$max = false;
-				if(strstr($config['page_rule'], '|g|'))
-				{
-					$temp = explode('|g|', $config['page_rule']);
-					$config_page = $temp[0];
-					$max = $temp[1];
-				}
-				$page = $page + 1;
-				if($max && $page > $max)
-				{
-					# 最多只能跑这个页数的数据
-				}
-				else
-				{
-					$config['url'] = $config['site'] . '' . str_replace('(*)', $page, $config_page);
-
-					$this->_create($config, $page);
-				}
-			}
-		}
-		//sleep(2);
-	}
-
-	private function _ajax($txt)
-	{
-		return '[时间:' . date('Y-m-d H:i:s') . ']' . $txt;
-	}
-
-	# ajax 取一条
-	public function cronOne()
-	{
-		$id	= Dever::input('id');
-		$where = array
-		(
-			array('=', 'id', $id),
-		);
-		# 验证当前配置的状态
-		$data = DEVER_Db::select('config', '', $where);
-		$config = $data[0];
-		$config = Dever::collect_decode($config);
-		if(strstr($config['site'], '|g|'))
-		{
-			$temp = explode('|g|', $config['site']);
-			$config['url'] = $temp[0] . $temp[1];
-			$config['site'] = $temp[0];
-		}
-		else
-		{
-			$config['url'] = $config['site'];
-		}
-
-		$url = Dever::input('url');
-		Dever::input('ajax', 1);
-
-		if($url && $config)
-		{
-			$this->_createOne($url, $config);
-		}
-	}
-
-	private function _createOne($v, $config)
-	{
-		if(!strstr($v, 'http://'))
-		{
-			if(strstr($v, '/'))
-			{
-				$t = explode('/', $v);
-			}
-			else
-			{
-				$v = '/' . $v;
-			}
-
-			if(!$t[0])
-			{
-				$u = parse_url($config['url']);
-				$v = 'http://' . $u['host'] . $v;
-			} 
-			else
-			{
-				$v = $config['url'] . $v;
-			}
-		}
-
-		# 分析标题
-		$link = $v;
-
-		list($html, $name) = $this->_match($v, $config['name_rule'], true);
-
-		if($name && isset($name[0]) && $name[0])
-		{
-			if($config['name_include'] && !strstr($name[0], $config['name_include']))
-			{
-				return;
-			}
-			# 分析内容
-			$content = '';
-			if($config['content_rule'])
-			{
-				$content = $this->_loadContent($v,$config, $html);
-			}
-
-			//print_r($content);die;
-			
-			# 分析摘要
-			$intro = '';
-			if($config['intro_rule'])
-			{
-				list($temp, $string) = $this->_match($html, $config['intro_rule']);
-				
-				if($string && $string[0])
-				{
-					$intro = $string[0];
-				}
-			}
-			
-			$cdate = '';
-			# 分析时间
-			if($config['date_rule'])
-			{
-				list($temp, $date) = $this->_match($html, $config['date_rule']);
-				
-				if($date && $date[0])
-				{
-					$cdate = $date[0];
-				}
-			}
-
-			$extend = '';
-			# 分析扩展字段
-			if($config['extend_rule'])
-			{
-				list($temp, $extend) = $this->_match($html, $config['extend_rule'], false, true);
-			}
-			
-			# 入库
-			$this->_data($name[0], $content, $intro, $cdate, $config, $config['url'], $link, $extend);
-		}
-	}
-
-	private function _loadContent($url, $config, $html)
-	{
-
-		$content = '';
-		if(strstr($config['content_rule'], '|page|'))
-		{
-			$page = explode('|page|', $config['content_rule']);
-			$config['content_rule'] = $page[0];
-			$content = $this->_getContent($config, $html, false);
-			if(strstr($url, '.html'))
-			{
-				$url = str_replace('.html', '', $url);
-			}
-			for($i = 2; $i<= 100 ; $i++)
-			{
-				$u = $url . str_replace('(*)', $i, $page[1]);
-
-				$ct = $this->_getContent($config, $u, true);
-
-				if($ct && !strstr($content, $ct))
-				{
-					$content = $content . "\r\n" . $ct;
-				}
-				else
-				{
-					break;
-				}
-			}
-		}
-		else
-		{
-			$content = $this->_getContent($config, $html, false);
-		}
-
-		return $content;
-
-	}
-
-	private function _getContent($config, $url, $state)
-	{
-		list($temp, $content) = $this->_match($url, $config['content_rule'], $state);
-						
-		$contents = array();
-
-		if($content && is_array($content))
-		{
-			foreach($content as $a => $b)
-			{
-				$contents[] = '<p>'.rtrim(ltrim($b)).'</p>';
-			}
-		}
-						
-		if($contents)
-		{
-			$content = implode('', $contents);
-		}
-						
-		# 过滤不想要的字符
-		if($content && $config['content_include'])
-		{
-			$pic = '';
-			$temp = explode("\r\n", $config['content_include']);
-			foreach($temp as $tk => $tv)
-			{
-				$gg =  '|g|0';
-				$method = false;
-				if(strstr($tv, '||'))
-				{
-					$temp = explode('||', $tv);
-					$tv = $temp[0];
-					$method = $temp[1];
-					$gg = '';
-				}
-				list($t, $include) = $this->_match($content, $tv . $gg);
-				if($method == 'pic' && $include)
-				{
-					foreach($include as $ik => $iv)
-					{
-						$u = parse_url($config['url']);
-						$iv = 'http://' . $u['host'] . '/'. $iv;
-						$pic = '<img src="'.$iv.'">';
-					}
-				}
-				elseif($include)
-				{
-					foreach($include as $ik => $iv)
-					{
-						if($pic)
-						{
-							$content = str_replace($iv, $pic, $content);
-							$pic = '';
-						}
-						else
-						{
-							$content = str_replace($iv, '', $content);
-						}
-					}
-				}
-			}
-		}
-		return $content;
-	}
-	
-	private function _match($data, $rule, $type = false, $mul = false)
-	{
-		if(strstr($rule, "\r\n") || $mul == true)
-		{
-			$array = explode("\r\n", $rule);
-			$return = array();
-			foreach($array as $k => $v)
-			{
-				if($v)
-				{
-					$result = $this->_match($data, $v, $type);
-					if($result)
-					{
-						if($mul == true && isset($result[1]))
-						{
-							$return[] = $result[1][0];
-						}
-						else
-						{
-							return $result;
-							break;
-						}
-					}
-				}
-			}
-			if($mul == true && $return)
-			{
-				return array(false, implode('||', $return));
-			}
-			
-			return array(false, false);
-		}
-		
-		$index = 1;
-		if(strstr($rule, '|g|'))
-		{
-			$array = explode('|g|', $rule);
-			$rule = $array[0];
-			$index = $array[1];
-		}
-		
-		# 这里做这个替换是为了防止有人不做转义,而有的又做了转义
-		if(strstr($rule, '\\/'))
-		{
-			$rule = str_replace('\\/', '/', $rule);
-		}
-		if(strstr($rule, '/'))
-		{
-			$rule = str_replace('/', '\\/', $rule);
-		}
-		
-		if($type == true)
-		{
-			if(isset($array[2]) && $array[2])
-			{
-				$data = $array[2] . $data;
-			}
-			//sleep(1);
-			$data = Dever::curl($data);
-
-			if(!$data) return array(false, false);
-
-			
-			$encode = mb_detect_encoding($data, array('GB2312','GBK','UTF-8'));
-
-			//echo $encode;die;
-
-			if($encode == 'GB2312' || $encode == 'GBK' || $encode == 'EUC-CN')
-			{
-				$data = \iconv('GBK', 'UTF-8', $data);
-			}
-
-			if($encode == 'CP936')
-			{
-				$data = \iconv('SJIS', 'UTF-8', $data);
-			}
-		}
-		
-		# 过滤换行
-		$data = str_replace(PHP_EOL, '', $data); 
-
-		preg_match_all('/' . $rule . '/i', $data, $result);
-			
-		return array($data, $result[$index]);
-	}
-	
-	# 将得到的数据生成一份保存下来
-	private function _data($name, $content, $intro, $cdate, $config, $site, $link, $extend)
-	{
-		$data['add_name'] = strip_tags(trim($name));
-		$data['add_cate_id'] = $config['cate_id'] ? $config['cate_id'] : 1;
-		//$data['add_admin'] = 1;
-		
-		# 标签分析工具
-		//$data['log_Tag'] = $this->_tag($name);
-		$data['add_content'] = $content;
-		$data['add_extend'] = $extend;
-		$data['add_info'] = trim($intro);
-		$data['add_cdate'] = $cdate ? Dever::maketime(strip_tags(trim($cdate))) : time();
-		$data['add_source_link'] = $link;
-		$data['add_source_list'] = $site;
-		$data['add_config_id'] = $config['id'];
-
-		//print_r($data);die;
-		
-		if(Dever::input('test') == 1)
-		{
-			$num = count($this->_data);
-			$this->_data[$num] = $data;
-			echo '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> ';
-			print_r($data);die;
-			return;
-		}
-		
-		# 验证是否已经存在数据,用标题不靠谱,原站如果改了标题就不好了
-		$info = Dever::load('collect/data-check', array('option_source_link' => $link));
-
-		//print_r($info);die;
-
-		# 直接入库
-		if($info)
-		{
-			$update['set_name'] = $data['add_name'];
-			$update['set_cate_id'] = $data['add_cate_id'];
-			$update['set_content'] = $data['add_content'];
-			$update['set_info'] = $data['add_info'];
-			$update['set_extend'] = $extend;
-			$update['set_cdate'] = time();
-			$update['set_config_id'] = $config['id'];
-			$update['set_source_link'] = $link;
-			$update['set_source_list'] = $site;
-			$update['where_id'] = $info['id'];
-
-			Dever::load('collect/data-update', $update);
-			$id = $info['id'];
-			$desc = '(<a style="color:red">已采集过,直接覆盖</a>)';
-		}
-		else
-		{
-			$id = Dever::load('collect/data-insert', $data);
-			$desc = '(<a style="color:blue">新采集的内容</a>)';
-		}
-
-		if(Dever::input('ajax') == 1)
-		{
-			if($config['type'] == 1)
-			{
-				$txt = '采集成功';
-			}
-			else
-			{
-				$txt = '采集成功';
-			}
-			$result['txt'] = $this->_ajax('内容[<a href="'.$link.'" target="_blank">' . $data['add_name'] . '</a>]' . $txt . $desc);
-			$result['url'] = $link;
-
-			echo json_encode($result);die;
-		}
-		
-		return true;
-	}
-	
-	/**
-	 * 上传操作 直接参考的ueditor插件
-	 */
-	private function _upload($file, $id)
-	{
-		global $zbp;
-		//http开头验证
-        if(strpos($file, "http") !== 0)
-        {
-            return $file;
-        }
-        
-		//获取请求头并检测死链
-        $heads = get_headers($file);
-        if(!(stristr($heads[0], "200") && stristr($heads[0], "OK")))
-        {
-            return $file;
-        }
-        
-        
-        //格式验证(扩展名验证和Content-Type验证)
-        $fileType = strtolower(strrchr($file, '.'));
-        if(stristr($heads['Content-Type'], "image"))
-        {
-            return $file;
-        }
-
-        //打开输出缓冲区并获取远程图片
-        ob_start();
-        $context = stream_context_create
-        (
-            array('http' => array
-            (
-                'follow_location' => false // don't follow redirects
-            ))
-        );
-        readfile($file, false, $context);
-        $img = ob_get_contents();
-        ob_end_clean();
-        preg_match("/[\/]([^\/]*)[\.]?[^\.\/]*$/", $file, $m);
-        
-        $filename = $m ? $m[1] : "";
-        $filesize = strlen($img);
-        $filetype = strtolower(strrchr($filename, '.'));
+    public function value($id)
+    {
+        $data = Dever::load('spider/data-one', $id);
+        $data['value'] = Dever::table(json_decode($data['value'], true));
 
-		# 这里要修改upload类 算了
-        $root = Dever::path($zbp->usersdir . 'upload/collect/');
-        $id = ceil($id/1000);
-		$filepath = Dever::path($root . $id . '/');
-		$filepath = 'upload/collect/' . $id . '/';
-        $filename = md5($file) . $filetype;
+        return $data['value'];
+    }
 
-		$upload = new Upload;
-		$upload->Dir = $filepath;
-		$upload->Name = $filename;
-		# 这个暂时没用啊
-		//$upload->SourceName = $filepath . $filename;
-		$upload->MimeType = $heads['Content-Type'];
-		$upload->Size = $fileSize;
-		$upload->AuthorID = $zbp->user->ID;
-		
-		if(!$upload->SaveBase64File(base64_encode($img)))
-		{
-			return $file;
-		}
-		
-		$upload->Save();
-		$file = $upload->Url;
+    public function get($project_id = 1)
+    {
+        $data = Dever::load('spider/data-all', array('option_project_id' => $project_id));
 
-		return $file;
-	}
+        return $data;
+    }
 }