dever 2 years ago
parent
commit
e7cad35271
16 changed files with 251 additions and 503 deletions
  1. 1 1
      daemon/main.php
  2. 0 200
      database/cate.php
  3. 5 4
      database/data.php
  4. 57 92
      database/project.php
  5. 94 33
      lib/Api.php
  6. 0 62
      lib/Cron.php
  7. 10 0
      lib/Doc.php
  8. 3 3
      lib/Log.php
  9. 11 3
      lib/Parse.php
  10. 44 19
      lib/Project.php
  11. 4 46
      lib/Queue.php
  12. 7 8
      lib/doc/Dom.php
  13. 5 0
      lib/doc/Json.php
  14. 1 1
      package.json
  15. 0 30
      src/Cate.php
  16. 9 1
      src/Data.php

+ 1 - 1
daemon/main.php

@@ -4,4 +4,4 @@ define('DEVER_DAEMON', true);
 
 include(dirname(__FILE__) . DIRECTORY_SEPARATOR . '../index.php');
 
-Dever::load('spider/api.cron');
+Dever::load('spider/lib/api.cron');

+ 0 - 200
database/cate.php

@@ -1,200 +0,0 @@
-<?php
-
-$cate = function()
-{
-	$array = array(-1 => array('name' => '父级分类'));
-	$cate = Dever::load('spider/cate-mainAll');
-	if ($cate) {
-		$array += $cate;
-	}
-	return $array;
-};
-return array
-(
-	# 表名
-	'name' => 'cate',
-	# 显示给用户看的名称
-	'lang' => '分类',
-	'order' => 10,
-	# 数据结构
-	'struct' => array
-	(
-		'id' 		=> array
-		(
-			'type' 		=> 'int-11',
-			'name' 		=> '分类ID',
-			'default' 	=> '',
-			'desc' 		=> '',
-			'match' 	=> 'is_numeric',
-			//'search'	=> 'order',
-			'order'		=> 'desc',
-			'list'		=> true,
-		),
-		
-		'name'		=> array
-		(
-			'type' 		=> 'varchar-60',
-			'name' 		=> '分类名称',
-			'default' 	=> '',
-			'desc' 		=> '请输入分类名称',
-			'match' 	=> 'is_string',
-			'update'	=> 'text',
-			'search'	=> 'fulltext',
-			'list'		=> true,
-		),
-		
-		'cate_id'		=> array
-		(
-			'type' 		=> 'int-11',
-			'name' 		=> '上级分类',
-			'default' 	=> Dever::input('option_cate_id', -1),
-			'desc' 		=> '请选择上级分类',
-			'match' 	=> 'is_numeric',
-			'update'	=> 'select',
-			'search'	=> 'order,select',
-			'list'		=> '{cate_id} > 0 ? Dever::load("spider/cate-one#name", {cate_id}) : "父级分类"',
-			'option' 	=> $cate,
-		),
-		
-		'reorder'		=> array
-		(
-			'type' 		=> 'int-11',
-			'name' 		=> '排序(数值越大越靠前)',
-			'default' 	=> '1',
-			'desc' 		=> '请输入排序',
-			'match' 	=> 'option',
-			'update'	=> 'text',
-			'search'	=> 'order',
-			'list_name' => '排序',
-			'list'		=> true,
-			'order'		=> 'desc',
-			'edit'		=> true,
-		),
-
-		'state'		=> array
-		(
-			'type' 		=> 'tinyint-1',
-			'name' 		=> '状态',
-			'default' 	=> '1',
-			'desc' 		=> '请选择状态',
-			'match' 	=> 'is_numeric',
-		),
-		
-		'cdate'		=> array
-		(
-			'type' 		=> 'int-11',
-			'name' 		=> '录入时间',
-			'match' 	=> array('is_numeric', time()),
-			'desc' 		=> '',
-			# 只有insert时才生效
-			'insert'	=> true,
-			'list'		=> 'date("Y-m-d H:i:s", {cdate})',
-		),
-	),
-	
-	'manage' => array
-	(
-		# 列表页的类型
-		'list_type' => 'parent',
-
-		'list_button' => array
-		(
-			'add' => array('新增子分类', '"cate&option_cate_id={id}"', '{cate_id}<=0'),
-			//6 => '删除'
-		),
-	),
-	
-	# request 请求接口定义
-	'request' => array
-	(
-		# info 取一条正常的数据
-		'info' => array
-		(
-			'where' => array
-			(
-				'id' => 'is_numeric',
-				'state' => 1,
-			),
-			'type' => 'one',
-		),
-		
-		'link' => array
-		(
-			'where' => array
-			(
-				'link' => 'is_string',
-				'state' => 1,
-			),
-			'type' => 'one',
-		),
-		
-		# main 取所有主分类
-		'main' => array
-		(
-			'where' => array
-			(
-				'cate_id' => -1,
-				'state' => 1,
-			),
-			'type' => 'all',
-			'order' => array
-			(
-				'reorder' => 'desc',
-				'id' => 'desc',
-			),
-			'col' => '*|id',
-		),
-		
-		# 取所有下级分类
-		'child' => array
-		(
-			'where' => array
-			(
-				'cate_id' => array('1', '>='),
-				'state' => 1,
-			),
-			'type' => 'all',
-			'order' => array
-			(
-				'reorder' => 'desc',
-				'id' => 'desc',
-			),
-			'col' => '*|cate_id|id|',
-		),
-		
-		
-		# main 取所有主分类
-		'mainAll' => array
-		(
-			'where' => array
-			(
-				'cate_id' => -1,
-				'state' => 1,
-			),
-			'type' => 'all',
-			'order' => array
-			(
-				'reorder' => 'desc',
-				'id' => 'desc',
-			),
-			'col' => '*|id',
-		),
-		
-		# 取所有下级分类
-		'childAll' => array
-		(
-			'where' => array
-			(
-				'cate_id' => array('1', '>='),
-				'state' => 1,
-			),
-			'type' => 'all',
-			'order' => array
-			(
-				'reorder' => 'desc',
-				'id' => 'desc',
-			),
-			'col' => '*|cate_id|id|',
-		),
-	),
-);

+ 5 - 4
database/data.php

@@ -27,7 +27,7 @@ return array
 			'desc' 		=> '',
 			'match' 	=> 'is_numeric',
 			'search'	=> 'order',
-			//'list'		=> true,
+			'list'		=> true,
 			'order'		=> 'desc',
 		),
 
@@ -38,7 +38,7 @@ return array
 			'default' 	=> '1',
 			'desc' 		=> '请选择项目id',
 			'match' 	=> 'is_numeric',
-			'list'		=> '{pid} > 0 ? Dever::load("spider/project-one#name", {pid}) : "无"',
+			//'list'		=> '{pid} > 0 ? Dever::load("spider/project-one#name", {pid}) : "无"',
 			# 开启显示控制,可以控制下边的表单
 			//'show'		=> 'cate_id',
 		),
@@ -52,7 +52,7 @@ return array
 			'match' 	=> 'is_string',
 			//'update'	=> 'editor',
 			'search'	=> 'fulltext',
-			//'list'		=> 'Dever::load("spider/data.value", {id})',
+			'list'		=> 'Dever::load("spider/data.value", {id})',
 			//'modal'		=> '查看详情',
 		),
 
@@ -94,7 +94,8 @@ return array
 		# 开启批量管理
 		//'mul' => true,
 		'insert' => false,
-		'delete' => false,
+		'edit' => false,
+		//'delete' => false,
 	),
 
 	

+ 57 - 92
database/project.php

@@ -8,43 +8,14 @@ $local = array
 	2 => '开启',
 );
 
-
 $status = array
 (
-	1 => '待机',
-	2 => '入队',
-	3 => '停止',
+	1 => '未开始',
+	2 => '已完成',
+	3 => '队列中',
+	4 => '运行中',
 );
 
-$cate = function()
-{
-	$array = array();
-	$cate = Dever::load('spider/cate-main');
-	if ($cate) {
-		$array += $cate;
-	}
-	return $array;
-};
-
-$cate_child = function()
-{
-	$cate = Dever::load('spider/cate-child');
-	return $cate;
-};
-
-$search_cate = function()
-{
-	$array = array
-	(
-		-1 => array('id' => '-1', 'name' => '所有分类'),
-	);
-	$cate = Dever::load('spider/cate.get');
-	if ($cate) {
-		$array += $cate;
-	}
-	return $array;
-};
-
 $project = function()
 {
 	$array = array
@@ -58,7 +29,16 @@ $project = function()
 	return $array;
 };
 
-$id = Dever::input('search_option_pid', -1);
+$info = Dever::load('manage/project.get');
+
+$path = $info['spider']['path'];
+
+if(isset($info['spider']['setup']))
+{
+	$path = $info['spider']['setup'];
+}
+
+$id = Dever::input('option_pid', -1);
 
 return array
 (
@@ -66,9 +46,13 @@ return array
 	'name' => 'project',
 	# 显示给用户看的名称
 	'lang' => '项目管理',
+	'status' => $status,
+	'path' => $path,
 	# 后台菜单排序
 	'order' => 20,
-	//'desc' => 'API:http://192.168.15.10/plant/spider/??project_id=1',
+	'desc' => Dever::markdown('**启动守护进程的方法,以下三项任选其一:**
+	1. 常规任务:请将 ```* * * * * root php '.$path.'daemon/main.php``` 放到cron中[建议每分钟执行一次] 
+	2. 长期运行:请执行 ```php '.$path.'daemon/main.php 1>/dev/null 2>&1 &``` 指令,放置后台运行[一般用于队列等需要长期运行的项目]'),
 	# 数据结构
 	'struct' => array
 	(
@@ -81,7 +65,7 @@ return array
 			'desc' 		=> '',
 			'match' 	=> 'is_numeric',
 			'search'	=> 'order',
-			'list'		=> true,
+			//'list'		=> true,
 			'order'		=> 'desc',
 		),
 		
@@ -109,45 +93,15 @@ return array
 			'search'	=> 'select',
 			'option' 	=> $project,
 		),
-
-		'cate_id_parent'		=> array
-		(
-			'type' 		=> 'int-11',
-			'name' 		=> '选择分类',
-			'default' 	=> '0',
-			'desc' 		=> '请选择分类',
-			'match' 	=> 'is_numeric',
-			'update'	=> 'select',
-			//'search'	=> 'order,select',
-			'list_name' => '父级分类',
-			'option' 	=> $cate,
-			# 当值改变时,执行下一步操作
-			'child_name' => 'cate_id',
-			'child' => $cate_child,
-			'child_value' => '{cate_id}',
-		),
-		
-		'cate_id'		=> array
-		(
-			'type' 		=> 'int-11',
-			'name' 		=> '子分类',
-			'default' 	=> '0',
-			'desc' 		=> '请选择子分类',
-			'match' 	=> 'is_numeric',
-			'search'	=> 'group',
-			'option' 	=> $search_cate,
-			'list_name'	=> '分类',
-			'list'		=> '{cate_id} > 0 ? Dever::load("spider/cate-one#name", {cate_id}) : "无"',
-		),
 		
 		'site'		=> array
 		(
 			'type' 		=> 'text-255',
-			'name' 		=> '采集网址-多个网址换行隔开,如有分页,请写成这样pg{page=1}',
+			'name' 		=> '采集网址-如有分页,请写成这样{page=1}',
 			'default' 	=> '',
 			'desc' 		=> '采集网址',
 			'match' 	=> 'option',
-			'update'	=> 'textarea',
+			'update'	=> $id < 0 ? 'hidden': 'textarea',
 			//'list'		=> true,
 			//'edit'		=> 'textarea',
 		),
@@ -159,7 +113,7 @@ return array
 			'default' 	=> '',
 			'desc' 		=> '采集规则',
 			'match' 	=> 'is_string',
-			'update'	=> 'textarea',
+			'update'	=> $id < 0 ? 'hidden': 'textarea',
 			//'edit'		=> true,
 			//'list'		=> true,
 		),
@@ -167,11 +121,11 @@ return array
 		'page_num'		=> array
 		(
 			'type' 		=> 'int-11',
-			'name' 		=> '采集页数-值为0则默认采集1000页',
+			'name' 		=> '采集页数-值为0则默认采集100页',
 			'default' 	=> '0',
 			'desc' 		=> '采集页数',
 			'match' 	=> 'option',
-			'update'	=> 'text',
+			'update'	=> $id < 0 ? 'hidden': 'text',
 		),
 
 		'status'		=> array
@@ -182,41 +136,48 @@ return array
 			'desc' 		=> '状态',
 			'match' 	=> 'is_numeric',
 			'option' 	=> $status,
-			'list'		=> true,
-			'update'	=> $id > 0 ? 'hidden' : 'radio',
-			'edit'		=> true,
+			'list'		=> 'Dever::load("spider/lib/project.status", {id})',
+			//'update'	=> $id > 0 ? 'hidden' : 'radio',
+			//'edit'		=> true,
 		),
 
 		'num'		=> array
 		(
 			'type' 		=> 'int-11',
-			'name' 		=> '抓取次数',
+			'name' 		=> '采集次数',
+			'default' 	=> '0',
+			'desc' 		=> '采集次数',
+			'match' 	=> 'is_numeric',
+		),
+
+		'cur_page'		=> array
+		(
+			'type' 		=> 'int-11',
+			'name' 		=> '当前采集页数',
 			'default' 	=> '0',
-			'desc' 		=> '抓取次数',
+			'desc' 		=> '当前采集页数',
 			'match' 	=> 'is_numeric',
-			'list'		=> $id > 0 ? false : true,
 		),
 
 		'sdate'		=> array
 		(
 			'type' 		=> 'int-11',
-			'name' 		=> '抓取开始时间',
+			'name' 		=> '下次采集时间',
 			'default' 	=> '0',
-			'desc' 		=> '抓取开始时间',
+			'desc' 		=> '下次采集时间',
 			'match' 	=> 'is_numeric',
-			'update'	=> $id > 0 ? 'hidden' : 'date',
-			'list'		=> $id > 0 ? false : '{sdate} > 0 ? date("Y-m-d H:i:s", {sdate}) : ""',
+			//'update'	=> $id < 0 ? 'hidden' : 'date',
 			'callback'	=> 'maketime',
 		),
 
 		'interval'		=> array
 		(
 			'type' 		=> 'int-11',
-			'name' 		=> '抓取间隔秒数-填写开始时间之后的间隔抓取的秒数,为0则只抓取一次',
+			'name' 		=> '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
 			'default' 	=> '0',
-			'desc' 		=> '抓取间隔秒数',
+			'desc' 		=> '采集间隔秒数',
 			'match' 	=> 'is_numeric',
-			'update'	=> $id > 0 ? 'hidden' : 'text',
+			'update'	=> $id < 0 ? 'hidden' : 'text',
 		),
 
 		'reorder'		=> array
@@ -257,13 +218,13 @@ return array
 
 	'manage' => array
 	(
-		'delete' => false,
+		//'delete' => false,
 		# 更新数据时,要显示的按钮,这里填写js脚本事件即可。保存当前数据可为固定参数:"save-data",复制数据为:copy-data
 		'update_button' => array
 		(
 			'提交保存' => 'save-data',
 			'放弃保存' => "msg({status:1,msg:'yes'})",
-			'复制数据' => 'copy-data',
+			//'复制数据' => 'copy-data',
 		),
 
 		# 不允许编辑
@@ -275,13 +236,17 @@ return array
 		# 可以删除
 		'list_button' => array
 		(
-			'list_col' => array('字段设置', '"col&search_option_pid={id}&oper_parent=project"'),
-			'add' => array('新增子项目', '"project&search_option_pid={id}&oper_parent=project&oper_save_jump=project"'),
-			'list_data' => array('数据列表', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"'),
-			//'delete' => array('采集数据', 'Dever::url("spider/data.daemon?id={id}&")'),
+			'update' => array('编辑', '"project&option_pid={project_id}"'),
+			'delete' => '删除',
+			'br1' => array('<br /><br />'),
+			'add' => array('新增子项目', '"project&option_pid={id}&oper_parent=project&oper_save_jump=project"', '{project_id} == -1'),
+
+			'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{project_id} > 0 && {status} <= 2'),
+			'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"', '{project_id} > 0'),
+			'br2' => array('<br /><br />'),
+			'new' => array('测试采集', 'Dever::url("spider/lib/api.test?id={id}")', '{project_id} > 0'),
+			'oper1' => array('开始采集', 'Dever::url("spider/lib/api.add?id={id}")', '{project_id} > 0 && {status} <= 2'),
 		),
-		
-		//'desc' => '注意:命令码',
 	),
 
 	# request 请求接口定义

+ 94 - 33
lib/Api.php

@@ -5,64 +5,100 @@ use Dever;
 class Api
 {
 	private $queue;
-	public function run($id)
+	public function add_api($id)
 	{
 		# 写入队列
-		$project = new Project($id);
-		$config = $project->get();
-		if (!$this->queue) {
-			$this->queue = new Queue('db');
-		}
-		$send['id'] = $config['id'];
-		$send['collect_rule'] = $config['collect_rule'];
-		foreach ($config['url'] as $k => $v) {
-			$this->queue->push($v, $send, $config['page_num']);
+		$config = Dever::load('spider/lib/project')->get($id);
+		if (!$config) {
+			Dever::alert('项目不存在');
 		}
+		Dever::load('spider/lib/project')->set($config, 3);
+		Dever::load('spider/lib/queue')->push($config['id']);
+		return 'yes';
 	}
 
 	public function test_api($id)
 	{
 		Dever::setInput('test', 1);
-		$this->queue = new Queue('data');
 		$this->run($id);
-		$this->cron();
 		return 'yes';
 	}
 
-	private function cron()
+	public function cron()
 	{
 		if (!$this->queue) {
-			$this->queue = new Queue('db');
+			$this->queue = new Queue();
 		}
 		Dever::import('task');
-		$state = true;
-		while ($state) {
-			$state = $this->load();
+		while (1) {
+			$this->load();
 		}
 	}
 
 	private function load()
 	{
-		$config = $this->queue->pop();
-		if ($config) {
-			# 此处开task
-			$col = $this->col($config['id']);
-			$this->parse($config['url'], $config['id'], $config['collect_rule'], $col);
-			/*
-			Dever::task(function() use($config, $this)
-			{
-				$col = $this->col($config['id']);
-				$this->parse($config['url'], $config['id'], $config['collect_rule'], $col);
-			});
-			*/
-			
-			$state = true;
+		try {
+			$id = $this->queue->pop();
+			if ($id) {
+				$config = Dever::load('spider/lib/project')->get($id);
+				if ($config && $config['status'] <= 2 && time() >= $config['sdate']) {
+					# 推到后台运行
+					# 获取当前执行的进程数量
+		            $num = $this->getNum();
+		            if ($num >= 1000) {
+		                # 等会儿再执行
+		                sleep(60);
+		            }
+		            $this->popen($id);
+				}
+			}
+			return true;
+		} catch (\Exception $e) {
+            return true;
+        }
+	}
+
+	public function run($id)
+	{
+		$config = Dever::load('spider/lib/project')->get($id);
+		if (!$config) {
+			return false;
+		}
+		# 此处开task
+		$col = $this->col($config['id']);
+
+		if (strpos($config['site'], '{') !== false && strpos($config['site'], '}') !== false) {
+			$this->preg($config, $col);
 		} else {
-			$state = false;
+			Dever::load('spider/lib/project')->set($config, 4, 1);
+			$this->parse($config['site'], $config['id'], $config['collect_rule'], $col);
 		}
-		return $state;
+		/*
+		Dever::task(function() use($config, $this)
+		{
+			$col = $this->col($config['id']);
+			$this->parse($config['url'], $config['id'], $config['collect_rule'], $col);
+		});
+		*/
+		Dever::load('spider/lib/project')->set($config, 2);
 	}
 
+    /**
+     * Push a project to a detached child process.
+     *
+     * Builds a `php <path>daemon/run.php -send id=<id>` command line with its
+     * output discarded and a trailing `&`, then hands it to exec().
+     *
+     * @param mixed $id Project id; appended to the command line as-is.
+     */
+    public function popen($id)
+    {
+        $script = Dever::db('spider/project')->config['path'] . 'daemon/run.php';
+        exec(sprintf('php %s -send id=%s 1>/dev/null 2>&1 &', $script, $id));
+    }
+
+    /**
+     * Get the number of child processes currently running.
+     *
+     * Runs a `ps | grep | wc -l` pipeline and returns whatever exec() hands
+     * back for it (the last line of the pipeline's output).
+     * NOTE(review): the grep pattern is "gateway/api.task/runOne" while popen()
+     * launches "daemon/run.php" — confirm the pattern matches the spawned workers.
+     *
+     * @return string|false
+     */
+    public function getNum()
+    {
+        return exec('ps -ef | grep gateway/api.task/runOne | grep -v grep | wc -l');
+    }
+
 	private function col($project)
 	{
 		return Dever::db('spider/col')->getList(['where_pid' => $project]);
@@ -73,4 +109,29 @@ class Api
 		$parse = new Parse($url, $project, $rule, $col);
 		return $parse->get();
 	}
+
+	/**
+	 * Expand the first `{...}` placeholder found in the project's site URL.
+	 *
+	 * The text between the braces is parsed as a query string (e.g. `page=1`)
+	 * and passed to page() along with the full placeholder that has to be
+	 * replaced. Nothing happens when no non-empty placeholder is present.
+	 *
+	 * @param array $config Project row; `site` and `page_num` are read here.
+	 * @param mixed $col    Column definitions, forwarded untouched to page().
+	 */
+	private function preg($config, $col)
+	{
+		preg_match_all('/{(.*?)}/i', $config['site'], $found);
+		if (empty($found[1][0])) {
+			return;
+		}
+		# A page count of zero or less falls back to 100 pages.
+		if ($config['page_num'] <= 0) {
+			$config['page_num'] = 100;
+		}
+		$placeholder = $found[0][0];
+		parse_str($found[1][0], $query);
+		$this->page($query, $placeholder, $config, $col);
+	}
+
+	/**
+	 * Crawl a site URL whose placeholder has already been parsed.
+	 *
+	 * With a truthy `page` parameter, every page from that number up to
+	 * `page_num` is fetched, and the project row is updated (status 4 plus
+	 * the current page) before each request. Without one, the placeholder is
+	 * stripped and the single resulting URL is fetched.
+	 *
+	 * @param array  $param   Parsed placeholder contents; `page` is the first page number.
+	 * @param string $replace Placeholder text (braces included) to substitute in the URL.
+	 * @param array  $config  Project row; reads `site`, `id`, `collect_rule`, `page_num`.
+	 * @param mixed  $col     Column definitions, forwarded untouched to parse().
+	 */
+	private function page($param, $replace, $config, $col)
+	{
+		if (isset($param['page']) && $param['page']) {
+			for ($i = $param['page']; $i <= $config['page_num']; $i++) {
+				$url = str_replace($replace, $i, $config['site']);
+				Dever::load('spider/lib/project')->set($config, 4, $i);
+				$this->parse($url, $config['id'], $config['collect_rule'], $col);
+			}
+		} else {
+			Dever::load('spider/lib/project')->set($config, 4, 1);
+			# The URL template lives in $config['site']; the previous code read an
+			# undefined $value here and therefore requested an empty URL.
+			$url = str_replace($replace, '', $config['site']);
+			$this->parse($url, $config['id'], $config['collect_rule'], $col);
+		}
+	}
 }

+ 0 - 62
lib/Cron.php

@@ -1,62 +0,0 @@
-<?php
-namespace Spider\Lib;
-use Dever;
-
-class Cron
-{
-	private $queue;
-	public function project()
-	{
-		$param['option_sdate'] = time();
-		return Dever::db('spider/project')->get($param);
-	}
-
-	public function test_api($id)
-	{
-		$project = $this->project();
-		foreach ($project as $k => $v) {
-			
-		}
-	}
-
-	private function cron()
-	{
-		if (!$this->queue) {
-			$this->queue = new Queue('db');
-		}
-		Dever::import('task');
-		$state = true;
-		while ($state) {
-			$state = $this->load();
-		}
-	}
-
-	private function load()
-	{
-		$config = $this->queue->pop();
-		if ($config) {
-			# 此处开task
-			Dever::task(function() use($config, $this)
-			{
-				$col = $this->col($config['id']);
-				$this->parse($config['url'], $config['id'], $config['collect_rule'], $col);
-			});
-			
-			$state = true;
-		} else {
-			$state = false;
-		}
-		return $state;
-	}
-
-	private function col($project)
-	{
-		return Dever::db('spider/col')->getList(['where_pid' => $project]);
-	}
-
-	private function parse($url, $project, $rule, $col)
-	{
-		$parse = new Parse($url, $project, $rule, $col);
-		return $parse->get();
-	}
-}

+ 10 - 0
lib/Doc.php

@@ -93,6 +93,16 @@ class Doc
 		return ($this->getClass())::find($doc, $rule);
 	}
 
+	/**
+	 * Prepare one raw item before the field rules are applied to it.
+	 *
+	 * A string containing "http" is passed to $this->doc(); anything else is
+	 * handed to the static get() of the class returned by getClass().
+	 *
+	 * @param mixed $data Raw item.
+	 * @return mixed Result of whichever of the two calls was made.
+	 */
+	public function init($data)
+	{
+		$looksLikeUrl = is_string($data) && strstr($data, 'http');
+		return $looksLikeUrl ? $this->doc($data) : ($this->getClass())::get($data);
+	}
+
 	public function rule($data, $col, $config)
 	{
 		$name = '字段[' . $config['name'] . '('.$config['key'].')]' . '"';

+ 3 - 3
lib/Log.php

@@ -22,21 +22,21 @@ class Log
 
 	public function add($string)
 	{
-		$time = time();
+		$time = Dever::msectime();
 		if ($this->time == 0) {
 			$hs = 0;
 			$this->time = $time;
 		} else {
 			$hs = $time - $this->time;
 		}
-		$time = date('Y-m-d H:i:s', $time);
+		$time = date('Y-m-d H:i:s');
 		$content = array
 		(
 			'时间:' . $time,
 			'耗时:' . $hs . 'MS',
 			'内容:' . str_replace("\n", '<--', $string),
 		);
-		$this->content[] = implode(' |-| ', $content);
+		$this->content[] = implode(' ', $content);
 	}
 
 	public function out()

+ 11 - 3
lib/Parse.php

@@ -19,8 +19,15 @@ class Parse
 			if (!is_array($data) && !is_object($data)) {
 				$data = Dever::json_decode($data);
 			}
-			foreach ($data as $k => $v) {
-				$this->data[$k] = $this->load($doc, $v, $col, $project);
+			if ($data) {
+				$domain = parse_url($url);
+				$host = $domain['scheme'] . '://' . $domain['host'];
+				foreach ($data as $k => $v) {
+					if (!strstr($v, 'http')) {
+						$v = $host . $v;
+					}
+					$this->data[$k] = $this->load($doc, $v, $col, $project);
+				}
 			}
 		}
 		$doc->saveLog();
@@ -34,6 +41,7 @@ class Parse
 	private function load($doc, $data, $col, $project)
 	{
 		$result = $table = array();
+		$data = $doc->init($data);
 		foreach ($col as $v) {
 			$callback = false;
 			if (strpos($v['key'], '.') !== false) {
@@ -69,7 +77,7 @@ class Parse
 	private function update($data, $project)
 	{
 		$param['option_pid'] = $project;
-		$param['option_value'] = json_encode($data);
+		$param['option_value'] = json_encode($data, JSON_UNESCAPED_UNICODE);
 		$info = Dever::db('spider/data')->one($param);
 		if ($info) {
 			$update = array();

+ 44 - 19
lib/Project.php

@@ -8,35 +8,60 @@ class Project
 {
 	private $config;
 
-	public function __construct($id)
+	public function status($id)
 	{
-		if (!$id) {
-			Dever::alert('id不存在');
+		$data = $this->get($id);
+
+		if (!$data) {
+			return '';
 		}
-		$this->config = Dever::db('spider/project')->one($id);
-		$this->check();
-	}
+		$num = Dever::db('spider/data')->total(array('pid' => $id));
+		$status = Dever::db('spider/project')->config['status'];
+		$table['采集状态'] = $status[$data['status']];
+		$table['采集次数'] = $data['num'];
+		$table['采集间隔'] = $data['interval'] . '秒';
+		$table['采集页数'] = $data['cur_page'];
+		$table['采集数据'] = $num . '条';
+		$table['采集时间'] = $data['sdate'] > 0 ? date('Y-m-d H:i:s', $data['sdate']) : '无';
 
-	public function get()
-	{
-		$this->setting();
-		return $this->config;
+		return Dever::table($table);
 	}
 
-	private function check()
+	public function get($id)
 	{
-		if (!$this->config) {
-			Dever::alert('项目未定义');
+		if (!$id) {
+			return false;
 		}
-
-		if ($this->config['status'] != 1) {
-			//Dever::alert('项目不是待机状态');
+		$config = Dever::db('spider/project')->one($id);
+		if ($config) {
+			if ($config['project_id'] == -1) {
+				return false;
+			}
+			return $config;
+		} else {
+			return false;
 		}
 	}
 
-	private function setting()
+	/**
+	 * Store a status change for a project, together with its bookkeeping fields.
+	 *
+	 * Status values follow the `status` option list of the project table:
+	 * 1 not started, 2 finished, 3 queued, 4 running.
+	 *
+	 * @param array     $config Project row; reads `id`, `num` and `interval`.
+	 * @param int       $status New status to store.
+	 * @param int|false $page   Current page to record, or false to leave it alone.
+	 * @return mixed False in test mode, otherwise the result of the update call.
+	 */
+	public function set($config, $status, $page = false)
 	{
-		Dever::db('spider/project')->update(['status' => 2, 'where_id' => $this->config['id']]);
-		$this->config['url'] = explode("\r\n", $this->config['site']);
+		# Test runs (input "test" equal to 1) never modify the stored project.
+		$test = Dever::input('test');
+		if ($test == 1) {
+			return false;
+		}
+		$data = array();
+		$data['status'] = $status;
+		if ($page) {
+			$data['cur_page'] = $page;
+		}
+		
+		$data['where_id'] = $config['id'];
+		if ($status == 3) {
+			# Queued for a run: increment the stored counter. The previous
+			# `$data['num'] += 1` started from an unset key, so it always wrote 1.
+			$data['num'] = (isset($config['num']) ? (int) $config['num'] : 0) + 1;
+		}
+		if ($status == 2 && $config['interval'] > 0) {
+			# Finished: schedule the next run `interval` seconds from now.
+			$data['sdate'] = time() + $config['interval'];
+		}
+		return Dever::db('spider/project')->update($data);
 	}
 }

+ 4 - 46
lib/Queue.php

@@ -6,62 +6,20 @@ use Dever;
 
 class Queue
 {
-	public function __construct($method)
+	public function __construct($method = false)
 	{
-		if (!Dever::config('base')->queue) {
+		if ($method) {
 			Dever::config('base')->queue = array('method' => $method);
-		} else {
-			Dever::config('base')->queue['method'] = $method;
 		}
-		
 		Dever::import('queue');
 	}
 
-	public function push($url, $config, $num = 0)
+	public function push($project)
 	{
-		if (strpos($url, '{') !== false && strpos($url, '}') !== false) {
-			$this->preg($url, $num, $config);
-		} else {
-			$this->push_db($url, $config);
-		}
-		return true;
+		Dever::push($project);
 	}
 
 	public function pop()
-	{
-		return $this->pop_db();
-	}
-
-	private function preg($value, $num, $config)
-	{
-		$pat = '/{(.*?)}/i';
-		preg_match_all($pat, $value, $match);
-		if (isset($match[1][0]) && $match[1][0]) {
-			if ($num <= 0) $num = 1000;
-			parse_str($match[1][0], $param);
-			$this->page($param, $match[0][0], $value, $num, $config);
-		}
-	}
-
-	private function page($param, $replace, $value, $num, $config)
-	{
-		if (isset($param['page'])) {
-			for ($i = $param['page']; $i <= $num; $i++) {
-				$url = str_replace($replace, $i, $value);
-				$this->push($url, $config);
-			}
-		} else {
-			$this->push(str_replace($replace, '', $value), $config);
-		}
-	}
-
-	private function push_db($value, $config)
-	{
-		$config['url'] = $value;
-		Dever::push($config);
-	}
-
-	private function pop_db()
 	{
 		return Dever::pop();
 	}

+ 7 - 8
lib/doc/Dom.php

@@ -20,7 +20,7 @@ class Dom
 		return self::findAttr($dom, $attr);
 	}
 
-	public function each($rule)
+	public static function each($rule)
 	{
 		$attr = '';
 		if (strpos($rule, '.each().') !== false) {
@@ -31,7 +31,7 @@ class Dom
 		return array($rule, $attr);
 	}
 
-	public function findAttr($dom, $attr)
+	public static function findAttr($dom, $attr)
 	{
 		if (!$attr) {
 			return $dom;
@@ -43,14 +43,13 @@ class Dom
 		return json_encode($data, JSON_UNESCAPED_UNICODE);
 	}
 
-	public static function rule($doc, $dom, $col, $rule, $key)
+	public static function get($data)
 	{
-		if (is_string($dom) && strstr($dom, 'http')) {
-			$dom = $doc->doc($dom);
-		} else {
-			$dom = pq($dom);
-		}
+		return pq($data);
+	}
 
+	public static function rule($doc, $dom, $col, $rule, $key)
+	{
 		$result = Core::rule($doc, $dom, $col, $rule, $key);
 		return $result;
 	}

+ 5 - 0
lib/doc/Json.php

@@ -9,6 +9,11 @@ class Json
 		return json_decode($html, true);
 	}
 
+	# Pass-through: hands the given item back exactly as received.
+	# NOTE(review): presumably exists so callers can invoke get() on any doc class — confirm against Doc::init().
+	public static function get($data)
+	{
+		return $data;
+	}
+
 	public static function find($data, $rule)
 	{
 		$rule = str_replace('$json', '$data', $rule);

+ 1 - 1
package.json

@@ -1,3 +1,3 @@
 {
-	"rely": "queue,process"
+	"rely": "queue,process,task"
 }

+ 0 - 30
src/Cate.php

@@ -1,30 +0,0 @@
-<?php
-
-namespace Spider\Src;
-
-use Dever;
-
-class Cate
-{
-	/**
-	 * 获取栏目列表
-	 *
-	 * @return mixed
-	 */
-	public function get()
-	{
-		$data = Dever::load('spider/cate-main');
-
-		if ($data) {
-			$child = Dever::load('spider/cate-child');
-
-			foreach ($data as $k => $v) {
-				if (isset($child[$k])) {
-					$data[$k]['child'] = $child[$k];
-				}
-			}
-		}
-
-		return $data;
-	}
-}

+ 9 - 1
src/Data.php

@@ -10,7 +10,15 @@ class Data
 	public function value($id)
 	{
 		$data = Dever::load('spider/data-one', $id);
-		$data['value'] = Dever::table(json_decode($data['value'], true));
+		# Decode the stored JSON payload. A missing record or malformed JSON
+		# becomes an empty list instead of a non-array fed into the loop below.
+		$value = json_decode(isset($data['value']) ? $data['value'] : '', true);
+		if (!is_array($value)) {
+			$value = array();
+		}
+		# Label each value with its column's display name; keys without a
+		# matching column are left out of the table.
+		# NOTE(review): the lookup is by key only, not scoped to the record's
+		# project — confirm keys are unique across projects.
+		$result = array();
+		foreach ($value as $k => $v) {
+			$col = Dever::db('spider/col')->one(array('key' => $k));
+			if ($col) {
+				$result[$col['name'] . '('.$k.')'] = $v;
+			}
+		}
+		$data = array('value' => Dever::table($result));
 
 		return $data['value'];
 	}