dever 3 年之前
父節點
當前提交
09ee10eafb
共有 5 個文件被更改,包括 191 次插入和 40 次删除
  1. 114 0
      database/cate.php
  2. 2 2
      database/col.php
  3. 52 25
      database/project.php
  4. 3 2
      lib/Parse.php
  5. 20 11
      lib/doc/Core.php

+ 114 - 0
database/cate.php

@@ -0,0 +1,114 @@
+<?php
+# Table definition for "cate": returns an array with the keys name, lang, order, struct, default, manage and request.
+return array
+(
+    # Table name
+    'name' => 'cate',
+    # Display name shown to users
+    'lang' => '采集源设置', # label text: "collection source settings"
+    # Sort order in the admin menu
+    'order' => 9,
+    # Data structure
+    'struct' => array
+    (
+        'id'        => array
+        (
+            'type'      => 'int-11',
+            'name'      => 'ID',
+            'default'   => '',
+            'desc'      => '',
+            'match'     => 'is_numeric',
+            'search'    => 'order',
+            'list'      => true,
+            'order'     => 'desc',
+        ),
+        
+        'name'      => array
+        (
+            'type'      => 'varchar-32',
+            'name'      => '分类名称', # label text: "category name"
+            'default'   => '',
+            'desc'      => '请输入名称',
+            'match'     => 'is_string',
+            'update'    => 'text',
+            'search'    => 'fulltext',
+            'list'      => true,
+        ),
+        
+        'reorder'       => array
+        (
+            'type'      => 'int-11',
+            'name'      => '排序(数值越大越靠前)', # label text: "sort order (larger values come first)"
+            'default'   => '1',
+            'desc'      => '请输入排序',
+            'match'     => 'option',
+            'update'    => 'text',
+            'search'    => 'order',
+            'list'      => true,
+            'order'     => 'desc',
+            'edit'      => true,
+        ),
+
+        'state'     => array
+        (
+            'type'      => 'tinyint-1',
+            'name'      => '状态', # label text: "status"
+            'default'   => '1',
+            'desc'      => '请选择状态',
+            'match'     => 'is_numeric',
+        ),
+        
+        'cdate'     => array
+        (
+            'type'      => 'int-11',
+            'name'      => '录入时间', # label text: "entry time"
+            'match'     => array('is_numeric', time()), # time() is evaluated each time this file is loaded
+            'desc'      => '',
+            # Only takes effect on insert
+            'insert'    => true,
+            'list'      => 'date("Y-m-d H:i:s", {cdate})',
+        ),
+    ),
+
+    # Default values
+    'default' => array
+    (
+        'col' => 'name,reorder,cdate',
+        'value' => array
+        (
+            '"默认分类",100,' . time(), # presumably one value per column named in 'col'; the name reads "default category"
+        ),
+    ),
+
+    'manage' => array
+    (
+        'insert' => false,
+        'edit' => false,
+
+        # Custom quick add and edit
+        'button' => array
+        (
+            '新增' => array('fast'), # button text: "add"
+        ),
+        # Quick update
+        'list_button' => array
+        (
+            'edit' => array('编辑'), # button text: "edit"
+        ),
+    ),
+
+    'request' => array
+    (
+        'getAll' => array
+        (
+            # Regex or function to match; optional
+            'option' => array
+            (
+                'state' => 1,
+            ),
+            'type' => 'all',
+            'order' => array('reorder' => 'desc','id' => 'desc'),
+            'col' => '*',
+        ),
+    ),
+);

+ 2 - 2
database/col.php

@@ -94,7 +94,7 @@ return array
 		(
 			'type' 		=> 'tinyint-1',
 			'name' 		=> '资源是否本地化-资源包括图片、视频等,本地化后将转成本地地址',
-			'default' 	=> '1',
+			'default' 	=> '2',
 			'desc' 		=> '资源是否本地化',
 			'match' 	=> 'is_numeric',
 			'option' 	=> $local,
@@ -116,7 +116,7 @@ return array
 		'collect_rule'		=> array
 		(
 			'type' 		=> 'varchar-500',
-			'name' 		=> '采集规则-支持dom解析、正则,如$(".info .title a").html(),或者直接输入正则表达式',
+			'name' 		=> '采集规则-支持dom解析、JSON、正则,dom解析如$(".info .title a").html(),JSON如data.url,正则就是正则表达式,如果想直接用其他字段,直接输入其他字段的唯一标识符即可,输入第二行可以解析其他字段',
 			'default' 	=> '',
 			'desc' 		=> '采集规则',
 			'match' 	=> 'is_string',

+ 52 - 25
database/project.php

@@ -41,6 +41,12 @@ $project = function()
 	return $array;
 };
 
+$cate = function()
+{
+	# Hand back whatever the 'spider/cate-state' loader yields, untouched.
+	return Dever::load('spider/cate-state');
+};
+
 $info = Dever::load('manage/project.get');
 
 $path = $info['spider']['path'];
@@ -50,14 +56,12 @@ if(isset($info['spider']['setup']))
 	$path = $info['spider']['setup'];
 }
 
-$id = Dever::input('option_pid', -1);
-
 return array
 (
 	# 表名
 	'name' => 'project',
 	# 显示给用户看的名称
-	'lang' => '项目管理',
+	'lang' => '项目配置',
 	'status' => $status,
 	'path' => $path,
 	# 后台菜单排序
@@ -79,6 +83,19 @@ return array
 			//'list'		=> true,
 			'order'		=> 'desc',
 		),
+
+		'cate_id'		=> array
+		(
+			'type' 		=> 'int-11',
+			'name' 		=> '采集源',
+			'default' 	=> '1',
+			'desc' 		=> '采集源',
+			'match' 	=> 'is_numeric',
+			'update'	=> 'select',
+			'search'	=> 'select',
+			'option' 	=> $cate,
+			'list'		=> true,
+		),
 		
 		'name'		=> array
 		(
@@ -97,11 +114,11 @@ return array
 		(
 			'type' 		=> 'int-11',
 			'name' 		=> '上级项目',
-			'default' 	=> $id,
+			//'default' 	=> $id,
 			'desc' 		=> '请选择上级分类',
 			'match' 	=> 'is_numeric',
-			'update'	=> 'select',
-			'search'	=> 'select',
+			//'update'	=> 'select',
+			//'search'	=> 'select',
 			'option' 	=> $project,
 		),
 		
@@ -112,7 +129,7 @@ return array
 			'default' 	=> '',
 			'desc' 		=> '采集网址',
 			'match' 	=> 'is_string',
-			'update'	=> $id < 0 ? 'hidden': 'textarea',
+			'update'	=> 'textarea',
 			//'list'		=> true,
 			//'edit'		=> 'textarea',
 		),
@@ -126,7 +143,7 @@ return array
 			'match' 	=> 'is_numeric',
 			'option' 	=> $request_type,
 			'list'		=> true,
-			'update'	=> $id < 0 ? 'hidden': 'radio',
+			'update'	=> 'radio',
 			//'edit'		=> true,
 		),
 
@@ -139,7 +156,7 @@ return array
 			'match' 	=> 'is_numeric',
 			'option' 	=> $content_type,
 			'list'		=> true,
-			'update'	=> $id < 0 ? 'hidden': 'radio',
+			'update'	=> 'radio',
 			//'edit'		=> true,
 		),
 
@@ -150,7 +167,7 @@ return array
 			'default' 	=> '',
 			'desc' 		=> '采集规则',
 			'match' 	=> 'option',
-			'update'	=> $id < 0 ? 'hidden': 'textarea',
+			'update'	=> 'textarea',
 			//'edit'		=> true,
 			//'list'		=> true,
 		),
@@ -162,7 +179,7 @@ return array
 			'default' 	=> '0',
 			'desc' 		=> '采集页数',
 			'match' 	=> 'option',
-			'update'	=> $id < 0 ? 'hidden': 'text',
+			'update'	=> 'text',
 		),
 
 		'status'		=> array
@@ -174,7 +191,8 @@ return array
 			'match' 	=> 'is_numeric',
 			'option' 	=> $status,
 			'list'		=> 'Dever::load("spider/lib/project.status", {id})',
-			'update'	=> $id < 0 ? 'hidden': 'radio',
+			'modal'		=> '查看详情',
+			'update'	=> 'radio',
 			//'edit'		=> true,
 		),
 
@@ -203,7 +221,6 @@ return array
 			'default' 	=> '0',
 			'desc' 		=> '下次采集时间',
 			'match' 	=> 'is_numeric',
-			//'update'	=> $id < 0 ? 'hidden' : 'date',
 			'callback'	=> 'maketime',
 		),
 
@@ -214,17 +231,17 @@ return array
 			'default' 	=> '0',
 			'desc' 		=> '采集间隔秒数',
 			'match' 	=> 'is_numeric',
-			'update'	=> $id < 0 ? 'hidden' : 'text',
+			'update'	=> 'text',
 		),
 
 		'header'		=> array
 		(
-			'type' 		=> 'varchar-8000',
+			'type' 		=> 'text-255',
 			'name' 		=> 'Header参数-换行为多个参数,格式cookie: 11',
 			'default' 	=> '',
 			'desc' 		=> 'Header参数',
 			'match' 	=> 'option',
-			'update'	=> $id < 0 ? 'hidden': 'textarea',
+			'update'	=> 'textarea',
 			//'edit'		=> true,
 			//'list'		=> true,
 		),
@@ -236,7 +253,7 @@ return array
 			'default' 	=> '',
 			'desc' 		=> '其他参数',
 			'match' 	=> 'option',
-			'update'	=> $id < 0 ? 'hidden': 'textarea',
+			'update'	=> 'textarea',
 			//'edit'		=> true,
 			//'list'		=> true,
 		),
@@ -248,7 +265,7 @@ return array
 			'default' 	=> '',
 			'desc' 		=> '数据推送',
 			'match' 	=> 'option',
-			'update'	=> $id < 0 ? 'hidden': 'textarea',
+			'update'	=> 'textarea',
 			//'edit'		=> true,
 			//'list'		=> true,
 		),
@@ -289,6 +306,17 @@ return array
 		),
 	),
 
+	# 更新表结构
+	'alter' => array
+	(
+		2 => array
+		(
+			array('update', 'header', 'header', 'text-255  header'),
+			//array('add', 'config', 'config', 'int-11 1 配置'),
+		),
+		'version' => 2,
+	),
+
 	'manage' => array
 	(
 		//'delete' => false,
@@ -304,24 +332,23 @@ return array
 		//'edit' => false,
 		
 		# 列表页的类型
-		'list_type' => 'parent',
+		//'list_type' => 'parent',
 		
 		# 可以删除
 		'list_button' => array
 		(
 			'update' => array('编辑', '"project&option_pid={project_id}"'),
-			'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"', '{project_id} > 0'),
+			'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"'),
 			'delete' => '删除',
 			'br1' => array('<br /><br />'),
-			'add' => array('新增子项目', '"project&option_pid={id}&oper_parent=project&oper_save_jump=project"', '{project_id} == -1'),
 
-			'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{project_id} > 0 && {status} <= 2'),
+			'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
 
-			'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{project_id} > 0 && {status} <= 2'),
+			'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
 			
 			'br2' => array('<br /><br />'),
-			'new' => array('测试采集', 'Dever::url("spider/lib/api.test?id={id}")', '{project_id} > 0'),
-			'oper1' => array('开始采集', 'Dever::url("spider/lib/api.add?id={id}")', '{project_id} > 0 && {status} <= 2'),
+			'new' => array('测试采集', 'Dever::url("spider/lib/api.test?id={id}")'),
+			'oper1' => array('开始采集', 'Dever::url("spider/lib/api.add?id={id}")', '{status} <= 2'),
 		),
 	),
 

+ 3 - 2
lib/Parse.php

@@ -25,10 +25,11 @@ class Parse
 			if ($data) {
 				if (is_array($data)) {
 					$domain = parse_url($url);
-					$host = $domain['scheme'] . '://' . $domain['host'];
+					$host = $domain['scheme'] . '://' . $domain['host'] . '/';
+
 					foreach ($data as $k => $v) {
 						if (is_string($v) && !strstr($v, 'http')) {
-							$v = $host . $v;
+							$v = $host . ltrim($v, '/');
 						}
 						$this->data[$k] = $this->load($doc, $k, $v, $col, $set, $push, $project);
 					}

+ 20 - 11
lib/doc/Core.php

@@ -22,18 +22,27 @@ class Core
 	{
 		$rule = explode("\n", str_replace("\r", '', $rule));
 		if (isset($rule[0]) && $rule[0]) {
-			if (is_string($data)) {
-				$data = json_decode($data, true);
-			}
-			if (is_object($data)) {
-				$result = Dom::find($data, $rule[0]);
-			} elseif (is_array($data)) {
-				$temp = explode('.', $rule[0]);
+			if (isset($col[$rule[0]]) && $rule[0] != $key) {
+				if (isset($rule[1]) && $rule[1]) {
+					$result = Doc::getInstance($doc->getUrl($data, $col, $col[$rule[0]]), $rule[1])->get();
+					array_shift($rule);
+				} else {
+					$result = self::rule($doc, $data, $col, $col[$rule[0]]['collect_rule'], $key);
+				}
+			} else {
+				if (is_string($data)) {
+					$data = json_decode($data, true);
+				}
+				if (is_object($data)) {
+					$result = Dom::find($data, $rule[0]);
+				} elseif (is_array($data)) {
+					$temp = explode('.', $rule[0]);
 
-				$result = $data;
-				foreach ($temp as $k => $v) {
-					if (isset($result[$v])) {
-						$result = $result[$v];
+					$result = $data;
+					foreach ($temp as $k => $v) {
+						if (isset($result[$v])) {
+							$result = $result[$v];
+						}
 					}
 				}
 			}