<?php
 
# Label maps shared by the field definitions of this table.
# Keys are the integer values stored in the table; values are the labels shown to the user.

# Generic switch labels: 1 = off, 2 = on.
$local = array(
	1 => '关闭',
	2 => '开启',
);

# Collection status labels: 1 = not started, 2 = finished, 3 = queued, 4 = running.
$status = array(
	1 => '未开始',
	2 => '已完成',
	3 => '队列中',
	4 => '运行中',
);

# HTTP request method labels.
$request_type = array(
	1 => 'get',
	2 => 'post',
);

# Request body encoding labels: 1 = plain form, 2 = JSON.
$content_type = array(
	1 => '普通表单',
	2 => 'JSON格式',
);
 
# Option source for the "parent project" select box.
# Always offers a fixed entry keyed -1 (label: "parent project"), followed by
# whatever Dever::load('spider/project-main') returns.
# NOTE(review): 'spider/project-main' presumably resolves to the 'main' request
# of this table definition - confirm against the framework.
$project = function()
{
	$options = array(-1 => array('id' => '-1', 'name' => '父级项目'));
	$rows = Dever::load('spider/project-main');

	# Array union keeps the keys of both sides; the fixed -1 entry wins on conflict.
	return $rows ? $options + $rows : $options;
};
 
# Resolve the path used in the crontab hint of the table description.
# The 'setup' entry takes precedence over 'path'. Both lookups are guarded so a
# missing 'spider' entry (or missing 'path' key) no longer raises an
# undefined-key warning; $path is null in that case, as before.
$info = Dever::load('manage/project.get');
$path = null;
if (isset($info['spider']['setup'])) {
	$path = $info['spider']['setup'];
} elseif (isset($info['spider']['path'])) {
	$path = $info['spider']['path'];
}

# Parent project id taken from the 'option_pid' input; defaults to -1.
# The field definitions below hide the collection fields when this value is < 0.
$id = Dever::input('option_pid', -1);
 
# Table definition returned to the framework: metadata, column structure,
# admin ("manage") configuration and named request definitions.
return array(
	# Table name
	'name' => 'project',
	# Name shown to the user
	'lang' => '项目管理',
	'status' => $status,
	'path' => $path,
	# Sort position in the admin menu
	'order' => 20,
	# Markdown help text: how to start the daemon via crontab (suggested once per minute).
	'desc' => Dever::markdown('**启动守护进程的方法:**
	常规任务:请将 ```* * * * * root php '.$path.'daemon/main.php``` 放到crontab中[建议每分钟执行一次]'),

	# Column structure
	'struct' => array(
		# Project ID
		'id' => array(
			'type' => 'int-11',
			'name' => '项目ID',
			'default' => '',
			'desc' => '',
			'match' => 'is_numeric',
			'search' => 'order',
			'order' => 'desc',
		),

		# Project name
		'name' => array(
			'type' => 'varchar-100',
			'name' => '项目名称',
			'default' => '',
			'desc' => '请输入项目名称',
			'match' => 'is_string',
			'update' => 'text',
			'search' => 'order,fulltext',
			'list' => true,
			'edit' => true,
		),

		# Parent project; defaults to the 'option_pid' input, options come from $project
		'project_id' => array(
			'type' => 'int-11',
			'name' => '上级项目',
			'default' => $id,
			'desc' => '请选择上级分类',
			'match' => 'is_numeric',
			'update' => 'select',
			'search' => 'select',
			'option' => $project,
		),

		# URL to collect from; the label documents the {page=1} paging placeholder.
		# Like the other collection fields below, it is hidden when $id < 0.
		'site' => array(
			'type' => 'text-255',
			'name' => '采集网址-如有分页,请写成这样{page=1}',
			'default' => '',
			'desc' => '采集网址',
			'match' => 'is_string',
			'update' => $id < 0 ? 'hidden' : 'textarea',
		),

		# Request method (see $request_type)
		'request_type' => array(
			'type' => 'tinyint-1',
			'name' => '请求方式',
			'default' => '1',
			'desc' => '请求方式',
			'match' => 'is_numeric',
			'option' => $request_type,
			'list' => true,
			'update' => $id < 0 ? 'hidden' : 'radio',
		),

		# Request media type (see $content_type)
		'content_type' => array(
			'type' => 'tinyint-1',
			'name' => '请求媒体类型',
			'default' => '1',
			'desc' => '请求媒体类型',
			'match' => 'is_numeric',
			'option' => $content_type,
			'list' => true,
			'update' => $id < 0 ? 'hidden' : 'radio',
		),

		# Collection rule; per the label, empty means the whole page, and DOM or JSON expressions are supported
		'collect_rule' => array(
			'type' => 'varchar-500',
			'name' => '采集规则-为空则获取整个页面的内容,支持dom解析、json格式,dom解析$(".info .title a").each().attr("href"),json格式$json[\'data\']',
			'default' => '',
			'desc' => '采集规则',
			'match' => 'option',
			'update' => $id < 0 ? 'hidden' : 'textarea',
		),

		# Number of pages to collect; per the label, 0 means a default of 100 pages
		'page_num' => array(
			'type' => 'int-11',
			'name' => '采集页数-值为0则默认采集100页',
			'default' => '0',
			'desc' => '采集页数',
			'match' => 'option',
			'update' => $id < 0 ? 'hidden' : 'text',
		),

		# Collection status (see $status); the list cell is rendered through spider/lib/project.status
		'status' => array(
			'type' => 'tinyint-1',
			'name' => '状态',
			'default' => '1',
			'desc' => '状态',
			'match' => 'is_numeric',
			'option' => $status,
			'list' => 'Dever::load("spider/lib/project.status", {id})',
			'update' => $id < 0 ? 'hidden' : 'radio',
		),

		# Number of collection runs
		'num' => array(
			'type' => 'int-11',
			'name' => '采集次数',
			'default' => '0',
			'desc' => '采集次数',
			'match' => 'is_numeric',
		),

		# Page currently being collected
		'cur_page' => array(
			'type' => 'int-11',
			'name' => '当前采集页数',
			'default' => '0',
			'desc' => '当前采集页数',
			'match' => 'is_numeric',
		),

		# Time of the next collection run
		'sdate' => array(
			'type' => 'int-11',
			'name' => '下次采集时间',
			'default' => '0',
			'desc' => '下次采集时间',
			'match' => 'is_numeric',
			'callback' => 'maketime',
		),

		# Seconds between runs; per the label, 0 means collect only once
		'interval' => array(
			'type' => 'int-11',
			'name' => '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
			'default' => '0',
			'desc' => '采集间隔秒数',
			'match' => 'is_numeric',
			'update' => $id < 0 ? 'hidden' : 'text',
		),

		# Extra request headers, one per line
		'header' => array(
			'type' => 'varchar-8000',
			'name' => 'Header参数-换行为多个参数,格式cookie: 11',
			'default' => '',
			'desc' => 'Header参数',
			'match' => 'option',
			'update' => $id < 0 ? 'hidden' : 'textarea',
		),

		# Extra request parameters; per the label, JSON, typically used for POST
		'param' => array(
			'type' => 'varchar-8000',
			'name' => '其他参数-一般为post传入的参数,必须是json格式',
			'default' => '',
			'desc' => '其他参数',
			'match' => 'option',
			'update' => $id < 0 ? 'hidden' : 'textarea',
		),

		# Push endpoints that receive collected data, one per line
		'push' => array(
			'type' => 'varchar-2000',
			'name' => '数据推送-采集数据时,会自动往设置好的数据推送接口推送数据,多个用换行隔开,支持http协议和dever协议',
			'default' => '',
			'desc' => '数据推送',
			'match' => 'option',
			'update' => $id < 0 ? 'hidden' : 'textarea',
		),

		# Manual sort weight; per the label, larger values come first
		'reorder' => array(
			'type' => 'int-11',
			'name' => '排序(数值越大越靠前)',
			'default' => '1',
			'desc' => '请输入排序',
			'match' => 'option',
			'update' => 'text',
			'search' => 'order',
			'list_name' => '排序',
			'list' => true,
			'order' => 'desc',
			'edit' => true,
		),

		# Record state flag; the requests below filter on state = 1
		'state' => array(
			'type' => 'tinyint-1',
			'name' => '状态',
			'default' => '1',
			'desc' => '请选择状态',
			'match' => 'is_numeric',
		),

		# Timestamp column; time() is evaluated when this file is loaded
		'cdate' => array(
			'type' => 'int-11',
			'name' => '更新时间',
			'match' => array('is_numeric', time()),
			'desc' => '',
		),
	),

	# Admin UI configuration
	'manage' => array(
		# Buttons shown on the update form; values are JS event snippets.
		# 'save-data' is the fixed keyword for saving the current record.
		'update_button' => array(
			'提交保存' => 'save-data',
			'放弃保存' => "msg({status:1,msg:'yes'})",
		),

		# List page type
		'list_type' => 'parent',

		# Per-row buttons on the list page.
		# NOTE(review): entries look like (label, target, optional display condition) - confirm against the framework.
		# Several conditions require {status} <= 2, i.e. not queued or running per $status.
		'list_button' => array(
			'update' => array('编辑', '"project&option_pid={project_id}"'),
			'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"', '{project_id} > 0'),
			'delete' => '删除',
			'br1' => array('<br /><br />'),
			'add' => array('新增子项目', '"project&option_pid={id}&oper_parent=project&oper_save_jump=project"', '{project_id} == -1'),
			'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{project_id} > 0 && {status} <= 2'),
			'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{project_id} > 0 && {status} <= 2'),
			'br2' => array('<br /><br />'),
			'new' => array('测试采集', 'Dever::url("spider/lib/api.test?id={id}")', '{project_id} > 0'),
			'oper1' => array('开始采集', 'Dever::url("spider/lib/api.add?id={id}")', '{project_id} > 0 && {status} <= 2'),
		),
	),

	# Named request definitions
	'request' => array(
		# main: fetch all top-level (parent) projects
		'main' => array(
			'where' => array(
				'project_id' => -1,
				'state' => 1,
			),
			'type' => 'all',
			'order' => array(
				'reorder' => 'desc',
				'id' => 'desc',
			),
			'col' => '*|id',
		),

		# getAll: fetch all finished records that have an interval, so they can be re-queued
		'getAll' => array(
			'option' => array(
				'id' => 'yes',
				'status' => 2,
				'interval' => array('yes', '>='),
				'sdate' => array('yes-sdate', '<='),
				'state' => 1,
			),
			'type' => 'all',
			'order' => array('id' => 'desc'),
			'col' => '*',
		),

		# getOne: fetch a single record by the same kind of optional filters
		'getOne' => array(
			'option' => array(
				'id' => 'yes',
				'status' => array('yes'),
				'sdate' => array('yes-sdate', '<='),
				'state' => 1,
			),
			'type' => 'one',
		),
	),
);