<?php
/*
 * Table definition for `project` (label: 采集规则).
 *
 * The script builds a few option maps and option callbacks, resolves the
 * spider path from Dever::load('manage/project.get'), and returns one array
 * with the keys: name, lang, status, path, order, desc, struct, alter,
 * manage and request.
 *
 * NOTE(review): this file was reconstructed from a copy whose line breaks had
 * been stripped; statement order and every string literal were kept as found.
 */

# Commonly used option maps.
# NOTE(review): $local is not referenced anywhere else in this script.
$local = array(
    1 => '关闭',
    2 => '开启',
);

$status = array(
    1 => '未开始',
    2 => '已完成',
    3 => '队列中',
    4 => '运行中',
);

$request_type = array(
    1 => 'get',
    2 => 'post',
);

$content_type = array(
    1 => '普通表单',
    2 => 'JSON格式',
);

# Option callback for `project_id`: a fixed "-1" entry followed by whatever
# Dever::load('spider/project-main') returns. The `+=` union keeps the "-1"
# entry if the loaded data happens to use the same key.
$project = function () {
    $array = array(
        -1 => array('id' => '-1', 'name' => '父级项目'),
    );
    $info = Dever::load('spider/project-main');
    if ($info) {
        $array += $info;
    }
    return $array;
};

# Option callback for `cate_id`: returns Dever::load('spider/cate-state') as is.
$cate = function () {
    $info = Dever::load('spider/cate-state');
    return $info;
};

# Resolve the spider path; `setup` takes precedence over `path` when present.
# Both keys are guarded with isset() so a missing `spider` entry no longer
# raises an undefined-index/offset notice; $path stays null in that case,
# which is the value the unguarded read produced.
$info = Dever::load('manage/project.get');
$path = isset($info['spider']['path']) ? $info['spider']['path'] : null;
if (isset($info['spider']['setup'])) {
    $path = $info['spider']['setup'];
}

return array(
    # Table name
    'name' => 'project',
    # Display name shown to users
    'lang' => '采集规则',
    'status' => $status,
    'path' => $path,
    # Sort order in the admin menu
    'order' => 20,
    # NOTE(review): the "\n\t" below restores a line break that appears to have
    # been stripped from the literal (only the tab survived) - confirm against
    # the deployed file.
    'desc' => Dever::markdown('**启动守护进程的方法:**' . "\n\t" . '常规任务:请将 ```* * * * * root php '.$path.'daemon/main.php``` 放到crontab中[建议每分钟执行一次]'),

    # Data structure
    'struct' => array(
        'id' => array(
            'type' => 'int-11',
            'name' => '项目ID',
            'default' => '',
            'desc' => '',
            'match' => 'is_numeric',
            'search' => 'order',
            //'list' => true,
            'order' => 'desc',
        ),

        'cate_id' => array(
            'type' => 'int-11',
            'name' => '采集源',
            'default' => '1',
            'desc' => '采集源',
            'match' => 'is_numeric',
            'update' => 'select',
            'search' => 'select',
            'option' => $cate,
            'list' => true,
        ),

        'name' => array(
            'type' => 'varchar-100',
            'name' => '名称',
            'default' => '',
            'desc' => '请输入项目名称',
            'match' => 'is_string',
            'update' => 'text',
            'search' => 'order,fulltext',
            'list' => true,
            'edit' => true,
        ),

        'project_id' => array(
            'type' => 'int-11',
            'name' => '上级项目',
            //'default' => $id,
            'desc' => '请选择上级分类',
            'match' => 'is_numeric',
            //'update' => 'select',
            //'search' => 'select',
            'option' => $project,
        ),

        'site' => array(
            'type' => 'text-255',
            'name' => '采集网址-第一行填写首页链接,第二行填写后续的分页部分,分页写成{page=1},分类写成{cate=1}',
            'default' => '',
            'desc' => '采集网址',
            'match' => 'is_string',
            'update' => 'textarea',
            //'list' => true,
            //'edit' => 'textarea',
        ),

        'request_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求方式',
            'default' => '1',
            'desc' => '请求方式',
            'match' => 'is_numeric',
            'option' => $request_type,
            'list' => true,
            'update' => 'radio',
            //'edit' => true,
        ),

        'content_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求媒体类型',
            'default' => '1',
            'desc' => '请求媒体类型',
            'match' => 'is_numeric',
            'option' => $content_type,
            'list' => true,
            'update' => 'radio',
            //'edit' => true,
        ),

        'collect_list_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集列表规则-采集列表规则,仅支持dom解析,采集字段如果在列表页中,需要定义好该规则,json格式无需定义本规则',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'collect_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集详情链接-采集详情页面链接规则,为空则进行单页采集,直接获取整个页面的内容,支持dom解析、json格式,dom解析$(".info .title a").each().attr("href"),json格式$json["data"]',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'page_num' => array(
            'type' => 'int-11',
            'name' => '采集页数-值为0则默认采集100页',
            'default' => '0',
            'desc' => '采集页数',
            'match' => 'option',
            'update' => 'text',
        ),

        'status' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '状态',
            'match' => 'is_numeric',
            'option' => $status,
            'list' => 'Dever::load("spider/lib/project.status", {id})',
            'modal' => '查看详情',
            'update' => 'radio',
            //'edit' => true,
        ),

        'num' => array(
            'type' => 'int-11',
            'name' => '采集次数',
            'default' => '0',
            'desc' => '采集次数',
            'match' => 'is_numeric',
        ),

        'cur_page' => array(
            'type' => 'int-11',
            'name' => '当前采集页数',
            'default' => '0',
            'desc' => '当前采集页数',
            'match' => 'is_numeric',
        ),

        'sdate' => array(
            'type' => 'int-11',
            'name' => '下次采集时间',
            'default' => '0',
            'desc' => '下次采集时间',
            'match' => 'is_numeric',
            'callback' => 'maketime',
        ),

        'interval' => array(
            'type' => 'int-11',
            'name' => '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
            'default' => '0',
            'desc' => '采集间隔秒数',
            'match' => 'is_numeric',
            'update' => 'text',
        ),

        'header' => array(
            'type' => 'text-255',
            'name' => 'Header参数-换行为多个参数,格式cookie: 11',
            'default' => '',
            'desc' => 'Header参数',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'param' => array(
            'type' => 'varchar-8000',
            'name' => '其他参数-一般为post传入的参数,必须是json格式',
            'default' => '',
            'desc' => '其他参数',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'config' => array(
            'type' => 'varchar-2000',
            'name' => '基础配置-用于与数据推送的项目约定的配置信息,可以是json格式,也可是字符串',
            'default' => '',
            'desc' => '基础配置',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'push' => array(
            'type' => 'varchar-2000',
            'name' => '数据推送-采集数据时,会自动往设置好的数据推送接口推送数据,多个用换行隔开,支持http协议和dever协议',
            'default' => '',
            'desc' => '数据推送',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'reorder' => array(
            'type' => 'int-11',
            'name' => '排序(数值越大越靠前)',
            'default' => '1',
            'desc' => '请输入排序',
            'match' => 'option',
            'update' => 'text',
            'search' => 'order',
            'list_name' => '排序',
            'list' => true,
            'order' => 'desc',
            'edit' => true,
        ),

        'state' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '请选择状态',
            'match' => 'is_numeric',
        ),

        'cdate' => array(
            'type' => 'int-11',
            'name' => '更新时间',
            // time() is evaluated once, when this file is loaded.
            'match' => array('is_numeric', time()),
            'desc' => '',
            # Only takes effect on insert
            //'insert' => true,
            //'list' => 'date("Y-m-d H:i:s", {cdate})',
        ),
    ),

    # Table-structure updates
    'alter' => array(
        2 => array(
            array('update', 'header', 'header', 'text-255  header'),
            //array('add', 'config', 'config', 'int-11 1 配置'),
        ),
        //'version' => 2,
    ),

    'manage' => array(
        //'delete' => false,

        # Buttons shown when updating data; each value is a JS event.
        # Fixed parameters: "save-data" saves the current data, "copy-data" copies it.
        'update_button' => array(
            '提交保存' => 'save-data',
            '放弃保存' => "msg({status:1,msg:'yes'})",
            //'复制数据' => 'copy-data',
        ),

        # Disallow editing
        //'edit' => false,

        # List page type
        //'list_type' => 'parent',

        # Deletable
        'list_button' => array(
            'update' => array('编辑', '"project&option_pid={project_id}"'),
            'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"'),
            'delete' => '删除',
            'br1' => array('<br /><br />'),
            'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
            'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
            'br2' => array('<br /><br />'),
            'new' => array('测试采集', Dever::url('lib/api.test', 'spider')),
            'oper1' => array('开始采集', '"spider/lib/api.add?id={id}"', '{status} <= 2'),
        ),
    ),

    # Request interface definitions
    'request' => array(
        # main: fetch all top-level categories
        'main' => array(
            'where' => array(
                'project_id' => -1,
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array(
                'reorder' => 'desc',
                'id' => 'desc',
            ),
            'col' => '*|id',
        ),

        # Fetch all finished records that have an interval, to be re-queued
        'getAll' => array(
            'option' => array(
                'id' => 'yes',
                'status' => 2,
                'interval' => array('yes', '>='),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array('id' => 'desc'),
            'col' => '*',
        ),

        'getOne' => array(
            'option' => array(
                'id' => 'yes',
                'status' => array('yes'),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'one',
        ),
    )
);