<?php
# Shared option tables: numeric stored value => label shown to the user.

# On/off switch labels (1 = closed/off, 2 = open/on).
# Not referenced by the definition further down in this file.
$local = array(1 => '关闭', 2 => '开启');

# Collection task status labels: 1 not started, 2 finished, 3 queued, 4 running.
$status = array(
    1 => '未开始',
    2 => '已完成',
    3 => '队列中',
    4 => '运行中',
);

# HTTP request method used when fetching the target.
$request_type = array(1 => 'get', 2 => 'post');

# Request body media type: 1 plain form, 2 JSON.
$content_type = array(
    1 => '普通表单',
    2 => 'JSON格式',
);
# Option provider for the "parent project" selector.
# Always offers the pseudo entry -1 first; whatever 'spider/project-main' returns
# is appended with the array union operator, so an existing -1 key is never
# overwritten by loaded data.
$project = function()
{
    $options = array(
        -1 => array('id' => '-1', 'name' => '父级项目'),
    );
    $loaded = Dever::load('spider/project-main');

    return $loaded ? $options + $loaded : $options;
};
# Option provider for the collection-source selector: hands back the result of
# the 'spider/cate-state' request unchanged.
$cate = function()
{
    return Dever::load('spider/cate-state');
};
# Resolve the spider package path from the 'manage/project.get' registry.
# 'setup' takes precedence over 'path' when both are present. Each key is probed
# with isset() first so a missing 'spider' entry (or a missing 'path' key) no
# longer raises an undefined-index / offset-on-null diagnostic; $path is null
# in that case, which is the value the unguarded read produced as well.
$info = Dever::load('manage/project.get');
$path = null;
if (isset($info['spider']['setup'])) {
    $path = $info['spider']['setup'];
} elseif (isset($info['spider']['path'])) {
    $path = $info['spider']['path'];
}
# Table/model definition returned to whoever includes this file.
return array(
    # Table name
    'name' => 'project',
    # Display name shown to users
    'lang' => '采集规则',
    'status' => $status,
    'path' => $path,
    # Sort position in the backend menu
    'order' => 20,
    # The literal below spans two physical lines; its second line must stay at
    # column 0 so that no extra whitespace ends up inside the markdown text.
    'desc' => Dever::markdown('**启动守护进程的方法:**
常规任务:请将 ```* * * * * root php '.$path.'daemon/main.php``` 放到crontab中[建议每分钟执行一次]'),

    # Data structure (one entry per column)
    'struct' => array(
        'id' => array(
            'type' => 'int-11',
            'name' => '项目ID',
            'default' => '',
            'desc' => '',
            'match' => 'is_numeric',
            'search' => 'order',
            //'list' => true,
            'order' => 'desc',
        ),

        # Collection source, options supplied by the $cate closure
        'cate_id' => array(
            'type' => 'int-11',
            'name' => '采集源',
            'default' => '1',
            'desc' => '采集源',
            'match' => 'is_numeric',
            'update' => 'select',
            'search' => 'select',
            'option' => $cate,
            'list' => true,
        ),

        'name' => array(
            'type' => 'varchar-100',
            'name' => '名称',
            'default' => '',
            'desc' => '请输入项目名称',
            'match' => 'is_string',
            'update' => 'text',
            'search' => 'order,fulltext',
            'list' => true,
            'edit' => true,
        ),

        # Parent project, options supplied by the $project closure
        'project_id' => array(
            'type' => 'int-11',
            'name' => '上级项目',
            //'default' => $id,
            'desc' => '请选择上级分类',
            'match' => 'is_numeric',
            //'update' => 'select',
            //'search' => 'select',
            'option' => $project,
        ),

        # Target URL(s); the label explains the {page=1} / {cate=1} placeholders
        'site' => array(
            'type' => 'text-255',
            'name' => '采集网址-第一行填写首页链接,第二行填写后续的分页部分,分页写成{page=1},分类写成{cate=1}',
            'default' => '',
            'desc' => '采集网址',
            'match' => 'is_string',
            'update' => 'textarea',
            //'list' => true,
            //'edit' => 'textarea',
        ),

        'request_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求方式',
            'default' => '1',
            'desc' => '请求方式',
            'match' => 'is_numeric',
            'option' => $request_type,
            'list' => true,
            'update' => 'radio',
            //'edit' => true,
        ),

        'content_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求媒体类型',
            'default' => '1',
            'desc' => '请求媒体类型',
            'match' => 'is_numeric',
            'option' => $content_type,
            'list' => true,
            'update' => 'radio',
            //'edit' => true,
        ),

        # Rule for extracting the list (optional field)
        'collect_list_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集列表规则-采集列表规则,仅支持dom解析,采集字段如果在列表页中,需要定义好该规则,json格式无需定义本规则',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        # Rule for extracting detail-page links (optional field)
        'collect_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集详情链接-采集详情页面链接规则,为空则进行单页采集,直接获取整个页面的内容,支持dom解析、json格式,dom解析$(".info .title a").each().attr("href"),json格式$json["data"]',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'page_num' => array(
            'type' => 'int-11',
            'name' => '采集页数-值为0则默认采集100页',
            'default' => '0',
            'desc' => '采集页数',
            'match' => 'option',
            'update' => 'text',
        ),

        # Task status; the list cell is rendered through spider/lib/project.status
        'status' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '状态',
            'match' => 'is_numeric',
            'option' => $status,
            'list' => 'Dever::load("spider/lib/project.status", {id})',
            'modal' => '查看详情',
            'update' => 'radio',
            //'edit' => true,
        ),

        'num' => array(
            'type' => 'int-11',
            'name' => '采集次数',
            'default' => '0',
            'desc' => '采集次数',
            'match' => 'is_numeric',
        ),

        'cur_page' => array(
            'type' => 'int-11',
            'name' => '当前采集页数',
            'default' => '0',
            'desc' => '当前采集页数',
            'match' => 'is_numeric',
        ),

        # Next collection time
        'sdate' => array(
            'type' => 'int-11',
            'name' => '下次采集时间',
            'default' => '0',
            'desc' => '下次采集时间',
            'match' => 'is_numeric',
            'callback' => 'maketime',
        ),

        'interval' => array(
            'type' => 'int-11',
            'name' => '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
            'default' => '0',
            'desc' => '采集间隔秒数',
            'match' => 'is_numeric',
            'update' => 'text',
        ),

        'header' => array(
            'type' => 'text-255',
            'name' => 'Header参数-换行为多个参数,格式cookie: 11',
            'default' => '',
            'desc' => 'Header参数',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'param' => array(
            'type' => 'varchar-8000',
            'name' => '其他参数-一般为post传入的参数,必须是json格式',
            'default' => '',
            'desc' => '其他参数',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'config' => array(
            'type' => 'varchar-2000',
            'name' => '基础配置-用于与数据推送的项目约定的配置信息,可以是json格式,也可是字符串',
            'default' => '',
            'desc' => '基础配置',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'push' => array(
            'type' => 'varchar-2000',
            'name' => '数据推送-采集数据时,会自动往设置好的数据推送接口推送数据,多个用换行隔开,支持http协议和dever协议',
            'default' => '',
            'desc' => '数据推送',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'reorder' => array(
            'type' => 'int-11',
            'name' => '排序(数值越大越靠前)',
            'default' => '1',
            'desc' => '请输入排序',
            'match' => 'option',
            'update' => 'text',
            'search' => 'order',
            'list_name' => '排序',
            'list' => true,
            'order' => 'desc',
            'edit' => true,
        ),

        'state' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '请选择状态',
            'match' => 'is_numeric',
        ),

        # time() is evaluated once, when this file is loaded
        'cdate' => array(
            'type' => 'int-11',
            'name' => '更新时间',
            'match' => array('is_numeric', time()),
            'desc' => '',
            # Only takes effect on insert
            //'insert' => true,
            //'list' => 'date("Y-m-d H:i:s", {cdate})',
        ),
    ),

    # Schema alterations
    'alter' => array(
        2 => array(
            array('update', 'header', 'header', 'text-255 header'),
            //array('add', 'config', 'config', 'int-11 1 配置'),
        ),
        //'version' => 2,
    ),

    'manage' => array(
        //'delete' => false,

        # Buttons shown on the update form; each value is a JS event snippet.
        # Fixed keywords: "save-data" saves the current record, "copy-data" copies it.
        'update_button' => array(
            '提交保存' => 'save-data',
            '放弃保存' => "msg({status:1,msg:'yes'})",
            //'复制数据' => 'copy-data',
        ),

        # Disallow editing
        //'edit' => false,

        # List page type
        //'list_type' => 'parent',

        # List buttons (deletion is allowed)
        'list_button' => array(
            'update' => array('编辑', '"project&option_pid={project_id}"'),
            'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"'),
            'delete' => '删除',
            'br1' => array('<br /><br />'),
            'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
            'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),

            'br2' => array('<br /><br />'),
            'new' => array('测试采集', Dever::url('lib/api.test', 'spider')),
            'oper1' => array('开始采集', '"spider/lib/api.add?id={id}"', '{status} <= 2'),
        ),
    ),

    # request: query interface definitions
    'request' => array(
        # main: fetch all top-level entries (project_id = -1)
        'main' => array(
            'where' => array(
                'project_id' => -1,
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array(
                'reorder' => 'desc',
                'id' => 'desc',
            ),
            'col' => '*|id',
        ),

        # getAll: fetch every finished record that has an interval, for re-queueing
        'getAll' => array(
            'option' => array(
                'id' => 'yes',
                'status' => 2,
                'interval' => array('yes', '>='),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array('id' => 'desc'),
            'col' => '*',
        ),

        # getOne: fetch a single record using the same kind of filters
        'getOne' => array(
            'option' => array(
                'id' => 'yes',
                'status' => array('yes'),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'one',
        ),
    )
);