'关闭', 2 => '开启', );
# NOTE(review): the statement closed on the line above begins outside the
# visible part of this file and is left exactly as it was found.

# Labels for the values of the `status` column.
$status = array(
    1 => '未开始',
    2 => '已完成',
    3 => '队列中',
    4 => '运行中',
);

# Labels for the values of the `request_type` column.
$request_type = array(
    1 => 'get',
    2 => 'post',
);

# Labels for the values of the `content_type` column.
$content_type = array(
    1 => '普通表单',
    2 => 'JSON格式',
);

# Option source for `project_id`: a fixed "-1" entry, followed by whatever
# the `spider/project-main` request returns (existing keys win on collision).
$project = function () {
    $options = array(
        -1 => array('id' => '-1', 'name' => '父级项目'),
    );

    $loaded = Dever::load('spider/project-main');
    if (!$loaded) {
        return $options;
    }

    return $options + $loaded;
};

# Option source for `cate_id`: the result of the `spider/cate-state` request.
$cate = function () {
    return Dever::load('spider/cate-state');
};

# Resolve the spider path; a configured `setup` entry overrides `path`.
$info = Dever::load('manage/project.get');
$path = $info['spider']['path'];
if (isset($info['spider']['setup'])) {
    $path = $info['spider']['setup'];
}

return array(
    # Table name
    'name' => 'project',
    # Name displayed to users
    'lang' => '采集规则',
    'status' => $status,
    'path' => $path,
    # Ordering of the entry in the backend menu
    'order' => 20,
    'desc' => Dever::markdown('**启动守护进程的方法:** 常规任务:请将 ```* * * * * root php ' . $path . 'daemon/main.php``` 放到crontab中[建议每分钟执行一次]'),

    # Data structure (one entry per column)
    'struct' => array(
        'id' => array(
            'type' => 'int-11',
            'name' => '项目ID',
            'default' => '',
            'desc' => '',
            'match' => 'is_numeric',
            'search' => 'order',
            // 'list' => true,
            'order' => 'desc',
        ),

        'cate_id' => array(
            'type' => 'int-11',
            'name' => '采集源',
            'default' => '1',
            'desc' => '采集源',
            'match' => 'is_numeric',
            'update' => 'select',
            'search' => 'select',
            'option' => $cate,
            'list' => true,
        ),

        'name' => array(
            'type' => 'varchar-100',
            'name' => '名称',
            'default' => '',
            'desc' => '请输入项目名称',
            'match' => 'is_string',
            'update' => 'text',
            'search' => 'order,fulltext',
            'list' => true,
            'edit' => true,
        ),

        'project_id' => array(
            'type' => 'int-11',
            'name' => '上级项目',
            // 'default' => $id,
            'desc' => '请选择上级分类',
            'match' => 'is_numeric',
            // 'update' => 'select',
            // 'search' => 'select',
            'option' => $project,
        ),

        'site' => array(
            'type' => 'text-255',
            'name' => '采集网址-第一行填写首页链接,第二行填写后续的分页部分,分页写成{page=1},分类写成{cate=1}',
            'default' => '',
            'desc' => '采集网址',
            'match' => 'is_string',
            'update' => 'textarea',
            // 'list' => true,
            // 'edit' => 'textarea',
        ),

        'request_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求方式',
            'default' => '1',
            'desc' => '请求方式',
            'match' => 'is_numeric',
            'option' => $request_type,
            'list' => true,
            'update' => 'radio',
            // 'edit' => true,
        ),

        'content_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求媒体类型',
            'default' => '1',
            'desc' => '请求媒体类型',
            'match' => 'is_numeric',
            'option' => $content_type,
            'list' => true,
            'update' => 'radio',
            // 'edit' => true,
        ),

        'collect_list_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集列表规则-采集列表规则,仅支持dom解析,采集字段如果在列表页中,需要定义好该规则,json格式无需定义本规则',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            // 'edit' => true,
            // 'list' => true,
        ),

        'collect_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集详情链接-采集详情页面链接规则,为空则进行单页采集,直接获取整个页面的内容,支持dom解析、json格式,dom解析$(".info .title a").each().attr("href"),json格式$json["data"]',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            // 'edit' => true,
            // 'list' => true,
        ),

        'page_num' => array(
            'type' => 'int-11',
            'name' => '采集页数-值为0则默认采集100页',
            'default' => '0',
            'desc' => '采集页数',
            'match' => 'option',
            'update' => 'text',
        ),

        'status' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '状态',
            'match' => 'is_numeric',
            'option' => $status,
            'list' => 'Dever::load("spider/lib/project.status", {id})',
            'modal' => '查看详情',
            'update' => 'radio',
            // 'edit' => true,
        ),

        'num' => array(
            'type' => 'int-11',
            'name' => '采集次数',
            'default' => '0',
            'desc' => '采集次数',
            'match' => 'is_numeric',
        ),

        'cur_page' => array(
            'type' => 'int-11',
            'name' => '当前采集页数',
            'default' => '0',
            'desc' => '当前采集页数',
            'match' => 'is_numeric',
        ),

        'sdate' => array(
            'type' => 'int-11',
            'name' => '下次采集时间',
            'default' => '0',
            'desc' => '下次采集时间',
            'match' => 'is_numeric',
            'callback' => 'maketime',
        ),

        'interval' => array(
            'type' => 'int-11',
            'name' => '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
            'default' => '0',
            'desc' => '采集间隔秒数',
            'match' => 'is_numeric',
            'update' => 'text',
        ),

        'header' => array(
            'type' => 'text-255',
            'name' => 'Header参数-换行为多个参数,格式cookie: 11',
            'default' => '',
            'desc' => 'Header参数',
            'match' => 'option',
            'update' => 'textarea',
            // 'edit' => true,
            // 'list' => true,
        ),

        'param' => array(
            'type' => 'varchar-8000',
            'name' => '其他参数-一般为post传入的参数,必须是json格式',
            'default' => '',
            'desc' => '其他参数',
            'match' => 'option',
            'update' => 'textarea',
            // 'edit' => true,
            // 'list' => true,
        ),

        'config' => array(
            'type' => 'varchar-2000',
            'name' => '基础配置-用于与数据推送的项目约定的配置信息,可以是json格式,也可是字符串',
            'default' => '',
            'desc' => '基础配置',
            'match' => 'option',
            'update' => 'textarea',
            // 'edit' => true,
            // 'list' => true,
        ),

        'push' => array(
            'type' => 'varchar-2000',
            'name' => '数据推送-采集数据时,会自动往设置好的数据推送接口推送数据,多个用换行隔开,支持http协议和dever协议',
            'default' => '',
            'desc' => '数据推送',
            'match' => 'option',
            'update' => 'textarea',
            // 'edit' => true,
            // 'list' => true,
        ),

        'reorder' => array(
            'type' => 'int-11',
            'name' => '排序(数值越大越靠前)',
            'default' => '1',
            'desc' => '请输入排序',
            'match' => 'option',
            'update' => 'text',
            'search' => 'order',
            'list_name' => '排序',
            'list' => true,
            'order' => 'desc',
            'edit' => true,
        ),

        'state' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '请选择状态',
            'match' => 'is_numeric',
        ),

        'cdate' => array(
            'type' => 'int-11',
            'name' => '更新时间',
            'match' => array('is_numeric', time()),
            'desc' => '',
            # Only takes effect on insert
            // 'insert' => true,
            // 'list' => 'date("Y-m-d H:i:s", {cdate})',
        ),
    ),

    # Table structure updates, keyed by version number
    'alter' => array(
        2 => array(
            array('update', 'header', 'header', 'text-255 header'),
            // Disabled: an 'add' entry for the `config` column (type 'int-11', default 1).
        ),
        // 'version' => 2,
    ),

    'manage' => array(
        // 'delete' => false,

        # Buttons shown when updating a record; each value is a JS event.
        # "save-data" saves the current record, "copy-data" copies it.
        'update_button' => array(
            '提交保存' => 'save-data',
            '放弃保存' => "msg({status:1,msg:'yes'})",
            // Disabled: a button bound to the 'copy-data' event.
        ),

        # Editing not allowed
        // 'edit' => false,

        # List page type
        // 'list_type' => 'parent',

        # Row buttons on the list page (original note: "can be deleted")
        'list_button' => array(
            'update' => array('编辑', '"project&option_pid={project_id}"'),
            'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"'),
            'delete' => '删除',
            # The label below is a literal made only of line breaks; keep it verbatim.
            'br1' => array('

'),
            'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
            'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
            # The label below is a literal made only of line breaks; keep it verbatim.
            'br2' => array('

'),
            'new' => array('测试采集', Dever::url('lib/api.test', 'spider')),
            'oper1' => array('开始采集', '"spider/lib/api.add?id={id}"', '{status} <= 2'),
        ),
    ),

    # request: request interface definitions
    'request' => array(
        # main: fetch all top-level entries (project_id = -1, state = 1)
        'main' => array(
            'where' => array(
                'project_id' => -1,
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array(
                'reorder' => 'desc',
                'id' => 'desc',
            ),
            'col' => '*|id',
        ),

        # getAll: fetch every finished record that has an interval, so it can be re-queued
        'getAll' => array(
            'option' => array(
                'id' => 'yes',
                'status' => 2,
                'interval' => array('yes', '>='),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array('id' => 'desc'),
            'col' => '*',
        ),

        'getOne' => array(
            'option' => array(
                'id' => 'yes',
                'status' => array('yes'),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'one',
        ),
    ),
);