'关闭',
2 => '开启',
);
# Option maps for the enumerated columns declared in the table definition below.
# Keys are the integer values stored in the database; values are the labels shown in the admin UI.

# Collection status: 1 = not started, 2 = finished, 3 = queued, 4 = running.
$status = array(
    1 => '未开始',
    2 => '已完成',
    3 => '队列中',
    4 => '运行中',
);

# HTTP request method used when fetching the target site.
$request_type = array(
    1 => 'get',
    2 => 'post',
);

# Request media type: 1 = regular form, 2 = JSON.
$content_type = array(
    1 => '普通表单',
    2 => 'JSON格式',
);
# Lazy option provider for the `project_id` column.
# Returns an array that always contains the -1 placeholder entry, followed by
# whatever rows Dever::load('spider/project-main') yields (presumably the `main`
# request of this table -- confirm against the framework's routing).
$project = function () {
    $options = array(
        -1 => array('id' => '-1', 'name' => '父级项目'),
    );

    $rows = Dever::load('spider/project-main');

    # Array union: existing keys (the -1 placeholder) win, loaded rows are appended.
    return $rows ? $options + $rows : $options;
};
# Lazy option provider for the `cate_id` column: hands back the result of
# Dever::load('spider/cate-state') unchanged.
$cate = function () {
    return Dever::load('spider/cate-state');
};
# Resolve the spider component's base path from the project registry.
# `setup` takes precedence over `path` when it is present. Each key is only read
# after an isset() check, so a registry entry that supplies just one of them
# (or neither) no longer triggers an undefined-index notice/warning.
# $path ends up null when neither key is available, exactly as before.
$info = Dever::load('manage/project.get');
if (isset($info['spider']['setup'])) {
    $path = $info['spider']['setup'];
} elseif (isset($info['spider']['path'])) {
    $path = $info['spider']['path'];
} else {
    $path = null;
}
# Definition of the `project` table: metadata, column structure, schema
# alterations, admin (manage) configuration and named requests.
return array(
    # Table name
    'name' => 'project',
    # Display name shown to users
    'lang' => '采集规则',
    'status' => $status,
    'path' => $path,
    # Sort position in the admin menu
    'order' => 20,
    # Help text. The literal contains a raw line break, so its second line must stay unindented.
    'desc' => Dever::markdown('**启动守护进程的方法:**
常规任务:请将 ```* * * * * root php '.$path.'daemon/main.php``` 放到crontab中[建议每分钟执行一次]'),

    # Column definitions
    'struct' => array(
        'id' => array(
            'type' => 'int-11',
            'name' => '项目ID',
            'default' => '',
            'desc' => '',
            'match' => 'is_numeric',
            'search' => 'order',
            //'list' => true,
            'order' => 'desc',
        ),

        'cate_id' => array(
            'type' => 'int-11',
            'name' => '采集源',
            'default' => '1',
            'desc' => '采集源',
            'match' => 'is_numeric',
            'update' => 'select',
            'search' => 'select',
            'option' => $cate,
            'list' => true,
        ),

        'name' => array(
            'type' => 'varchar-100',
            'name' => '名称',
            'default' => '',
            'desc' => '请输入项目名称',
            'match' => 'is_string',
            'update' => 'text',
            'search' => 'order,fulltext',
            'list' => true,
            'edit' => true,
        ),

        'project_id' => array(
            'type' => 'int-11',
            'name' => '上级项目',
            //'default' => $id,
            'desc' => '请选择上级分类',
            'match' => 'is_numeric',
            //'update' => 'select',
            //'search' => 'select',
            'option' => $project,
        ),

        'site' => array(
            'type' => 'text-255',
            'name' => '采集网址-第一行填写首页链接,第二行填写后续的分页部分,分页写成{page=1},分类写成{cate=1}',
            'default' => '',
            'desc' => '采集网址',
            'match' => 'is_string',
            'update' => 'textarea',
            //'list' => true,
            //'edit' => 'textarea',
        ),

        'request_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求方式',
            'default' => '1',
            'desc' => '请求方式',
            'match' => 'is_numeric',
            'option' => $request_type,
            'list' => true,
            'update' => 'radio',
            //'edit' => true,
        ),

        'content_type' => array(
            'type' => 'tinyint-1',
            'name' => '请求媒体类型',
            'default' => '1',
            'desc' => '请求媒体类型',
            'match' => 'is_numeric',
            'option' => $content_type,
            'list' => true,
            'update' => 'radio',
            //'edit' => true,
        ),

        'collect_list_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集列表规则-采集列表规则,仅支持dom解析,采集字段如果在列表页中,需要定义好该规则,json格式无需定义本规则',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'collect_rule' => array(
            'type' => 'varchar-500',
            'name' => '采集详情链接-采集详情页面链接规则,为空则进行单页采集,直接获取整个页面的内容,支持dom解析、json格式,dom解析$(".info .title a").each().attr("href"),json格式$json["data"]',
            'default' => '',
            'desc' => '采集规则',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'page_num' => array(
            'type' => 'int-11',
            'name' => '采集页数-值为0则默认采集100页',
            'default' => '0',
            'desc' => '采集页数',
            'match' => 'option',
            'update' => 'text',
        ),

        'status' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '状态',
            'match' => 'is_numeric',
            'option' => $status,
            'list' => 'Dever::load("spider/lib/project.status", {id})',
            'modal' => '查看详情',
            'update' => 'radio',
            //'edit' => true,
        ),

        'num' => array(
            'type' => 'int-11',
            'name' => '采集次数',
            'default' => '0',
            'desc' => '采集次数',
            'match' => 'is_numeric',
        ),

        'cur_page' => array(
            'type' => 'int-11',
            'name' => '当前采集页数',
            'default' => '0',
            'desc' => '当前采集页数',
            'match' => 'is_numeric',
        ),

        'sdate' => array(
            'type' => 'int-11',
            'name' => '下次采集时间',
            'default' => '0',
            'desc' => '下次采集时间',
            'match' => 'is_numeric',
            'callback' => 'maketime',
        ),

        'interval' => array(
            'type' => 'int-11',
            'name' => '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
            'default' => '0',
            'desc' => '采集间隔秒数',
            'match' => 'is_numeric',
            'update' => 'text',
        ),

        'header' => array(
            'type' => 'text-255',
            'name' => 'Header参数-换行为多个参数,格式cookie: 11',
            'default' => '',
            'desc' => 'Header参数',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'param' => array(
            'type' => 'varchar-8000',
            'name' => '其他参数-一般为post传入的参数,必须是json格式',
            'default' => '',
            'desc' => '其他参数',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'config' => array(
            'type' => 'varchar-2000',
            'name' => '基础配置-用于与数据推送的项目约定的配置信息,可以是json格式,也可是字符串',
            'default' => '',
            'desc' => '基础配置',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'push' => array(
            'type' => 'varchar-2000',
            'name' => '数据推送-采集数据时,会自动往设置好的数据推送接口推送数据,多个用换行隔开,支持http协议和dever协议',
            'default' => '',
            'desc' => '数据推送',
            'match' => 'option',
            'update' => 'textarea',
            //'edit' => true,
            //'list' => true,
        ),

        'reorder' => array(
            'type' => 'int-11',
            'name' => '排序(数值越大越靠前)',
            'default' => '1',
            'desc' => '请输入排序',
            'match' => 'option',
            'update' => 'text',
            'search' => 'order',
            'list_name' => '排序',
            'list' => true,
            'order' => 'desc',
            'edit' => true,
        ),

        'state' => array(
            'type' => 'tinyint-1',
            'name' => '状态',
            'default' => '1',
            'desc' => '请选择状态',
            'match' => 'is_numeric',
        ),

        'cdate' => array(
            'type' => 'int-11',
            'name' => '更新时间',
            'match' => array('is_numeric', time()),
            'desc' => '',
            # Only takes effect on insert
            //'insert' => true,
            //'list' => 'date("Y-m-d H:i:s", {cdate})',
        ),
    ),

    # Schema alterations, keyed by version number
    'alter' => array(
        2 => array(
            array('update', 'header', 'header', 'text-255 header'),
            //array('add', 'config', 'config', 'int-11 1 配置'),
        ),
        //'version' => 2,
    ),

    'manage' => array(
        //'delete' => false,

        # Buttons shown on the update form; each value is a JS script/event.
        # The fixed value "save-data" saves the current record; "copy-data" copies it.
        'update_button' => array(
            '提交保存' => 'save-data',
            '放弃保存' => "msg({status:1,msg:'yes'})",
            //'复制数据' => 'copy-data',
        ),

        # Disallow editing
        //'edit' => false,
        # List page type
        //'list_type' => 'parent',

        # Per-row buttons on the list page (deletion is allowed)
        'list_button' => array(
            'update' => array('编辑', '"project&option_pid={project_id}"'),
            'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"'),
            'delete' => '删除',
            # The label is a raw line break; the closing quote must stay at column 0.
            'br1' => array('
'),
            'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
            'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
            # Same raw line-break label as br1.
            'br2' => array('
'),
            'new' => array('测试采集', Dever::url('lib/api.test', 'spider')),
            'oper1' => array('开始采集', '"spider/lib/api.add?id={id}"', '{status} <= 2'),
        ),
    ),

    # Request (query) interface definitions
    'request' => array(
        # main: fetch all top-level entries (project_id = -1, state = 1)
        'main' => array(
            'where' => array(
                'project_id' => -1,
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array(
                'reorder' => 'desc',
                'id' => 'desc',
            ),
            'col' => '*|id',
        ),

        # getAll: fetch every finished record that has an interval, so it can be re-queued
        'getAll' => array(
            'option' => array(
                'id' => 'yes',
                'status' => 2,
                'interval' => array('yes', '>='),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'all',
            'order' => array('id' => 'desc'),
            'col' => '*',
        ),

        # getOne: fetch a single record using the same optional filters
        'getOne' => array(
            'option' => array(
                'id' => 'yes',
                'status' => array('yes'),
                'sdate' => array('yes-sdate', '<='),
                'state' => 1,
            ),
            'type' => 'one',
        ),
    ),
);