project.php 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. <?php
  # Shared option maps (stored value => display label) for the field
  # definitions returned at the end of this file.

  # Closed / open switch labels. Not referenced in the visible part of this file.
  $local = array(
      1 => '关闭',
      2 => '开启',
  );

  # Collection status labels: not started, finished, queued, running.
  $status = array(
      1 => '未开始',
      2 => '已完成',
      3 => '队列中',
      4 => '运行中',
  );

  # Request method labels.
  $request_type = array(
      1 => 'get',
      2 => 'post',
  );

  # Request media type labels: plain form, JSON.
  $content_type = array(
      1 => '普通表单',
      2 => 'JSON格式',
  );
  # Option provider for the "project_id" field.
  # Always offers the synthetic "parent project" entry under key -1, followed by
  # whatever Dever::load('spider/project-main') returns. The array union keeps
  # the keys of both sides, and the -1 entry wins if the loaded data also has it.
  $project = function()
  {
      $options = array(
          -1 => array('id' => '-1', 'name' => '父级项目'),
      );
      $rows = Dever::load('spider/project-main');

      # An empty/false result leaves only the synthetic entry.
      return $rows ? $options + $rows : $options;
  };
  # Option provider for the "cate_id" field: hands back the result of
  # Dever::load('spider/cate-state') unchanged.
  $cate = function()
  {
      return Dever::load('spider/cate-state');
  };
  # Resolve the spider path used below for the 'path' entry and the crontab hint.
  # Starts from ['spider']['path'] and is overridden by ['spider']['setup']
  # whenever that key is set.
  $info = Dever::load('manage/project.get');
  $path = $info['spider']['path'];
  if (isset($info['spider']['setup'])) {
      $path = $info['spider']['setup'];
  }
  # Table definition handed back to whoever includes this file.
  # Top-level keys: name, lang, status, path, order, desc, struct, alter,
  # manage and request. Lines commented out with // are switched-off options
  # kept from the original for reference.
  return array(
      # Table name
      'name' => 'project',
      # Name shown to users
      'lang' => '采集规则',
      'status' => $status,
      'path' => $path,
      # Sort position in the admin menu
      'order' => 20,
      # Markdown hint telling the operator which crontab line starts the daemon.
      # The literal spans two source lines, so it contains a line break.
      'desc' => Dever::markdown('**启动守护进程的方法:**
常规任务:请将 ```* * * * * root php '.$path.'daemon/main.php``` 放到crontab中[建议每分钟执行一次]'),

      # Data structure: one entry per column
      'struct' => array(
          'id' => array(
              'type' => 'int-11',
              'name' => '项目ID',
              'default' => '',
              'desc' => '',
              'match' => 'is_numeric',
              'search' => 'order',
              //'list' => true,
              'order' => 'desc',
          ),

          # Collection source; options come from the $cate closure.
          'cate_id' => array(
              'type' => 'int-11',
              'name' => '采集源',
              'default' => '1',
              'desc' => '采集源',
              'match' => 'is_numeric',
              'update' => 'select',
              'search' => 'select',
              'option' => $cate,
              'list' => true,
          ),

          'name' => array(
              'type' => 'varchar-100',
              'name' => '名称',
              'default' => '',
              'desc' => '请输入项目名称',
              'match' => 'is_string',
              'update' => 'text',
              'search' => 'order,fulltext',
              'list' => true,
              'edit' => true,
          ),

          # Parent project; options come from the $project closure.
          'project_id' => array(
              'type' => 'int-11',
              'name' => '上级项目',
              //'default' => $id,
              'desc' => '请选择上级分类',
              'match' => 'is_numeric',
              //'update' => 'select',
              //'search' => 'select',
              'option' => $project,
          ),

          # URL(s) to collect from; the label documents the {page=1} / {cate=1} placeholders.
          'site' => array(
              'type' => 'text-255',
              'name' => '采集网址-第一行填写首页链接,第二行填写后续的分页部分,分页写成{page=1},分类写成{cate=1}',
              'default' => '',
              'desc' => '采集网址',
              'match' => 'is_string',
              'update' => 'textarea',
              //'list' => true,
              //'edit' => 'textarea',
          ),

          'request_type' => array(
              'type' => 'tinyint-1',
              'name' => '请求方式',
              'default' => '1',
              'desc' => '请求方式',
              'match' => 'is_numeric',
              'option' => $request_type,
              'list' => true,
              'update' => 'radio',
              //'edit' => true,
          ),

          'content_type' => array(
              'type' => 'tinyint-1',
              'name' => '请求媒体类型',
              'default' => '1',
              'desc' => '请求媒体类型',
              'match' => 'is_numeric',
              'option' => $content_type,
              'list' => true,
              'update' => 'radio',
              //'edit' => true,
          ),

          # Rule for the list page.
          'collect_list_rule' => array(
              'type' => 'varchar-500',
              'name' => '采集列表规则-采集列表规则,仅支持dom解析,采集字段如果在列表页中,需要定义好该规则,json格式无需定义本规则',
              'default' => '',
              'desc' => '采集规则',
              'match' => 'option',
              'update' => 'textarea',
              //'edit' => true,
              //'list' => true,
          ),

          # Rule for locating detail-page links.
          'collect_rule' => array(
              'type' => 'varchar-500',
              'name' => '采集详情链接-采集详情页面链接规则,为空则进行单页采集,直接获取整个页面的内容,支持dom解析、json格式,dom解析$(".info .title a").each().attr("href"),json格式$json["data"]',
              'default' => '',
              'desc' => '采集规则',
              'match' => 'option',
              'update' => 'textarea',
              //'edit' => true,
              //'list' => true,
          ),

          'page_num' => array(
              'type' => 'int-11',
              'name' => '采集页数-值为0则默认采集100页',
              'default' => '0',
              'desc' => '采集页数',
              'match' => 'option',
              'update' => 'text',
          ),

          # Collection status; labels come from $status.
          'status' => array(
              'type' => 'tinyint-1',
              'name' => '状态',
              'default' => '1',
              'desc' => '状态',
              'match' => 'is_numeric',
              'option' => $status,
              'list' => 'Dever::load("spider/lib/project.status", {id})',
              'modal' => '查看详情',
              'update' => 'radio',
              //'edit' => true,
          ),

          'num' => array(
              'type' => 'int-11',
              'name' => '采集次数',
              'default' => '0',
              'desc' => '采集次数',
              'match' => 'is_numeric',
          ),

          'cur_page' => array(
              'type' => 'int-11',
              'name' => '当前采集页数',
              'default' => '0',
              'desc' => '当前采集页数',
              'match' => 'is_numeric',
          ),

          # Next collection time.
          'sdate' => array(
              'type' => 'int-11',
              'name' => '下次采集时间',
              'default' => '0',
              'desc' => '下次采集时间',
              'match' => 'is_numeric',
              'callback' => 'maketime',
          ),

          'interval' => array(
              'type' => 'int-11',
              'name' => '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
              'default' => '0',
              'desc' => '采集间隔秒数',
              'match' => 'is_numeric',
              'update' => 'text',
          ),

          'header' => array(
              'type' => 'text-255',
              'name' => 'Header参数-换行为多个参数,格式cookie: 11',
              'default' => '',
              'desc' => 'Header参数',
              'match' => 'option',
              'update' => 'textarea',
              //'edit' => true,
              //'list' => true,
          ),

          'param' => array(
              'type' => 'varchar-8000',
              'name' => '其他参数-一般为post传入的参数,必须是json格式',
              'default' => '',
              'desc' => '其他参数',
              'match' => 'option',
              'update' => 'textarea',
              //'edit' => true,
              //'list' => true,
          ),

          'config' => array(
              'type' => 'varchar-2000',
              'name' => '基础配置-用于与数据推送的项目约定的配置信息,可以是json格式,也可是字符串',
              'default' => '',
              'desc' => '基础配置',
              'match' => 'option',
              'update' => 'textarea',
              //'edit' => true,
              //'list' => true,
          ),

          'push' => array(
              'type' => 'varchar-2000',
              'name' => '数据推送-采集数据时,会自动往设置好的数据推送接口推送数据,多个用换行隔开,支持http协议和dever协议',
              'default' => '',
              'desc' => '数据推送',
              'match' => 'option',
              'update' => 'textarea',
              //'edit' => true,
              //'list' => true,
          ),

          # Manual sort weight; per the label, larger values come first.
          'reorder' => array(
              'type' => 'int-11',
              'name' => '排序(数值越大越靠前)',
              'default' => '1',
              'desc' => '请输入排序',
              'match' => 'option',
              'update' => 'text',
              'search' => 'order',
              'list_name' => '排序',
              'list' => true,
              'order' => 'desc',
              'edit' => true,
          ),

          # Row state; the request definitions below all filter on state = 1.
          'state' => array(
              'type' => 'tinyint-1',
              'name' => '状态',
              'default' => '1',
              'desc' => '请选择状态',
              'match' => 'is_numeric',
          ),

          # time() is evaluated when this file is loaded.
          'cdate' => array(
              'type' => 'int-11',
              'name' => '更新时间',
              'match' => array('is_numeric', time()),
              'desc' => '',
              # Only takes effect on insert
              //'insert' => true,
              //'list' => 'date("Y-m-d H:i:s", {cdate})',
          ),
      ),

      # Table structure updates
      'alter' => array(
          2 => array(
              array('update', 'header', 'header', 'text-255 header'),
              //array('add', 'config', 'config', 'int-11 1 配置'),
          ),
          //'version' => 2,
      ),

      'manage' => array(
          //'delete' => false,

          # Buttons shown when updating a record; each value is a JS event.
          # "save-data" is the fixed value for saving the current record,
          # "copy-data" the one for copying it.
          'update_button' => array(
              '提交保存' => 'save-data',
              '放弃保存' => "msg({status:1,msg:'yes'})",
              //'复制数据' => 'copy-data',
          ),

          # Disallow editing
          //'edit' => false,
          # Type of the list page
          //'list_type' => 'parent',

          # Per-row buttons on the list page (the original comment here read "can delete").
          # Several entries carry a third element, the expression '{status} <= 2'.
          'list_button' => array(
              'update' => array('编辑', '"project&option_pid={project_id}"'),
              'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"'),
              'delete' => '删除',
              'br1' => array('<br /><br />'),
              'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
              'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{status} <= 2'),
              'br2' => array('<br /><br />'),
              'new' => array('测试采集', Dever::url('lib/api.test', 'spider')),
              'oper1' => array('开始采集', '"spider/lib/api.add?id={id}"', '{status} <= 2'),
          ),
      ),

      # Request interface definitions
      'request' => array(
          # main: fetch all top-level entries (project_id = -1, state = 1)
          'main' => array(
              'where' => array(
                  'project_id' => -1,
                  'state' => 1,
              ),
              'type' => 'all',
              'order' => array(
                  'reorder' => 'desc',
                  'id' => 'desc',
              ),
              'col' => '*|id',
          ),

          # Fetch all finished records that have an interval, so they can be queued again
          'getAll' => array(
              'option' => array(
                  'id' => 'yes',
                  'status' => 2,
                  'interval' => array('yes', '>='),
                  'sdate' => array('yes-sdate', '<='),
                  'state' => 1,
              ),
              'type' => 'all',
              'order' => array('id' => 'desc'),
              'col' => '*',
          ),

          # Fetch a single record using the id / status / sdate options
          'getOne' => array(
              'option' => array(
                  'id' => 'yes',
                  'status' => array('yes'),
                  'sdate' => array('yes-sdate', '<='),
                  'state' => 1,
              ),
              'type' => 'one',
          ),
      ),
  );