project.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. <?php
  # Frequently used option maps (integer key => label).

  # On/off labels; not referenced anywhere in the visible part of this file.
  $local = array(1 => '关闭', 2 => '开启');

  # Labels for the 'status' field of a project (also exported as 'status' in the returned definition).
  $status = array(
      1 => '未开始',
      2 => '已完成',
      3 => '队列中',
      4 => '运行中',
  );

  # Options for the 'request_type' field.
  $request_type = array(1 => 'get', 2 => 'post');

  # Options for the 'content_type' field.
  $content_type = array(1 => '普通表单', 2 => 'JSON格式');
  # Option provider for the 'project_id' select field.
  # Returns a fixed placeholder entry keyed -1, followed by whatever
  # Dever::load('spider/project-main') yields (when that result is truthy).
  $project = function()
  {
      $options = array(-1 => array('id' => '-1', 'name' => '父级项目'));

      $rows = Dever::load('spider/project-main');
      if (!$rows) {
          return $options;
      }

      // Array union keeps keys already present on the left, so the -1
      // placeholder is never overwritten by a loaded row.
      return $options + $rows;
  };
  # Resolve the path that is embedded in the crontab hint and exported as 'path':
  # start from the spider entry's 'path' and let its 'setup' value win when set.
  $info = Dever::load('manage/project.get');
  $path = $info['spider']['path'];
  $path = isset($info['spider']['setup']) ? $info['spider']['setup'] : $path;

  # Value of 'option_pid' as returned by Dever::input, with -1 passed as the default
  # (presumably the parent project id from the request; confirm against Dever::input).
  $id = Dever::input('option_pid', -1);
  # Table definition for 'project', returned to whatever includes this file.
  # The crawl-specific form fields are rendered as 'hidden' whenever $id is negative.
  $hide_crawl_fields = $id < 0;

  return array(
      # Table name
      'name' => 'project',
      # Display name shown to users
      'lang' => '项目管理',
      'status' => $status,
      'path' => $path,
      # Sort position in the admin menu
      'order' => 20,
      # Help text: how to start the daemon via crontab (the literal spans two lines)
      'desc' => Dever::markdown('**启动守护进程的方法:**
常规任务:请将 ```* * * * * root php '.$path.'daemon/main.php``` 放到crontab中[建议每分钟执行一次]'),
      # Data structure (one entry per column)
      'struct' => array(
          'id' => array(
              'type' => 'int-11',
              'name' => '项目ID',
              'default' => '',
              'desc' => '',
              'match' => 'is_numeric',
              'search' => 'order',
              //'list' => true,
              'order' => 'desc',
          ),
          'name' => array(
              'type' => 'varchar-100',
              'name' => '项目名称',
              'default' => '',
              'desc' => '请输入项目名称',
              'match' => 'is_string',
              'update' => 'text',
              'search' => 'order,fulltext',
              'list' => true,
              'edit' => true,
          ),
          # Parent project selector; defaults to the current option_pid value
          'project_id' => array(
              'type' => 'int-11',
              'name' => '上级项目',
              'default' => $id,
              'desc' => '请选择上级分类',
              'match' => 'is_numeric',
              'update' => 'select',
              'search' => 'select',
              'option' => $project,
          ),
          'site' => array(
              'type' => 'text-255',
              'name' => '采集网址-如有分页,请写成这样{page=1}',
              'default' => '',
              'desc' => '采集网址',
              'match' => 'is_string',
              'update' => $hide_crawl_fields ? 'hidden' : 'textarea',
              //'list' => true,
              //'edit' => 'textarea',
          ),
          'request_type' => array(
              'type' => 'tinyint-1',
              'name' => '请求方式',
              'default' => '1',
              'desc' => '请求方式',
              'match' => 'is_numeric',
              'option' => $request_type,
              'list' => true,
              'update' => $hide_crawl_fields ? 'hidden' : 'radio',
              //'edit' => true,
          ),
          'content_type' => array(
              'type' => 'tinyint-1',
              'name' => '请求媒体类型',
              'default' => '1',
              'desc' => '请求媒体类型',
              'match' => 'is_numeric',
              'option' => $content_type,
              'list' => true,
              'update' => $hide_crawl_fields ? 'hidden' : 'radio',
              //'edit' => true,
          ),
          'collect_rule' => array(
              'type' => 'varchar-500',
              'name' => '采集规则-为空则获取整个页面的内容,支持dom解析、json格式,dom解析$(".info .title a").each().attr("href"),json格式$json[\'data\']',
              'default' => '',
              'desc' => '采集规则',
              'match' => 'option',
              'update' => $hide_crawl_fields ? 'hidden' : 'textarea',
              //'edit' => true,
              //'list' => true,
          ),
          'page_num' => array(
              'type' => 'int-11',
              'name' => '采集页数-值为0则默认采集100页',
              'default' => '0',
              'desc' => '采集页数',
              'match' => 'option',
              'update' => $hide_crawl_fields ? 'hidden' : 'text',
          ),
          # Run status; the list column is rendered through spider/lib/project.status
          'status' => array(
              'type' => 'tinyint-1',
              'name' => '状态',
              'default' => '1',
              'desc' => '状态',
              'match' => 'is_numeric',
              'option' => $status,
              'list' => 'Dever::load("spider/lib/project.status", {id})',
              'update' => $hide_crawl_fields ? 'hidden' : 'radio',
              //'edit' => true,
          ),
          'num' => array(
              'type' => 'int-11',
              'name' => '采集次数',
              'default' => '0',
              'desc' => '采集次数',
              'match' => 'is_numeric',
          ),
          'cur_page' => array(
              'type' => 'int-11',
              'name' => '当前采集页数',
              'default' => '0',
              'desc' => '当前采集页数',
              'match' => 'is_numeric',
          ),
          'sdate' => array(
              'type' => 'int-11',
              'name' => '下次采集时间',
              'default' => '0',
              'desc' => '下次采集时间',
              'match' => 'is_numeric',
              //'update' => $id < 0 ? 'hidden' : 'date',
              'callback' => 'maketime',
          ),
          'interval' => array(
              'type' => 'int-11',
              'name' => '采集间隔秒数-填写开始时间之后的间隔采集的秒数,为0则只采集一次',
              'default' => '0',
              'desc' => '采集间隔秒数',
              'match' => 'is_numeric',
              'update' => $hide_crawl_fields ? 'hidden' : 'text',
          ),
          'header' => array(
              'type' => 'varchar-8000',
              'name' => 'Header参数-换行为多个参数,格式cookie: 11',
              'default' => '',
              'desc' => 'Header参数',
              'match' => 'option',
              'update' => $hide_crawl_fields ? 'hidden' : 'textarea',
              //'edit' => true,
              //'list' => true,
          ),
          'param' => array(
              'type' => 'varchar-8000',
              'name' => '其他参数-一般为post传入的参数,必须是json格式',
              'default' => '',
              'desc' => '其他参数',
              'match' => 'option',
              'update' => $hide_crawl_fields ? 'hidden' : 'textarea',
              //'edit' => true,
              //'list' => true,
          ),
          'push' => array(
              'type' => 'varchar-2000',
              'name' => '数据推送-采集数据时,会自动往设置好的数据推送接口推送数据,多个用换行隔开,支持http协议和dever协议',
              'default' => '',
              'desc' => '数据推送',
              'match' => 'option',
              'update' => $hide_crawl_fields ? 'hidden' : 'textarea',
              //'edit' => true,
              //'list' => true,
          ),
          'reorder' => array(
              'type' => 'int-11',
              'name' => '排序(数值越大越靠前)',
              'default' => '1',
              'desc' => '请输入排序',
              'match' => 'option',
              'update' => 'text',
              'search' => 'order',
              'list_name' => '排序',
              'list' => true,
              'order' => 'desc',
              'edit' => true,
          ),
          'state' => array(
              'type' => 'tinyint-1',
              'name' => '状态',
              'default' => '1',
              'desc' => '请选择状态',
              'match' => 'is_numeric',
          ),
          # time() is evaluated once, when this definition file is loaded
          'cdate' => array(
              'type' => 'int-11',
              'name' => '更新时间',
              'match' => array('is_numeric', time()),
              'desc' => '',
              # Only takes effect on insert
              //'insert' => true,
              //'list' => 'date("Y-m-d H:i:s", {cdate})',
          ),
      ),
      'manage' => array(
          //'delete' => false,
          # Buttons shown on the update form; each value is a JS event script.
          # "save-data" is the fixed keyword for saving the current record,
          # "copy-data" the one for copying it.
          'update_button' => array(
              '提交保存' => 'save-data',
              '放弃保存' => "msg({status:1,msg:'yes'})",
              //'复制数据' => 'copy-data',
          ),
          # Disallow editing
          //'edit' => false,
          # Type of the list page
          'list_type' => 'parent',
          # Per-row buttons on the list page (deletion is allowed)
          'list_button' => array(
              'update' => array('编辑', '"project&option_pid={project_id}"'),
              'list_data' => array('查看采集数据', '"data&search_option_pid={id}&oper_save_jump=project&oper_parent=project"', '{project_id} > 0'),
              'delete' => '删除',
              'br1' => array('<br /><br />'),
              'add' => array('新增子项目', '"project&option_pid={id}&oper_parent=project&oper_save_jump=project"', '{project_id} == -1'),
              'list_col' => array('设置采集字段', '"col&search_option_pid={id}&oper_parent=project"', '{project_id} > 0 && {status} <= 2'),
              'list_col1' => array('设置自定义字段', '"set&search_option_pid={id}&oper_parent=project"', '{project_id} > 0 && {status} <= 2'),
              'br2' => array('<br /><br />'),
              'new' => array('测试采集', 'Dever::url("spider/lib/api.test?id={id}")', '{project_id} > 0'),
              'oper1' => array('开始采集', 'Dever::url("spider/lib/api.add?id={id}")', '{project_id} > 0 && {status} <= 2'),
          ),
      ),
      # Request (query) interface definitions
      'request' => array(
          # main: fetch all top-level entries (project_id = -1, state = 1)
          'main' => array(
              'where' => array(
                  'project_id' => -1,
                  'state' => 1,
              ),
              'type' => 'all',
              'order' => array(
                  'reorder' => 'desc',
                  'id' => 'desc',
              ),
              'col' => '*|id',
          ),
          # getAll: fetch every finished record that has an interval, so it can be re-queued
          'getAll' => array(
              'option' => array(
                  'id' => 'yes',
                  'status' => 2,
                  'interval' => array('yes', '>='),
                  'sdate' => array('yes-sdate', '<='),
                  'state' => 1,
              ),
              'type' => 'all',
              'order' => array('id' => 'desc'),
              'col' => '*',
          ),
          # getOne: single-record lookup with optional id / status / sdate filters
          'getOne' => array(
              'option' => array(
                  'id' => 'yes',
                  'status' => array('yes'),
                  'sdate' => array('yes-sdate', '<='),
                  'state' => 1,
              ),
              'type' => 'one',
          ),
      ),
  );