|
@@ -2,38 +2,53 @@
|
|
|
namespace Spider\Lib\Doc;
|
|
|
include(DEVER_APP_PATH . 'third/phpQuery.php');
|
|
|
use Dever;
|
|
|
-use Spider\Lib\Doc as Doc;
|
|
|
use phpQuery;
|
|
|
|
|
|
/**
 * phpQuery-backed helper for evaluating DOM selection rules.
 *
 * A rule is a jQuery-like expression string such as `$(".item").find("a")`.
 * An optional `.each().<expr>` suffix asks for `<expr>` to be evaluated
 * against every matched element, with the results returned as JSON.
 */
class Dom
{
    /**
     * Parse an HTML string into a phpQuery document.
     *
     * @param string $html Raw HTML markup.
     * @return mixed Whatever phpQuery::newDocumentHTML() returns.
     */
    public static function init($html)
    {
        return phpQuery::newDocumentHTML($html);
    }

    /**
     * Evaluate a selection rule against a phpQuery object.
     *
     * The rule is rewritten into PHP source: `$` becomes `$dom->find`,
     * `).` becomes `)->`, and a leading `$.` (which the first rewrite turns
     * into `$dom->find.`) becomes `$dom->` so that per-element expressions
     * produced by each() call methods directly on the element.
     *
     * @param mixed  $dom  phpQuery object the rule is applied to.
     * @param string $rule Rule expression, optionally with an `.each().` part.
     * @return mixed The evaluated result, or a JSON string when the rule
     *               contained an `.each().` part.
     */
    public static function find($dom, $rule)
    {
        list($rule, $attr) = self::each($rule);
        $rule = str_replace(
            array('$', ').', '$dom->find.'),
            array('$dom->find', ')->', '$dom->'),
            $rule
        );
        $cmd = '$dom = ' . $rule . ';';
        // SECURITY NOTE(review): eval() executes the rule as PHP code. This is
        // only safe if rules come from a trusted source (e.g. administrator
        // configuration) — confirm against callers; never pass user input here.
        eval($cmd);

        return self::findAttr($dom, $attr);
    }

    /**
     * Split a rule at its `.each()` marker.
     *
     * Declared static because it is only ever invoked as self::each() from
     * static context; a non-static declaration makes that call an Error on
     * PHP 8.
     *
     * @param string $rule Rule expression.
     * @return array Two-element array: the rule up to `.each()`, and the
     *               per-element expression prefixed with `$` (an empty
     *               string when the rule has no `.each().` part).
     */
    public static function each($rule)
    {
        $attr = '';
        if (strpos($rule, '.each().') !== false) {
            $temp = explode('.each()', $rule);
            $rule = $temp[0];
            $attr = '$' . $temp[1];
        }

        return array($rule, $attr);
    }

    /**
     * Apply a per-element expression to every node of a result set.
     *
     * Declared static for the same reason as each(): it is called as
     * self::findAttr() from the static find().
     *
     * @param mixed  $dom  Iterable phpQuery result set.
     * @param string $attr Per-element expression from each(); when empty,
     *                     $dom is returned unchanged.
     * @return mixed $dom itself when $attr is empty, otherwise the JSON
     *               encoding of the list of per-element results.
     */
    public static function findAttr($dom, $attr)
    {
        if (!$attr) {
            return $dom;
        }

        $data = array();
        foreach ($dom as $v) {
            // Wrap each raw node with pq() so the expression can be run on it.
            $data[] = self::find(pq($v), $attr);
        }

        return json_encode($data, JSON_UNESCAPED_UNICODE);
    }

    /**
     * Wrap $dom with pq() and delegate rule processing to Core::rule().
     *
     * @param mixed $doc  Passed through to Core::rule() unchanged.
     * @param mixed $dom  Node or selector; wrapped with pq() before delegating.
     * @param mixed $col  Passed through to Core::rule() unchanged.
     * @param mixed $rule Passed through to Core::rule() unchanged.
     * @param mixed $key  Passed through to Core::rule() unchanged.
     * @return mixed The value returned by Core::rule().
     */
    public static function rule($doc, $dom, $col, $rule, $key)
    {
        $dom = pq($dom);

        return Core::rule($doc, $dom, $col, $rule, $key);
    }
}
|