123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- <?php
- /**
- * Created by PhpStorm.
- * User: Jaeger <JaegerCode@gmail.com>
- * Date: 2017/10/1
- * Baidu searcher
- */
- namespace QL\Ext;
- use QL\Contracts\PluginContract;
- use QL\QueryList;
/**
 * Baidu search plugin for QueryList.
 *
 * Holds a QueryList instance configured with the rules and range below and
 * exposes a small fluent API: search() to set the keyword, then page(),
 * getCount() or getCountPage() to fetch results.
 */
class Baidu implements PluginContract
{
    /** @var QueryList Instance returned by rules()->range() in the constructor. */
    protected $ql;

    /** @var mixed Search keyword, sent as the "wd" request parameter. */
    protected $keyword;

    /** @var int Results per page; sent as "rn" and used to compute the "pn" offset. */
    protected $pageNumber = 10;

    /** @var array Options passed as the third argument of QueryList::get() on every request. */
    protected $httpOpt = [
        'headers' => [
            'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
            'Accept-Encoding' => 'gzip, deflate, br',
        ]
    ];

    /** Search endpoint requested by query(). */
    const API = 'https://www.baidu.com/s';

    /** Extraction rules handed to QueryList::rules(): field => [selector, attribute]. */
    const RULES = [
        'title' => ['h3', 'text'],
        'link' => ['h3>a', 'href']
    ];

    /** Selector handed to QueryList::range(). */
    const RANGE = '.result';

    /**
     * @param QueryList $ql         QueryList instance to configure and use for requests.
     * @param int       $pageNumber Number of results requested per page.
     */
    public function __construct(QueryList $ql, $pageNumber)
    {
        $this->ql = $ql->rules(self::RULES)->range(self::RANGE);
        $this->pageNumber = $pageNumber;
    }

    /**
     * Register the plugin on a QueryList instance.
     *
     * The method name it is bound under is $opt[0], defaulting to "baidu".
     *
     * @param QueryList $queryList Instance to bind the plugin to.
     * @param mixed     ...$opt    Optional: [0] => name to bind under.
     */
    public static function install(QueryList $queryList, ...$opt)
    {
        $name = $opt[0] ?? 'baidu';

        // NOTE(review): $this inside the closure is presumably the QueryList
        // instance the closure gets bound to by bind() - confirm against QueryList.
        $queryList->bind($name, function ($pageNumber = 10) {
            return new Baidu($this, $pageNumber);
        });
    }

    /**
     * Replace (not merge) the HTTP options used for subsequent requests.
     *
     * @param array $httpOpt New options; the defaults are discarded entirely.
     * @return $this
     */
    public function setHttpOpt(array $httpOpt = [])
    {
        $this->httpOpt = $httpOpt;

        return $this;
    }

    /**
     * Set the keyword for subsequent requests. No request is made here.
     *
     * @param mixed $keyword
     * @return $this
     */
    public function search($keyword)
    {
        $this->keyword = $keyword;

        return $this;
    }

    /**
     * Fetch one page of results.
     *
     * @param int  $page    1-based page number.
     * @param bool $realURL When truthy, each item's "link" is passed through getRealURL().
     * @return mixed Whatever QueryList::getData() returns for the extracted items.
     */
    public function page($page = 1, $realURL = false)
    {
        $response = $this->query($page);

        return $response->query()->getData(function ($item) use ($realURL) {
            if ($realURL) {
                $item['link'] = $this->getRealURL($item['link']);
            }

            return $item;
        });
    }

    /**
     * Read the total result count from the first result page.
     *
     * Takes the text of the ".nums" element and parses the first run of
     * digits/commas in it.
     *
     * @return int The parsed count, or 0 when no number is found.
     */
    public function getCount()
    {
        $text = $this->query(1)->find('.nums')->text();

        if (!preg_match('/[\d,]+/', $text, $matches)) {
            return 0;
        }

        // Strip the thousands separators before casting.
        return (int) str_replace(',', '', $matches[0]);
    }

    /**
     * Number of pages needed for the total result count at the current page size.
     *
     * @return float Result of ceil(), i.e. a whole number as float.
     */
    public function getCountPage()
    {
        return ceil($this->getCount() / $this->pageNumber);
    }

    /**
     * Request the given result page and return the QueryList instance.
     *
     * @param int $page 1-based page number; converted to a zero-based "pn" offset.
     * @return QueryList
     */
    protected function query($page = 1)
    {
        $params = [
            'wd' => $this->keyword,
            'rn' => $this->pageNumber,
            'pn' => $this->pageNumber * ($page - 1)
        ];
        $this->ql->get(self::API, $params, $this->httpOpt);

        return $this->ql;
    }

    /**
     * Resolve the real target address behind a Baidu redirect link.
     *
     * Issues a header request for $url; when the first status line is a 301 or
     * 302, the (first) Location header value is returned. In every other case -
     * empty URL, failed request, non-redirect status, missing Location header -
     * the URL is returned unchanged.
     *
     * @param mixed $url
     * @return mixed
     */
    protected function getRealURL($url)
    {
        if (empty($url)) {
            return $url;
        }

        // get_headers() returns false when the request fails; fall back to the
        // original link instead of indexing into a boolean.
        $headers = get_headers($url, 1);
        if ($headers === false || !isset($headers[0])) {
            return $url;
        }

        // Only the first status line is inspected, e.g. "HTTP/1.1 302 Found".
        if (preg_match('/^HTTP\/\S+\s+30[12]\b/i', (string) $headers[0]) !== 1) {
            return $url;
        }

        // Header names are case-insensitive, so do not rely on the exact "Location" spelling.
        foreach ($headers as $name => $value) {
            if (!is_string($name) || strcasecmp($name, 'Location') !== 0) {
                continue;
            }

            // Several Location headers are collected into an array; use the first one.
            $location = is_array($value) ? reset($value) : $value;

            return ($location === false || $location === '') ? $url : $location;
        }

        return $url;
    }
}
|