| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 | <?php/** * Created by PhpStorm. * User: Jaeger <JaegerCode@gmail.com> * Date: 2017/10/1 * Baidu searcher */namespace QL\Ext;use QL\Contracts\PluginContract;use QL\QueryList;class Baidu implements PluginContract{    protected $ql;    protected $keyword;    protected $pageNumber = 10;    protected $httpOpt = [        'headers' => [            'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',            'Accept-Encoding' => 'gzip, deflate, br',        ]    ];    const API = 'https://www.baidu.com/s';    const RULES = [      'title' => ['h3','text'],      'link' => ['h3>a','href']    ];    const RANGE = '.result';    public function __construct(QueryList $ql, $pageNumber)    {        $this->ql = $ql->rules(self::RULES)->range(self::RANGE);        $this->pageNumber = $pageNumber;    }    public static function install(QueryList $queryList, ...$opt)    {        $name = $opt[0] ?? 'baidu';        $queryList->bind($name,function ($pageNumber = 10){            return new Baidu($this,$pageNumber);        });    }    public function setHttpOpt(array $httpOpt = [])    {        $this->httpOpt = $httpOpt;        return $this;    }    public function search($keyword)    {        $this->keyword = $keyword;        return $this;    }    public function page($page = 1,$realURL = false)    {        return $this->query($page)->query()->getData(function ($item) use($realURL){            $realURL && $item['link'] = $this->getRealURL($item['link']);            return $item;        });    }    public function getCount()    {        $count = 0;        $text =  $this->query(1)->find('.nums')->text();        if(preg_match('/[\d,]+/',$text,$arr))        {            $count = str_replace(',','',$arr[0]);        }        return (int)$count;    }    public function getCountPage()    {        $count = $this->getCount();        $countPage = ceil($count / $this->pageNumber);        return $countPage;    }    protected function query($page = 1)    {        $this->ql->get(self::API,[            'wd' => $this->keyword,            'rn' => $this->pageNumber,            'pn' => $this->pageNumber * ($page-1)        ],$this->httpOpt);        return $this->ql;    }    /**     * 得到百度跳转的真正地址     * @param $url     * @return mixed     */    protected  function getRealURL($url)    {        if(empty($url)) return $url;        $header = get_headers($url,1);        if (strpos($header[0],'301') || strpos($header[0],'302'))        {            if(is_array($header['Location']))            {                //return $header['Location'][count($header['Location'])-1];                return $header['Location'][0];            }            else            {                return $header['Location'];            }        }        else        {            return $url;        }    }}
 |