Baidu.php 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. <?php
  2. /**
  3. * Created by PhpStorm.
  4. * User: Jaeger <JaegerCode@gmail.com>
  5. * Date: 2017/10/1
  6. * Baidu searcher
  7. */
  8. namespace QL\Ext;
  9. use QL\Contracts\PluginContract;
  10. use QL\QueryList;
  /**
   * QueryList plugin that queries the Baidu search endpoint and extracts
   * result titles and links from the returned page.
   *
   * Typical use after installation (default bound name is "baidu"):
   *   $ql->baidu(10)->search('keyword')->page(1);
   */
  class Baidu implements PluginContract
  {
      /** @var QueryList QueryList instance pre-configured with RULES and RANGE. */
      protected $ql;

      /** @var string|null Keyword sent as the "wd" query parameter. */
      protected $keyword;

      /** @var int Number of results requested per page ("rn" query parameter). */
      protected $pageNumber = 10;

      /** @var array HTTP options forwarded as the third argument of QueryList::get(). */
      protected $httpOpt = [
          'headers' => [
              'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
              'Accept-Encoding' => 'gzip, deflate, br',
          ]
      ];

      /** Search endpoint requested by query(). */
      const API = 'https://www.baidu.com/s';

      /** Extraction rules: field name => [selector, attribute]. */
      const RULES = [
          'title' => ['h3', 'text'],
          'link' => ['h3>a', 'href']
      ];

      /** Selector of the element that wraps a single search result. */
      const RANGE = '.result';

      /**
       * @param QueryList $ql         Instance to configure; rules() and range() are applied to it.
       * @param int       $pageNumber Number of results per page.
       */
      public function __construct(QueryList $ql, $pageNumber)
      {
          $this->ql = $ql->rules(self::RULES)->range(self::RANGE);
          $this->pageNumber = $pageNumber;
      }

      /**
       * Register this plugin on a QueryList instance.
       *
       * @param QueryList $queryList Instance on which the plugin method is bound.
       * @param mixed     ...$opt    $opt[0] is the name to bind under; defaults to 'baidu'.
       * @return void
       */
      public static function install(QueryList $queryList, ...$opt)
      {
          $name = $opt[0] ?? 'baidu';
          // NOTE(review): $this inside the closure is presumably the QueryList
          // instance the closure gets bound to by bind() - confirm against QueryList.
          $queryList->bind($name, function ($pageNumber = 10) {
              return new Baidu($this, $pageNumber);
          });
      }

      /**
       * Replace the HTTP options used for every request.
       *
       * The given array replaces the defaults entirely (it is not merged), so the
       * default headers are dropped unless the caller includes them again.
       *
       * @param array $httpOpt
       * @return $this
       */
      public function setHttpOpt(array $httpOpt = [])
      {
          $this->httpOpt = $httpOpt;

          return $this;
      }

      /**
       * Set the keyword to search for.
       *
       * @param string $keyword
       * @return $this
       */
      public function search($keyword)
      {
          $this->keyword = $keyword;

          return $this;
      }

      /**
       * Fetch one page of results.
       *
       * @param int  $page    1-based page index.
       * @param bool $realURL When true, each non-empty link is passed through getRealURL().
       * @return mixed Whatever QueryList::getData() returns for the extracted items.
       */
      public function page($page = 1, $realURL = false)
      {
          return $this->query($page)->query()->getData(function ($item) use ($realURL) {
              // Empty links are left untouched; getRealURL() would return them as-is anyway.
              if ($realURL && !empty($item['link'])) {
                  $item['link'] = $this->getRealURL($item['link']);
              }

              return $item;
          });
      }

      /**
       * Read the total result count from the ".nums" element of the first page.
       *
       * @return int Parsed count, or 0 when no number is found in the text.
       */
      public function getCount()
      {
          $text = (string) $this->query(1)->find('.nums')->text();

          // Require a leading digit so a stray comma in front of the number is
          // not picked up as the match (which would yield 0).
          if (preg_match('/\d[\d,]*/', $text, $matches)) {
              return (int) str_replace(',', '', $matches[0]);
          }

          return 0;
      }

      /**
       * Compute the number of pages from the total count and the page size.
       *
       * @return float Result of ceil(); 0.0 when the page size is not positive.
       */
      public function getCountPage()
      {
          // Guard the division below: a page size of zero would divide by zero.
          if ($this->pageNumber <= 0) {
              return 0.0;
          }

          return ceil($this->getCount() / $this->pageNumber);
      }

      /**
       * Request the given result page and return the QueryList instance.
       *
       * Sends "wd" (keyword), "rn" (page size) and "pn", computed as
       * pageNumber * (page - 1).
       *
       * @param int $page 1-based page index.
       * @return QueryList
       */
      protected function query($page = 1)
      {
          $params = [
              'wd' => $this->keyword,
              'rn' => $this->pageNumber,
              'pn' => $this->pageNumber * ($page - 1)
          ];
          $this->ql->get(self::API, $params, $this->httpOpt);

          return $this->ql;
      }

      /**
       * Resolve the real target address behind a Baidu redirect link.
       *
       * Requests the headers of $url and, when the first response is a 301 or
       * 302, returns its Location header. In every other case (empty URL, failed
       * request, non-redirect status, missing Location) the input is returned
       * unchanged.
       *
       * @param string $url
       * @return mixed The redirect target, or $url itself when it cannot be resolved.
       */
      protected function getRealURL($url)
      {
          if (empty($url)) {
              return $url;
          }

          $headers = get_headers($url, 1);
          // get_headers() returns false on failure; fall back to the original URL.
          if ($headers === false || !isset($headers[0])) {
              return $url;
          }

          // Index 0 holds the status line of the first response.
          if (!preg_match('/^HTTP\/\S+\s+30[12]\b/i', (string) $headers[0])) {
              return $url;
          }

          // Header names keep the casing the server sent, so compare
          // case-insensitively. Iteration order is insertion order, so the first
          // match belongs to the earliest response.
          foreach ($headers as $name => $value) {
              if (!is_string($name) || strcasecmp($name, 'Location') !== 0) {
                  continue;
              }

              // Several Location values are collected into an array; the first
              // one is the target of the initial redirect.
              $location = is_array($value) ? $value[0] : $value;

              return empty($location) ? $url : $location;
          }

          return $url;
      }
  }