|
@@ -0,0 +1,262 @@
|
|
|
+<?php
|
|
|
+
|
|
|
+namespace Spider\Lib;
|
|
|
+
|
|
|
+use Dever;
|
|
|
+
|
|
|
class Get
{
    /**
     * Schedule a crawl for every account returned by the 'spider/wechat'
     * lookup and move each account's next-run timestamp (zdate) forward.
     *
     * NOTE(review): 'where_zdate' presumably selects accounts whose zdate is
     * due at the given time; confirm against the spider/wechat model.
     *
     * @return string Always 'ok'.
     */
    public function wechat_api()
    {
        $now = time();
        $accounts = Dever::db('spider/wechat')->get(array('where_zdate' => $now));

        if (!$accounts) {
            return 'ok';
        }

        foreach ($accounts as $account) {
            // Hand the actual crawl to Dever::daemon so this call only schedules.
            Dever::daemon('lib/get.get?id=' . $account['id'], 'spider');

            // A missing or implausibly small zdate is treated as "never scheduled".
            $base = $account['zdate'];
            if (!$base || $base < 100000) {
                $base = time();
            }

            // pdate is applied as a number of hours.
            // NOTE(review): a pdate of 0 leaves zdate unchanged, so the account
            // would be picked up again on every call; confirm that is intended.
            Dever::db('spider/wechat')->update(array(
                'where_id' => $account['id'],
                'zdate' => $base + ($account['pdate'] * 3600),
            ));
        }

        return 'ok';
    }

    /**
     * Mirror the audit state of a crawled entry into 'content/article'.
     *
     * When the audit value is 2 the crawled entry is inserted as an article,
     * or the existing article is overwritten. For any other audit value an
     * existing article is set back to audit = 1.
     *
     * @param mixed $id   Id of the 'spider/data' row.
     * @param mixed $name Unused; kept so the signature stays unchanged.
     * @param mixed $data Payload from which the 'audit' value is read.
     * @return void
     */
    public function up($id, $name, $data)
    {
        $audit = Dever::param('audit', $data);

        // Check whether an article was already generated from this entry.
        $article = Dever::db('content/article')->one(array('wechat_data_id' => $id));

        // Loose comparison on purpose: the audit value may arrive as a string.
        if ($audit != 2) {
            if ($article) {
                Dever::db('content/article')->update(array(
                    'where_id' => $article['id'],
                    'audit' => 1,
                ));
            }
            return;
        }

        // Publishing needs both the crawled row and its owning account; stop
        // instead of writing an article full of null fields.
        $info = Dever::db('spider/data')->one($id);
        if (!$info) {
            return;
        }
        $wechat = Dever::db('spider/wechat')->one($info['wechat_id']);
        if (!$wechat) {
            return;
        }

        $row = array(
            'cate_id' => $wechat['cate_id'],
            'author_id' => $wechat['author_id'],
            'name' => $info['name'],
            'pdate' => $info['pdate'],
            'pic_cover' => $info['cover'],
            'content' => $info['content'],
            'audit' => 2,
            'wechat' => 2,
            'wechat_data_id' => $info['id'],
        );

        if ($article) {
            $row['where_id'] = $article['id'];
            Dever::db('content/article')->update($row);
        } else {
            Dever::db('content/article')->insert($row);
        }
    }

    /**
     * Crawl one account: find it through Sogou's WeChat search, record its
     * WeChat id if not stored yet, and store the articles listed on its
     * profile page.
     *
     * The account id is read from the 'id' request input.
     *
     * @return void
     */
    public function get()
    {
        $id = Dever::input('id');
        $info = Dever::db('spider/wechat')->one($id);
        if (!$info) {
            return;
        }

        $name = $info['name'];
        $wechat = $info['wechat'];

        // The name goes into a query string, so it must be URL-encoded.
        $url = 'https://weixin.sogou.com/weixin?type=1&query=' . urlencode($name) . '&ie=utf8&s_from=input&_sug_=n&_sug_type_=';
        $content = $this->content($name, $url);

        // The first search hit links to the account's profile page.
        if (!preg_match('/<a target="_blank" uigs="account_name_0" href="(.*?)">(.*?)<\/a>/i', $content, $matches)
            || $matches[1] === ''
        ) {
            return;
        }

        // The href is HTML-escaped in the page source; decode the ampersands.
        $url = str_replace('&amp;', '&', $matches[1]);
        $content = $this->content($name, $url);

        // Extract the WeChat id, e.g. <p class="profile_account">微信号: tesexiaozhenzmg</p>
        if (!$wechat
            && preg_match('/<p class="profile_account">微信号: (.*?)<\/p>/i', $content, $matches)
            && $matches[1]
        ) {
            $wechat = $matches[1];
            Dever::db('spider/wechat')->update(array('where_id' => $id, 'wechat' => $wechat));
        }

        // The profile page embeds its recent posts as a JS object literal.
        if (!preg_match('/var msgList = {(.*?)};/i', $content, $matches) || !$matches[1]) {
            return;
        }

        $decoded = Dever::json_decode('{' . $matches[1] . '}');
        if (!is_array($decoded) || !isset($decoded['list']) || !is_array($decoded['list'])) {
            return;
        }

        foreach ($decoded['list'] as $message) {
            // Skip entries that carry no article payload.
            if (isset($message['app_msg_ext_info'])) {
                $this->getArticle($id, $name, $message['app_msg_ext_info']);
            }
        }
    }

    /**
     * Fetch a single article page and insert or update its 'spider/data' row.
     *
     * Rows are matched on (wechat_id, name), so a re-crawl refreshes the
     * stored article instead of duplicating it.
     *
     * @param mixed  $id   Id of the owning 'spider/wechat' row.
     * @param string $name Account name; only used as part of the cache key.
     * @param array  $data One 'app_msg_ext_info' entry; reads content_url,
     *                     title, cover and copyright_stat.
     * @return void
     */
    private function getArticle($id, $name, $data)
    {
        // Without a URL and a title there is nothing to fetch or to key on.
        if (!is_array($data) || !isset($data['content_url'], $data['title'])) {
            return;
        }

        // content_url is host-relative and HTML-escaped.
        $url = str_replace('&amp;', '&', 'https://mp.weixin.qq.com' . $data['content_url']);
        $content = $this->content($name, $url);

        $body = '';
        if (preg_match('/<div class="rich_media_content " id="js_content">([\s\S]*?)<\/div>/i', $content, $matches)
            && $matches[1]
        ) {
            $body = $this->convertContent($matches[1]);
        }

        $cover = isset($data['cover']) ? $this->pic($data['cover']) : '';

        // copyright_stat 11 maps to 1; anything else (including absent) to 2.
        $copyright = (isset($data['copyright_stat']) && $data['copyright_stat'] == 11) ? 1 : 2;

        // The publish time is taken from the page's inline script.
        $pdate = '';
        if (preg_match('/var ct = "(.*?)";/i', $content, $matches) && $matches[1]) {
            $pdate = $matches[1];
        }

        $row = array(
            'wechat_id' => $id,
            'name' => $data['title'],
        );
        $existing = Dever::db('spider/data')->one($row);

        $row['cover'] = $cover;
        $row['content'] = $body;
        $row['copyright'] = $copyright;
        $row['pdate'] = $pdate;

        if ($existing) {
            $row['where_id'] = $existing['id'];
            Dever::db('spider/data')->update($row);
        } else {
            Dever::db('spider/data')->insert($row);
        }
    }

    /**
     * Rewrite an article body so its lazy-loaded images point at local copies.
     *
     * Every distinct data-src URL is passed through pic() once. When the copy
     * fails the remote URL is kept rather than blanked. Finally every
     * 'data-src' is renamed to 'src'.
     *
     * @param string $content Raw article HTML fragment.
     * @return string Converted HTML.
     */
    private function convertContent($content)
    {
        $content = trim($content);

        if (preg_match_all('/data-src="(.*?)"/i', $content, $matches)) {
            // De-duplicate so a repeated image is only copied once.
            foreach (array_unique($matches[1]) as $remote) {
                if ($remote === '') {
                    continue;
                }

                $local = $this->pic($remote);
                if (!$local) {
                    // Copy failed: leave the original URL in place.
                    continue;
                }

                $content = str_replace($remote, $local, $content);
            }
        }

        return str_replace('data-src', 'src', $content);
    }

    /**
     * Copy a remote image through the 'upload/save.copy' service.
     *
     * @param string $pic Remote image URL.
     * @return string The 'url' reported by the service, or '' when none is returned.
     */
    private function pic($pic)
    {
        $data = Dever::load('upload/save.copy?key=1&file=' . urlencode($pic));

        return ($data && isset($data['url'])) ? $data['url'] : '';
    }

    /**
     * Build the request headers for a crawl request, using a randomly chosen
     * User-Agent and a randomly generated client IP.
     *
     * @return array Header name => value.
     */
    private function header()
    {
        $ip = $this->ip();

        return array(
            'Accept' => 'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, */*',
            'Connection' => 'Keep-Alive',
            'Pragma' => 'no-cache',
            'Accept-Language' => 'zh-Hans-CN,zh-Hans;q=0.8,en-US;q=0.5,en;q=0.3',
            'User-Agent' => $this->agent(),
            'CLIENT-IP' => $ip,
            'X-FORWARDED-FOR' => $ip,
        );
    }

    /**
     * Pick a random User-Agent string from a fixed pool.
     *
     * @return string
     */
    private function agent()
    {
        $agents = [
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
            'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)',
            'Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
            'Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)',
            'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)',
            'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)',
            'Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)',
            'Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527 (KHTML, like Gecko, Safari/419.3) Arora/0.6',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1',
            'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0',
            'Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5',
            'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20',
            'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
        ];

        return $agents[array_rand($agents)];
    }

    /**
     * Generate a random IPv4 address inside one of a fixed set of ranges.
     *
     * Range bounds are signed 32-bit integers, which is why addresses above
     * 127.255.255.255 appear as negative numbers; long2ip() maps them back.
     *
     * @return string Dotted-quad IPv4 address.
     */
    private function ip()
    {
        $ranges = [
            [607649792, 608174079],     // 36.56.0.0-36.63.255.255
            [1038614528, 1039007743],   // 61.232.0.0-61.237.255.255
            [1783627776, 1784676351],   // 106.80.0.0-106.95.255.255
            [2035023872, 2035154943],   // 121.76.0.0-121.77.255.255
            [2078801920, 2079064063],   // 123.232.0.0-123.235.255.255
            [-1950089216, -1948778497], // 139.196.0.0-139.215.255.255
            [-1425539072, -1425014785], // 171.8.0.0-171.15.255.255
            [-1236271104, -1235419137], // 182.80.0.0-182.92.255.255
            [-770113536, -768606209],   // 210.25.0.0-210.47.255.255
            [-569376768, -564133889],   // 222.16.0.0-222.95.255.255
        ];

        // Index by the actual list size so adding a range needs no other change.
        $range = $ranges[array_rand($ranges)];

        return long2ip(mt_rand($range[0], $range[1]));
    }

    /**
     * Fetch a URL through a one-hour on-disk cache.
     *
     * The cache file lives under <data>/tmp/YYYY/MM/DD/ and is keyed on the
     * md5 of the account name and of the URL. Empty responses are never
     * written, so a failed fetch is retried on the next call.
     *
     * @param string $name Account name; only used as part of the cache key.
     * @param string $url  URL to fetch.
     * @return mixed The page body as returned by Dever::curl or read from cache.
     */
    private function content($name, $url)
    {
        $key = md5($name) . '_' . md5($url);
        $now = time();
        $date = explode('-', date('Y-m-d', $now));

        $file = Dever::path(Dever::data() . 'tmp/', $date[0] . '/' . $date[1] . '/' . $date[2] . '/' . $key);
        $ttl = 3600;

        // Age is "now minus mtime"; the reverse subtraction is never positive
        // and would keep the cached copy forever.
        $fresh = is_file($file) && ($now - filemtime($file)) <= $ttl;

        if ($fresh) {
            $content = file_get_contents($file);
        } else {
            $header = $this->header();
            $content = Dever::curl($url, false, 'get', false, $header, $header['User-Agent'], 'http://weixin.sogou.com/');

            // Only cache real content so a failed request is not pinned.
            if ($content) {
                file_put_contents($file, $content);
            }
        }

        if (!$content) {
            Dever::alert('内容已失效');
        }

        return $content;
    }
}
|