# -*- coding: utf-8 -*-
# @Author : lihuiwen
# @file : tiktok_crawler
# @Email : huiwennear@163.com
# @Time : 2024/5/23 16:59
"""
TikTok comment crawler.
"""
  9. from utils.common_utils import CommonUtils
  10. import requests
  11. from urllib.parse import urlparse
  12. class TiktokComment:
  13. def __init__(self):
  14. self.common_utils = CommonUtils()
  15. self.comment_list_headers = {
  16. 'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
  17. 'sec-ch-ua-mobile': '?0',
  18. 'User-Agent': self.common_utils.user_agent,
  19. 'sec-ch-ua-platform': '"Windows"',
  20. 'Accept': '*/*',
  21. 'Sec-Fetch-Site': 'same-origin',
  22. 'Sec-Fetch-Mode': 'cors',
  23. 'Sec-Fetch-Dest': 'empty',
  24. 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
  25. }
  26. def get_comment_list(self, req_url):
  27. aweme_id = urlparse(req_url).path.split("/")[-1]
  28. ms_token = self.common_utils.get_ms_token()
  29. req_url = f"https://www.tiktok.com/api/comment/list/?WebIdLastTime=1715249710&aid=1988&app_language=ja-JP&app_name=tiktok_web&aweme_id={aweme_id}&browser_language=zh-CN&browser_name=Mozilla&browser_online=true&browser_platform=Win32&browser_version=5.0%20%28Windows%20NT%2010.0%3B%20Win64%3B%20x64%29%20AppleWebKit%2F537.36%20%28KHTML%2C%20like%20Gecko%29%20Chrome%2F123.0.0.0%20Safari%2F537.36&channel=tiktok_web&cookie_enabled=true&count=20&current_region=JP&cursor=0&device_id=7366941338308609569&device_platform=web_pc&enter_from=tiktok_web&focus_state=true&fromWeb=1&from_page=video&history_len=2&is_fullscreen=false&is_non_personalized=false&is_page_visible=true&odinId=7367172442253296673&os=windows&priority_region=&referer=&region=GB&screen_height=1080&screen_width=1920&tz_name=Asia%2FShanghai&webcast_language=zh-Hans&msToken={ms_token}"
  30. xbogus = self.common_utils.get_xbogus(req_url, self.common_utils.user_agent)
  31. req_url = req_url + f'&X-Bogus={xbogus}&_signature=_02B4Z6wo000016M20awAAIDAnp.LMKuZmC-jNtUAAI6L17'
  32. response = requests.request("GET", req_url, headers=self.comment_list_headers,verify=False, timeout=3)
  33. if (response.text):
  34. req_json = response.json()
  35. total = req_json.get('total')
  36. comments = req_json.get('comments')
  37. if (comments):
  38. for comment_index in range(len(comments)):
  39. comment_item = comments[comment_index]
  40. print(f"爬取成功:{comment_item.get('user').get('nickname')}:{comment_item.get('text')}")
  41. else:
  42. print(f"爬取结束:评论数={total}")
  43. else:
  44. print(f"爬取失败或没有评论")
  45. if __name__ == '__main__':
  46. req_url = "https://www.tiktok.com/@.jisvnq/video/7341777664224677153"
  47. tiktok_comment = TiktokComment()
  48. tiktok_comment.get_comment_list(req_url)