🐛修复TikTok解析失败并添加封面数据

This commit is contained in:
Evil0ctal 2022-06-29 15:33:35 -07:00 committed by GitHub
parent bffaa773ac
commit e12d342449
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -2,7 +2,7 @@
# -*- encoding: utf-8 -*-
# @Author: https://github.com/Evil0ctal/
# @Time: 2021/11/06
# @Update: 2022/06/27
# @Update: 2022/06/29
# @Function:
# 核心代码估值1块(๑•̀ㅂ•́)و✧
# 用于爬取Douyin/TikTok数据并以字典形式返回。
@@ -241,6 +241,12 @@ class Scraper:
video_share_count = str(js['item_list'][0]['statistics']['share_count'])
# 上传时间戳
video_create_time = str(js['item_list'][0]['create_time'])
# 视频封面
video_cover = js['item_list'][0]['video']['cover']['url_list'][0]
# 视频动态封面
video_dynamic_cover = js['item_list'][0]['video']['dynamic_cover']['url_list'][0]
# 视频原始封面
video_origin_cover = js['item_list'][0]['video']['origin_cover']['url_list'][0]
# 将话题保存在列表中
video_hashtags = []
for tag in js['item_list'][0]['text_extra']:
@@ -275,6 +281,9 @@ class Scraper:
'video_play_count': video_play_count,
'video_share_count': video_share_count,
'video_create_time': video_create_time,
'video_cover': video_cover,
'video_dynamic_cover': video_dynamic_cover,
'video_origin_cover': video_origin_cover,
'video_hashtags': video_hashtags}
return video_data
except Exception as e:
@@ -310,7 +319,6 @@ class Scraper:
video_id = re.findall('video/(\d+)?', original_url)[0]
print('获取到的TikTok视频ID是{}'.format(video_id))
# 尝试从TikTok网页获取部分视频数据失败后判断为图集
try:
try:
tiktok_headers = self.tiktok_headers
html = requests.get(url=original_url, headers=tiktok_headers, proxies=self.proxies)
@@ -329,6 +337,8 @@ class Scraper:
response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text
# 将API获取到的内容格式化为JSON
result = json.loads(response)
for i in result["aweme_details"][0]:
if i != 'image_post_info':
# 类型为视频
url_type = 'video'
print('类型为视频')
@@ -371,10 +381,19 @@ class Scraper:
video_download_count = result["aweme_details"][0]['statistics']['download_count']
# 分享次数
video_share_count = result["aweme_details"][0]['statistics']['share_count']
# 视频封面
video_cover = result["aweme_details"][0]['video']['cover']['url_list'][0]
# 视频动态封面
video_dynamic_cover = result["aweme_details"][0]['video']['dynamic_cover']['url_list'][0]
# 视频原始封面
video_origin_cover = result["aweme_details"][0]['video']['origin_cover']['url_list'][0]
# 将话题保存在列表中
video_hashtags = []
for tag in result["aweme_details"][0]['text_extra']:
if 'hashtag_name' in tag:
video_hashtags.append(tag['hashtag_name'])
else:
continue
if video_info != None:
# 作者粉丝数量
video_author_followerCount = video_info['authorStats']['followerCount']
@@ -429,11 +448,14 @@ class Scraper:
'video_author_heartCount': video_author_heartCount,
'video_author_videoCount': video_author_videoCount,
'video_author_diggCount': video_author_diggCount,
'video_cover': video_cover,
'video_dynamic_cover': video_dynamic_cover,
'video_origin_cover': video_origin_cover,
'video_hashtags': video_hashtags
}
# 返回包含数据的字典
return video_data
except:
else:
# 判断链接是图集链接
# https://www.tiktok.com/@tamm6636/video/7105440975878655278
video_id = re.findall('video/(\d+)?', original_url)[0]