🐛 Fix the TikTok picture album

This commit is contained in:
Evil0ctal 2022-08-08 18:15:08 -07:00 committed by GitHub
parent 7961f11e1e
commit 86683c8219
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,7 +2,7 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
# @Author: https://github.com/Evil0ctal/ # @Author: https://github.com/Evil0ctal/
# @Time: 2021/11/06 # @Time: 2021/11/06
# @Update: 2022/07/29 # @Update: 2022/08/08
# @Function: # @Function:
# 核心代码估值1块(๑•̀ㅂ•́)و✧ # 核心代码估值1块(๑•̀ㅂ•́)و✧
# 用于爬取Douyin/TikTok数据并以字典形式返回。 # 用于爬取Douyin/TikTok数据并以字典形式返回。
@ -334,211 +334,197 @@ class Scraper:
except: except:
video_info = None video_info = None
# 从TikTok官方API获取部分视频数据 # 从TikTok官方API获取部分视频数据
# 新API2022年7月29日 https://api.tiktokv.com/aweme/v1/aweme/detail/?aweme_id={}
# 旧API https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D
tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/aweme/detail/?aweme_id={}'.format( tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/aweme/detail/?aweme_id={}'.format(
video_id) video_id)
print('正在请求API链接:{}'.format(tiktok_api_link)) print('正在请求API链接:{}'.format(tiktok_api_link))
response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text
# 将API获取到的内容格式化为JSON # 将API获取到的内容格式化为JSON
result = json.loads(response) result = json.loads(response)
for i in result["aweme_detail"]: if 'image_post_info' in response:
if i != 'image_post_info': # 判断链接是图集链接
# 类型为视频 url_type = 'album'
url_type = 'video' print('类型为图集')
print('类型为视频') # 视频标题
# 无水印视频链接 album_title = result["aweme_detail"]["desc"]
nwm_video_url = result["aweme_detail"]["video"]["play_addr"]["url_list"][0] # 视频作者昵称
try: album_author_nickname = result["aweme_detail"]['author']["nickname"]
# 有水印视频链接 # 视频作者ID
wm_video_url = result["aweme_detail"]["video"]['download_addr']['url_list'][0] album_author_id = result["aweme_detail"]['author']["unique_id"]
except Exception: # 上传时间戳
# 有水印视频链接 album_create_time = result["aweme_detail"]['create_time']
wm_video_url = 'None' # 视频ID
# 视频标题 album_aweme_id = result["aweme_detail"]['statistics']['aweme_id']
video_title = result["aweme_detail"]["desc"] try:
# 视频作者昵称 # 视频BGM标题
video_author_nickname = result["aweme_detail"]['author']["nickname"] album_music_title = result["aweme_detail"]['music']['title']
# 视频作者ID # 视频BGM作者
video_author_id = result["aweme_detail"]['author']["unique_id"] album_music_author = result["aweme_detail"]['music']['author']
# 上传时间戳 # 视频BGM ID
video_create_time = result["aweme_detail"]['create_time'] album_music_id = result["aweme_detail"]['music']['id']
# 视频ID # 视频BGM链接
video_aweme_id = result["aweme_detail"]['statistics']['aweme_id'] album_music_url = result["aweme_detail"]['music']['play_url']['url_list'][0]
try: except:
# 视频BGM标题 album_music_title, album_music_author, album_music_id, album_music_url = "None", "None", "None", "None"
video_music_title = result["aweme_detail"]['music']['title'] # 评论数量
# 视频BGM作者 album_comment_count = result["aweme_detail"]['statistics']['comment_count']
video_music_author = result["aweme_detail"]['music']['author'] # 获赞数量
# 视频BGM ID album_digg_count = result["aweme_detail"]['statistics']['digg_count']
video_music_id = result["aweme_detail"]['music']['id'] # 播放次数
# 视频BGM链接 album_play_count = result["aweme_detail"]['statistics']['play_count']
video_music_url = result["aweme_detail"]['music']['play_url']['url_list'][0] # 下载次数
except: album_download_count = result["aweme_detail"]['statistics']['download_count']
video_music_title, video_music_author, video_music_id, video_music_url = "None", "None", "None", "None" # 分享次数
# 评论数量 album_share_count = result["aweme_detail"]['statistics']['share_count']
video_comment_count = result["aweme_detail"]['statistics']['comment_count'] # 无水印图集
# 获赞数量 album_list = []
video_digg_count = result["aweme_detail"]['statistics']['digg_count'] for i in result["aweme_detail"]['image_post_info']['images']:
# 播放次数 album_list.append(i['display_image']['url_list'][0])
video_play_count = result["aweme_detail"]['statistics']['play_count'] # 结束时间
# 下载次数 end = time.time()
video_download_count = result["aweme_detail"]['statistics']['download_count'] # 解析时间
# 分享次数 analyze_time = format((end - start), '.4f')
video_share_count = result["aweme_detail"]['statistics']['share_count'] # 储存数据
# 视频封面 album_data = {'status': 'success',
video_cover = result["aweme_detail"]['video']['cover']['url_list'][0] 'analyze_time': (analyze_time + 's'),
# 视频动态封面 'url_type': url_type,
video_dynamic_cover = result["aweme_detail"]['video']['dynamic_cover']['url_list'][0] 'api_url': tiktok_api_link,
# 视频原始封面 'original_url': original_url,
video_origin_cover = result["aweme_detail"]['video']['origin_cover']['url_list'][0] 'platform': 'tiktok',
# 将话题保存在列表中 'album_title': album_title,
video_hashtags = [] 'album_list': album_list,
for tag in result["aweme_detail"]['text_extra']: 'album_author_nickname': album_author_nickname,
if 'hashtag_name' in tag: 'album_author_id': album_author_id,
video_hashtags.append(tag['hashtag_name']) 'album_create_time': album_create_time,
else: 'album_aweme_id': album_aweme_id,
continue 'album_music_title': album_music_title,
if video_info != None: 'album_music_author': album_music_author,
# 作者粉丝数量 'album_music_id': album_music_id,
video_author_followerCount = video_info['authorStats']['followerCount'] 'album_music_url': album_music_url,
# 作者关注数量 'album_comment_count': album_comment_count,
video_author_followingCount = video_info['authorStats']['followingCount'] 'album_digg_count': album_digg_count,
# 作者获赞数量 'album_play_count': album_play_count,
video_author_heartCount = video_info['authorStats']['heartCount'] 'album_share_count': album_share_count,
# 作者视频数量 'album_download_count': album_download_count
video_author_videoCount = video_info['authorStats']['videoCount'] }
# 作者已赞作品数量 # 返回包含数据的字典
video_author_diggCount = video_info['authorStats']['diggCount'] return album_data
else:
# 类型为视频
url_type = 'video'
print('类型为视频')
# 无水印视频链接
nwm_video_url = result["aweme_detail"]["video"]["play_addr"]["url_list"][0]
try:
# 有水印视频链接
wm_video_url = result["aweme_detail"]["video"]['download_addr']['url_list'][0]
except Exception:
# 有水印视频链接
wm_video_url = 'None'
# 视频标题
video_title = result["aweme_detail"]["desc"]
# 视频作者昵称
video_author_nickname = result["aweme_detail"]['author']["nickname"]
# 视频作者ID
video_author_id = result["aweme_detail"]['author']["unique_id"]
# 上传时间戳
video_create_time = result["aweme_detail"]['create_time']
# 视频ID
video_aweme_id = result["aweme_detail"]['statistics']['aweme_id']
try:
# 视频BGM标题
video_music_title = result["aweme_detail"]['music']['title']
# 视频BGM作者
video_music_author = result["aweme_detail"]['music']['author']
# 视频BGM ID
video_music_id = result["aweme_detail"]['music']['id']
# 视频BGM链接
video_music_url = result["aweme_detail"]['music']['play_url']['url_list'][0]
except:
video_music_title, video_music_author, video_music_id, video_music_url = "None", "None", "None", "None"
# 评论数量
video_comment_count = result["aweme_detail"]['statistics']['comment_count']
# 获赞数量
video_digg_count = result["aweme_detail"]['statistics']['digg_count']
# 播放次数
video_play_count = result["aweme_detail"]['statistics']['play_count']
# 下载次数
video_download_count = result["aweme_detail"]['statistics']['download_count']
# 分享次数
video_share_count = result["aweme_detail"]['statistics']['share_count']
# 视频封面
video_cover = result["aweme_detail"]['video']['cover']['url_list'][0]
# 视频动态封面
video_dynamic_cover = result["aweme_detail"]['video']['dynamic_cover']['url_list'][0]
# 视频原始封面
video_origin_cover = result["aweme_detail"]['video']['origin_cover']['url_list'][0]
# 将话题保存在列表中
video_hashtags = []
for tag in result["aweme_detail"]['text_extra']:
if 'hashtag_name' in tag:
video_hashtags.append(tag['hashtag_name'])
else: else:
# 作者粉丝数量 continue
video_author_followerCount = 'None' if video_info != None:
# 作者关注数量 # 作者粉丝数量
video_author_followingCount = 'None' video_author_followerCount = video_info['authorStats']['followerCount']
# 作者获赞数量 # 作者关注数量
video_author_heartCount = 'None' video_author_followingCount = video_info['authorStats']['followingCount']
# 作者视频数量 # 作者获赞数量
video_author_videoCount = 'None' video_author_heartCount = video_info['authorStats']['heartCount']
# 作者已赞作品数量 # 作者视频数量
video_author_diggCount = 'None' video_author_videoCount = video_info['authorStats']['videoCount']
# 结束时间 # 作者已赞作品数量
end = time.time() video_author_diggCount = video_info['authorStats']['diggCount']
# 解析时间
analyze_time = format((end - start), '.4f')
# 储存数据
video_data = {'status': 'success',
'analyze_time': (analyze_time + 's'),
'url_type': url_type,
'api_url': tiktok_api_link,
'original_url': original_url,
'platform': 'tiktok',
'video_title': video_title,
'nwm_video_url': nwm_video_url,
'wm_video_url': wm_video_url,
'video_author_nickname': video_author_nickname,
'video_author_id': video_author_id,
'video_create_time': video_create_time,
'video_aweme_id': video_aweme_id,
'video_music_title': video_music_title,
'video_music_author': video_music_author,
'video_music_id': video_music_id,
'video_music_url': video_music_url,
'video_comment_count': video_comment_count,
'video_digg_count': video_digg_count,
'video_play_count': video_play_count,
'video_share_count': video_share_count,
'video_download_count': video_download_count,
'video_author_followerCount': video_author_followerCount,
'video_author_followingCount': video_author_followingCount,
'video_author_heartCount': video_author_heartCount,
'video_author_videoCount': video_author_videoCount,
'video_author_diggCount': video_author_diggCount,
'video_cover': video_cover,
'video_dynamic_cover': video_dynamic_cover,
'video_origin_cover': video_origin_cover,
'video_hashtags': video_hashtags
}
# 返回包含数据的字典
return video_data
else: else:
# 判断链接是图集链接 # 作者粉丝数量
# https://www.tiktok.com/@tamm6636/video/7105440975878655278 video_author_followerCount = 'None'
video_id = re.findall('video/(\d+)?', original_url)[0] # 作者关注数量
print('视频ID为: {}'.format(video_id)) video_author_followingCount = 'None'
# 从TikTok官方API获取部分视频数据 # 作者获赞数量
tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D'.format( video_author_heartCount = 'None'
video_id) # 作者视频数量
print('正在请求API链接:{}'.format(tiktok_api_link)) video_author_videoCount = 'None'
response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text # 作者已赞作品数量
# 将API获取到的内容格式化为JSON video_author_diggCount = 'None'
result = json.loads(response) # 结束时间
# 类型为图集 end = time.time()
url_type = 'album' # 解析时间
print('类型为图集') analyze_time = format((end - start), '.4f')
# 视频标题 # 储存数据
album_title = result["aweme_detail"]["desc"] video_data = {'status': 'success',
# 视频作者昵称 'analyze_time': (analyze_time + 's'),
album_author_nickname = result["aweme_detail"]['author']["nickname"] 'url_type': url_type,
# 视频作者ID 'api_url': tiktok_api_link,
album_author_id = result["aweme_detail"]['author']["unique_id"] 'original_url': original_url,
# 上传时间戳 'platform': 'tiktok',
album_create_time = result["aweme_detail"]['create_time'] 'video_title': video_title,
# 视频ID 'nwm_video_url': nwm_video_url,
album_aweme_id = result["aweme_detail"]['statistics']['aweme_id'] 'wm_video_url': wm_video_url,
try: 'video_author_nickname': video_author_nickname,
# 视频BGM标题 'video_author_id': video_author_id,
album_music_title = result["aweme_detail"]['music']['title'] 'video_create_time': video_create_time,
# 视频BGM作者 'video_aweme_id': video_aweme_id,
album_music_author = result["aweme_detail"]['music']['author'] 'video_music_title': video_music_title,
# 视频BGM ID 'video_music_author': video_music_author,
album_music_id = result["aweme_detail"]['music']['id'] 'video_music_id': video_music_id,
# 视频BGM链接 'video_music_url': video_music_url,
album_music_url = result["aweme_detail"]['music']['play_url']['url_list'][0] 'video_comment_count': video_comment_count,
except: 'video_digg_count': video_digg_count,
album_music_title, album_music_author, album_music_id, album_music_url = "None", "None", "None", "None" 'video_play_count': video_play_count,
# 评论数量 'video_share_count': video_share_count,
album_comment_count = result["aweme_detail"]['statistics']['comment_count'] 'video_download_count': video_download_count,
# 获赞数量 'video_author_followerCount': video_author_followerCount,
album_digg_count = result["aweme_detail"]['statistics']['digg_count'] 'video_author_followingCount': video_author_followingCount,
# 播放次数 'video_author_heartCount': video_author_heartCount,
album_play_count = result["aweme_detail"]['statistics']['play_count'] 'video_author_videoCount': video_author_videoCount,
# 下载次数 'video_author_diggCount': video_author_diggCount,
album_download_count = result["aweme_detail"]['statistics']['download_count'] 'video_cover': video_cover,
# 分享次数 'video_dynamic_cover': video_dynamic_cover,
album_share_count = result["aweme_detail"]['statistics']['share_count'] 'video_origin_cover': video_origin_cover,
# 无水印图集 'video_hashtags': video_hashtags
album_list = [] }
for i in result["aweme_detail"]['image_post_info']['images']: # 返回包含数据的字典
album_list.append(i['display_image']['url_list'][0]) return video_data
# 结束时间
end = time.time()
# 解析时间
analyze_time = format((end - start), '.4f')
# 储存数据
album_data = {'status': 'success',
'analyze_time': (analyze_time + 's'),
'url_type': url_type,
'api_url': tiktok_api_link,
'original_url': original_url,
'platform': 'tiktok',
'album_title': album_title,
'album_list': album_list,
'album_author_nickname': album_author_nickname,
'album_author_id': album_author_id,
'album_create_time': album_create_time,
'album_aweme_id': album_aweme_id,
'album_music_title': album_music_title,
'album_music_author': album_music_author,
'album_music_id': album_music_id,
'album_music_url': album_music_url,
'album_comment_count': album_comment_count,
'album_digg_count': album_digg_count,
'album_play_count': album_play_count,
'album_share_count': album_share_count,
'album_download_count': album_download_count
}
# 返回包含数据的字典
return album_data
except Exception as e: except Exception as e:
# 异常捕获 # 异常捕获
return {'status': 'failed', 'reason': e, 'function': 'Scraper.tiktok()', 'value': original_url} return {'status': 'failed', 'reason': e, 'function': 'Scraper.tiktok()', 'value': original_url}