mirror of
https://github.com/Evil0ctal/Douyin_TikTok_Download_API.git
synced 2025-04-23 05:24:26 +08:00
🐛修复TikTok解析失败并添加封面数据
This commit is contained in:
parent
bffaa773ac
commit
e12d342449
424
scraper.py
424
scraper.py
@ -2,7 +2,7 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
# @Author: https://github.com/Evil0ctal/
|
||||
# @Time: 2021/11/06
|
||||
# @Update: 2022/06/27
|
||||
# @Update: 2022/06/29
|
||||
# @Function:
|
||||
# 核心代码,估值1块(๑•̀ㅂ•́)و✧
|
||||
# 用于爬取Douyin/TikTok数据并以字典形式返回。
|
||||
@ -241,6 +241,12 @@ class Scraper:
|
||||
video_share_count = str(js['item_list'][0]['statistics']['share_count'])
|
||||
# 上传时间戳
|
||||
video_create_time = str(js['item_list'][0]['create_time'])
|
||||
# 视频封面
|
||||
video_cover = js['item_list'][0]['video']['cover']['url_list'][0]
|
||||
# 视频动态封面
|
||||
video_dynamic_cover = js['item_list'][0]['video']['dynamic_cover']['url_list'][0]
|
||||
# 视频原始封面
|
||||
video_origin_cover = js['item_list'][0]['video']['origin_cover']['url_list'][0]
|
||||
# 将话题保存在列表中
|
||||
video_hashtags = []
|
||||
for tag in js['item_list'][0]['text_extra']:
|
||||
@ -275,6 +281,9 @@ class Scraper:
|
||||
'video_play_count': video_play_count,
|
||||
'video_share_count': video_share_count,
|
||||
'video_create_time': video_create_time,
|
||||
'video_cover': video_cover,
|
||||
'video_dynamic_cover': video_dynamic_cover,
|
||||
'video_origin_cover': video_origin_cover,
|
||||
'video_hashtags': video_hashtags}
|
||||
return video_data
|
||||
except Exception as e:
|
||||
@ -311,207 +320,220 @@ class Scraper:
|
||||
print('获取到的TikTok视频ID是{}'.format(video_id))
|
||||
# 尝试从TikTok网页获取部分视频数据,失败后判断为图集
|
||||
try:
|
||||
try:
|
||||
tiktok_headers = self.tiktok_headers
|
||||
html = requests.get(url=original_url, headers=tiktok_headers, proxies=self.proxies)
|
||||
# 正则检索网页中存在的JSON信息
|
||||
resp = re.search('"ItemModule":{(.*)},"UserModule":', html.text).group(1)
|
||||
resp_info = ('{"ItemModule":{' + resp + '}}')
|
||||
result = json.loads(resp_info)
|
||||
# 从网页中获得的视频JSON数据
|
||||
video_info = result["ItemModule"][video_id]
|
||||
except:
|
||||
video_info = None
|
||||
# 从TikTok官方API获取部分视频数据
|
||||
tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D'.format(
|
||||
video_id)
|
||||
print('正在请求API链接:{}'.format(tiktok_api_link))
|
||||
response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text
|
||||
# 将API获取到的内容格式化为JSON
|
||||
result = json.loads(response)
|
||||
# 类型为视频
|
||||
url_type = 'video'
|
||||
print('类型为视频')
|
||||
# 无水印视频链接
|
||||
nwm_video_url = result["aweme_details"][0]["video"]["play_addr"]["url_list"][0]
|
||||
try:
|
||||
# 有水印视频链接
|
||||
wm_video_url = result["aweme_details"][0]["video"]['download_addr']['url_list'][0]
|
||||
except Exception:
|
||||
# 有水印视频链接
|
||||
wm_video_url = 'None'
|
||||
# 视频标题
|
||||
video_title = result["aweme_details"][0]["desc"]
|
||||
# 视频作者昵称
|
||||
video_author_nickname = result["aweme_details"][0]['author']["nickname"]
|
||||
# 视频作者ID
|
||||
video_author_id = result["aweme_details"][0]['author']["unique_id"]
|
||||
# 上传时间戳
|
||||
video_create_time = result["aweme_details"][0]['create_time']
|
||||
# 视频ID
|
||||
video_aweme_id = result["aweme_details"][0]['statistics']['aweme_id']
|
||||
try:
|
||||
# 视频BGM标题
|
||||
video_music_title = result["aweme_details"][0]['music']['title']
|
||||
# 视频BGM作者
|
||||
video_music_author = result["aweme_details"][0]['music']['author']
|
||||
# 视频BGM ID
|
||||
video_music_id = result["aweme_details"][0]['music']['id']
|
||||
# 视频BGM链接
|
||||
video_music_url = result["aweme_details"][0]['music']['play_url']['url_list'][0]
|
||||
except:
|
||||
video_music_title, video_music_author, video_music_id, video_music_url = "None", "None", "None", "None"
|
||||
# 评论数量
|
||||
video_comment_count = result["aweme_details"][0]['statistics']['comment_count']
|
||||
# 获赞数量
|
||||
video_digg_count = result["aweme_details"][0]['statistics']['digg_count']
|
||||
# 播放次数
|
||||
video_play_count = result["aweme_details"][0]['statistics']['play_count']
|
||||
# 下载次数
|
||||
video_download_count = result["aweme_details"][0]['statistics']['download_count']
|
||||
# 分享次数
|
||||
video_share_count = result["aweme_details"][0]['statistics']['share_count']
|
||||
# 将话题保存在列表中
|
||||
video_hashtags = []
|
||||
for tag in result["aweme_details"][0]['text_extra']:
|
||||
video_hashtags.append(tag['hashtag_name'])
|
||||
if video_info != None:
|
||||
# 作者粉丝数量
|
||||
video_author_followerCount = video_info['authorStats']['followerCount']
|
||||
# 作者关注数量
|
||||
video_author_followingCount = video_info['authorStats']['followingCount']
|
||||
# 作者获赞数量
|
||||
video_author_heartCount = video_info['authorStats']['heartCount']
|
||||
# 作者视频数量
|
||||
video_author_videoCount = video_info['authorStats']['videoCount']
|
||||
# 作者已赞作品数量
|
||||
video_author_diggCount = video_info['authorStats']['diggCount']
|
||||
else:
|
||||
# 作者粉丝数量
|
||||
video_author_followerCount = 'None'
|
||||
# 作者关注数量
|
||||
video_author_followingCount = 'None'
|
||||
# 作者获赞数量
|
||||
video_author_heartCount = 'None'
|
||||
# 作者视频数量
|
||||
video_author_videoCount = 'None'
|
||||
# 作者已赞作品数量
|
||||
video_author_diggCount = 'None'
|
||||
# 结束时间
|
||||
end = time.time()
|
||||
# 解析时间
|
||||
analyze_time = format((end - start), '.4f')
|
||||
# 储存数据
|
||||
video_data = {'status': 'success',
|
||||
'analyze_time': (analyze_time + 's'),
|
||||
'url_type': url_type,
|
||||
'api_url': tiktok_api_link,
|
||||
'original_url': original_url,
|
||||
'platform': 'tiktok',
|
||||
'video_title': video_title,
|
||||
'nwm_video_url': nwm_video_url,
|
||||
'wm_video_url': wm_video_url,
|
||||
'video_author_nickname': video_author_nickname,
|
||||
'video_author_id': video_author_id,
|
||||
'video_create_time': video_create_time,
|
||||
'video_aweme_id': video_aweme_id,
|
||||
'video_music_title': video_music_title,
|
||||
'video_music_author': video_music_author,
|
||||
'video_music_id': video_music_id,
|
||||
'video_music_url': video_music_url,
|
||||
'video_comment_count': video_comment_count,
|
||||
'video_digg_count': video_digg_count,
|
||||
'video_play_count': video_play_count,
|
||||
'video_share_count': video_share_count,
|
||||
'video_download_count': video_download_count,
|
||||
'video_author_followerCount': video_author_followerCount,
|
||||
'video_author_followingCount': video_author_followingCount,
|
||||
'video_author_heartCount': video_author_heartCount,
|
||||
'video_author_videoCount': video_author_videoCount,
|
||||
'video_author_diggCount': video_author_diggCount,
|
||||
'video_hashtags': video_hashtags
|
||||
}
|
||||
# 返回包含数据的字典
|
||||
return video_data
|
||||
tiktok_headers = self.tiktok_headers
|
||||
html = requests.get(url=original_url, headers=tiktok_headers, proxies=self.proxies)
|
||||
# 正则检索网页中存在的JSON信息
|
||||
resp = re.search('"ItemModule":{(.*)},"UserModule":', html.text).group(1)
|
||||
resp_info = ('{"ItemModule":{' + resp + '}}')
|
||||
result = json.loads(resp_info)
|
||||
# 从网页中获得的视频JSON数据
|
||||
video_info = result["ItemModule"][video_id]
|
||||
except:
|
||||
# 判断链接是图集链接
|
||||
# https://www.tiktok.com/@tamm6636/video/7105440975878655278
|
||||
video_id = re.findall('video/(\d+)?', original_url)[0]
|
||||
print('视频ID为: {}'.format(video_id))
|
||||
# 从TikTok官方API获取部分视频数据
|
||||
tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D'.format(
|
||||
video_id)
|
||||
print('正在请求API链接:{}'.format(tiktok_api_link))
|
||||
response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text
|
||||
# 将API获取到的内容格式化为JSON
|
||||
result = json.loads(response)
|
||||
# 类型为图集
|
||||
url_type = 'album'
|
||||
print('类型为图集')
|
||||
# 视频标题
|
||||
album_title = result["aweme_details"][0]["desc"]
|
||||
# 视频作者昵称
|
||||
album_author_nickname = result["aweme_details"][0]['author']["nickname"]
|
||||
# 视频作者ID
|
||||
album_author_id = result["aweme_details"][0]['author']["unique_id"]
|
||||
# 上传时间戳
|
||||
album_create_time = result["aweme_details"][0]['create_time']
|
||||
# 视频ID
|
||||
album_aweme_id = result["aweme_details"][0]['statistics']['aweme_id']
|
||||
try:
|
||||
# 视频BGM标题
|
||||
album_music_title = result["aweme_details"][0]['music']['title']
|
||||
# 视频BGM作者
|
||||
album_music_author = result["aweme_details"][0]['music']['author']
|
||||
# 视频BGM ID
|
||||
album_music_id = result["aweme_details"][0]['music']['id']
|
||||
# 视频BGM链接
|
||||
album_music_url = result["aweme_details"][0]['music']['play_url']['url_list'][0]
|
||||
except:
|
||||
album_music_title, album_music_author, album_music_id, album_music_url = "None", "None", "None", "None"
|
||||
# 评论数量
|
||||
album_comment_count = result["aweme_details"][0]['statistics']['comment_count']
|
||||
# 获赞数量
|
||||
album_digg_count = result["aweme_details"][0]['statistics']['digg_count']
|
||||
# 播放次数
|
||||
album_play_count = result["aweme_details"][0]['statistics']['play_count']
|
||||
# 下载次数
|
||||
album_download_count = result["aweme_details"][0]['statistics']['download_count']
|
||||
# 分享次数
|
||||
album_share_count = result["aweme_details"][0]['statistics']['share_count']
|
||||
# 无水印图集
|
||||
album_list = []
|
||||
for i in result["aweme_details"][0]['image_post_info']['images']:
|
||||
album_list.append(i['display_image']['url_list'][0])
|
||||
# 结束时间
|
||||
end = time.time()
|
||||
# 解析时间
|
||||
analyze_time = format((end - start), '.4f')
|
||||
# 储存数据
|
||||
album_data = {'status': 'success',
|
||||
'analyze_time': (analyze_time + 's'),
|
||||
'url_type': url_type,
|
||||
'api_url': tiktok_api_link,
|
||||
'original_url': original_url,
|
||||
'platform': 'tiktok',
|
||||
'album_title': album_title,
|
||||
'album_list': album_list,
|
||||
'album_author_nickname': album_author_nickname,
|
||||
'album_author_id': album_author_id,
|
||||
'album_create_time': album_create_time,
|
||||
'album_aweme_id': album_aweme_id,
|
||||
'album_music_title': album_music_title,
|
||||
'album_music_author': album_music_author,
|
||||
'album_music_id': album_music_id,
|
||||
'album_music_url': album_music_url,
|
||||
'album_comment_count': album_comment_count,
|
||||
'album_digg_count': album_digg_count,
|
||||
'album_play_count': album_play_count,
|
||||
'album_share_count': album_share_count,
|
||||
'album_download_count': album_download_count
|
||||
}
|
||||
# 返回包含数据的字典
|
||||
return album_data
|
||||
video_info = None
|
||||
# 从TikTok官方API获取部分视频数据
|
||||
tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D'.format(
|
||||
video_id)
|
||||
print('正在请求API链接:{}'.format(tiktok_api_link))
|
||||
response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text
|
||||
# 将API获取到的内容格式化为JSON
|
||||
result = json.loads(response)
|
||||
for i in result["aweme_details"][0]:
|
||||
if i != 'image_post_info':
|
||||
# 类型为视频
|
||||
url_type = 'video'
|
||||
print('类型为视频')
|
||||
# 无水印视频链接
|
||||
nwm_video_url = result["aweme_details"][0]["video"]["play_addr"]["url_list"][0]
|
||||
try:
|
||||
# 有水印视频链接
|
||||
wm_video_url = result["aweme_details"][0]["video"]['download_addr']['url_list'][0]
|
||||
except Exception:
|
||||
# 有水印视频链接
|
||||
wm_video_url = 'None'
|
||||
# 视频标题
|
||||
video_title = result["aweme_details"][0]["desc"]
|
||||
# 视频作者昵称
|
||||
video_author_nickname = result["aweme_details"][0]['author']["nickname"]
|
||||
# 视频作者ID
|
||||
video_author_id = result["aweme_details"][0]['author']["unique_id"]
|
||||
# 上传时间戳
|
||||
video_create_time = result["aweme_details"][0]['create_time']
|
||||
# 视频ID
|
||||
video_aweme_id = result["aweme_details"][0]['statistics']['aweme_id']
|
||||
try:
|
||||
# 视频BGM标题
|
||||
video_music_title = result["aweme_details"][0]['music']['title']
|
||||
# 视频BGM作者
|
||||
video_music_author = result["aweme_details"][0]['music']['author']
|
||||
# 视频BGM ID
|
||||
video_music_id = result["aweme_details"][0]['music']['id']
|
||||
# 视频BGM链接
|
||||
video_music_url = result["aweme_details"][0]['music']['play_url']['url_list'][0]
|
||||
except:
|
||||
video_music_title, video_music_author, video_music_id, video_music_url = "None", "None", "None", "None"
|
||||
# 评论数量
|
||||
video_comment_count = result["aweme_details"][0]['statistics']['comment_count']
|
||||
# 获赞数量
|
||||
video_digg_count = result["aweme_details"][0]['statistics']['digg_count']
|
||||
# 播放次数
|
||||
video_play_count = result["aweme_details"][0]['statistics']['play_count']
|
||||
# 下载次数
|
||||
video_download_count = result["aweme_details"][0]['statistics']['download_count']
|
||||
# 分享次数
|
||||
video_share_count = result["aweme_details"][0]['statistics']['share_count']
|
||||
# 视频封面
|
||||
video_cover = result["aweme_details"][0]['video']['cover']['url_list'][0]
|
||||
# 视频动态封面
|
||||
video_dynamic_cover = result["aweme_details"][0]['video']['dynamic_cover']['url_list'][0]
|
||||
# 视频原始封面
|
||||
video_origin_cover = result["aweme_details"][0]['video']['origin_cover']['url_list'][0]
|
||||
# 将话题保存在列表中
|
||||
video_hashtags = []
|
||||
for tag in result["aweme_details"][0]['text_extra']:
|
||||
if 'hashtag_name' in tag:
|
||||
video_hashtags.append(tag['hashtag_name'])
|
||||
else:
|
||||
continue
|
||||
if video_info != None:
|
||||
# 作者粉丝数量
|
||||
video_author_followerCount = video_info['authorStats']['followerCount']
|
||||
# 作者关注数量
|
||||
video_author_followingCount = video_info['authorStats']['followingCount']
|
||||
# 作者获赞数量
|
||||
video_author_heartCount = video_info['authorStats']['heartCount']
|
||||
# 作者视频数量
|
||||
video_author_videoCount = video_info['authorStats']['videoCount']
|
||||
# 作者已赞作品数量
|
||||
video_author_diggCount = video_info['authorStats']['diggCount']
|
||||
else:
|
||||
# 作者粉丝数量
|
||||
video_author_followerCount = 'None'
|
||||
# 作者关注数量
|
||||
video_author_followingCount = 'None'
|
||||
# 作者获赞数量
|
||||
video_author_heartCount = 'None'
|
||||
# 作者视频数量
|
||||
video_author_videoCount = 'None'
|
||||
# 作者已赞作品数量
|
||||
video_author_diggCount = 'None'
|
||||
# 结束时间
|
||||
end = time.time()
|
||||
# 解析时间
|
||||
analyze_time = format((end - start), '.4f')
|
||||
# 储存数据
|
||||
video_data = {'status': 'success',
|
||||
'analyze_time': (analyze_time + 's'),
|
||||
'url_type': url_type,
|
||||
'api_url': tiktok_api_link,
|
||||
'original_url': original_url,
|
||||
'platform': 'tiktok',
|
||||
'video_title': video_title,
|
||||
'nwm_video_url': nwm_video_url,
|
||||
'wm_video_url': wm_video_url,
|
||||
'video_author_nickname': video_author_nickname,
|
||||
'video_author_id': video_author_id,
|
||||
'video_create_time': video_create_time,
|
||||
'video_aweme_id': video_aweme_id,
|
||||
'video_music_title': video_music_title,
|
||||
'video_music_author': video_music_author,
|
||||
'video_music_id': video_music_id,
|
||||
'video_music_url': video_music_url,
|
||||
'video_comment_count': video_comment_count,
|
||||
'video_digg_count': video_digg_count,
|
||||
'video_play_count': video_play_count,
|
||||
'video_share_count': video_share_count,
|
||||
'video_download_count': video_download_count,
|
||||
'video_author_followerCount': video_author_followerCount,
|
||||
'video_author_followingCount': video_author_followingCount,
|
||||
'video_author_heartCount': video_author_heartCount,
|
||||
'video_author_videoCount': video_author_videoCount,
|
||||
'video_author_diggCount': video_author_diggCount,
|
||||
'video_cover': video_cover,
|
||||
'video_dynamic_cover': video_dynamic_cover,
|
||||
'video_origin_cover': video_origin_cover,
|
||||
'video_hashtags': video_hashtags
|
||||
}
|
||||
# 返回包含数据的字典
|
||||
return video_data
|
||||
else:
|
||||
# 判断链接是图集链接
|
||||
# https://www.tiktok.com/@tamm6636/video/7105440975878655278
|
||||
video_id = re.findall('video/(\d+)?', original_url)[0]
|
||||
print('视频ID为: {}'.format(video_id))
|
||||
# 从TikTok官方API获取部分视频数据
|
||||
tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D'.format(
|
||||
video_id)
|
||||
print('正在请求API链接:{}'.format(tiktok_api_link))
|
||||
response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text
|
||||
# 将API获取到的内容格式化为JSON
|
||||
result = json.loads(response)
|
||||
# 类型为图集
|
||||
url_type = 'album'
|
||||
print('类型为图集')
|
||||
# 视频标题
|
||||
album_title = result["aweme_details"][0]["desc"]
|
||||
# 视频作者昵称
|
||||
album_author_nickname = result["aweme_details"][0]['author']["nickname"]
|
||||
# 视频作者ID
|
||||
album_author_id = result["aweme_details"][0]['author']["unique_id"]
|
||||
# 上传时间戳
|
||||
album_create_time = result["aweme_details"][0]['create_time']
|
||||
# 视频ID
|
||||
album_aweme_id = result["aweme_details"][0]['statistics']['aweme_id']
|
||||
try:
|
||||
# 视频BGM标题
|
||||
album_music_title = result["aweme_details"][0]['music']['title']
|
||||
# 视频BGM作者
|
||||
album_music_author = result["aweme_details"][0]['music']['author']
|
||||
# 视频BGM ID
|
||||
album_music_id = result["aweme_details"][0]['music']['id']
|
||||
# 视频BGM链接
|
||||
album_music_url = result["aweme_details"][0]['music']['play_url']['url_list'][0]
|
||||
except:
|
||||
album_music_title, album_music_author, album_music_id, album_music_url = "None", "None", "None", "None"
|
||||
# 评论数量
|
||||
album_comment_count = result["aweme_details"][0]['statistics']['comment_count']
|
||||
# 获赞数量
|
||||
album_digg_count = result["aweme_details"][0]['statistics']['digg_count']
|
||||
# 播放次数
|
||||
album_play_count = result["aweme_details"][0]['statistics']['play_count']
|
||||
# 下载次数
|
||||
album_download_count = result["aweme_details"][0]['statistics']['download_count']
|
||||
# 分享次数
|
||||
album_share_count = result["aweme_details"][0]['statistics']['share_count']
|
||||
# 无水印图集
|
||||
album_list = []
|
||||
for i in result["aweme_details"][0]['image_post_info']['images']:
|
||||
album_list.append(i['display_image']['url_list'][0])
|
||||
# 结束时间
|
||||
end = time.time()
|
||||
# 解析时间
|
||||
analyze_time = format((end - start), '.4f')
|
||||
# 储存数据
|
||||
album_data = {'status': 'success',
|
||||
'analyze_time': (analyze_time + 's'),
|
||||
'url_type': url_type,
|
||||
'api_url': tiktok_api_link,
|
||||
'original_url': original_url,
|
||||
'platform': 'tiktok',
|
||||
'album_title': album_title,
|
||||
'album_list': album_list,
|
||||
'album_author_nickname': album_author_nickname,
|
||||
'album_author_id': album_author_id,
|
||||
'album_create_time': album_create_time,
|
||||
'album_aweme_id': album_aweme_id,
|
||||
'album_music_title': album_music_title,
|
||||
'album_music_author': album_music_author,
|
||||
'album_music_id': album_music_id,
|
||||
'album_music_url': album_music_url,
|
||||
'album_comment_count': album_comment_count,
|
||||
'album_digg_count': album_digg_count,
|
||||
'album_play_count': album_play_count,
|
||||
'album_share_count': album_share_count,
|
||||
'album_download_count': album_download_count
|
||||
}
|
||||
# 返回包含数据的字典
|
||||
return album_data
|
||||
except Exception as e:
|
||||
# 异常捕获
|
||||
return {'status': 'failed', 'reason': e, 'function': 'Scraper.tiktok()', 'value': original_url}
|
||||
|
Loading…
x
Reference in New Issue
Block a user