🗳修复Douyin部分链接因无背景音乐导致解析失败

This commit is contained in:
Evil0ctal 2022-04-06 14:24:30 -07:00 committed by GitHub
parent 7e64e396da
commit 4906f07429
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,7 +2,7 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
# @Author: https://github.com/Evil0ctal/ # @Author: https://github.com/Evil0ctal/
# @Time: 2021/11/06 # @Time: 2021/11/06
# @Update: 2022/04/05 # @Update: 2022/04/06
# @Function: # @Function:
# 核心代码估值1块(๑•̀ㅂ•́)و✧ # 核心代码估值1块(๑•̀ㅂ•́)و✧
# 用于爬取Douyin/TikTok数据并以字典形式返回。 # 用于爬取Douyin/TikTok数据并以字典形式返回。
@ -78,20 +78,30 @@ class Scraper:
if album_author_id == "": if album_author_id == "":
# 如果作者未修改过抖音号应使用此值以避免无法获取其抖音ID # 如果作者未修改过抖音号应使用此值以避免无法获取其抖音ID
album_author_id = str(js['item_list'][0]['author']['short_id']) album_author_id = str(js['item_list'][0]['author']['short_id'])
# 图集BGM链接 # 尝试获取图集BGM信息
if len(js['item_list'][0]['music']['play_url']['url_list']) > 0: try:
# 图集BGM链接
album_music = str(js['item_list'][0]['music']['play_url']['url_list'][0]) album_music = str(js['item_list'][0]['music']['play_url']['url_list'][0])
else: # 图集BGM标题
# 部分视频的API数据中没有BGM链接返回None album_music_title = str(js['item_list'][0]['music']['title'])
album_music = "None" # 图集BGM作者
# 图集BGM标题 album_music_author = str(js['item_list'][0]['music']['author'])
album_music_title = str(js['item_list'][0]['music']['title']) # 图集BGM ID
# 图集BGM作者 album_music_id = str(js['item_list'][0]['music']['id'])
album_music_author = str(js['item_list'][0]['music']['author']) # 图集BGM MID
# 图集BGM ID album_music_mid = str(js['item_list'][0]['music']['mid'])
album_music_id = str(js['item_list'][0]['music']['id']) except:
# 图集BGM MID # 报错后代表无背景音乐
album_music_mid = str(js['item_list'][0]['music']['mid']) # 图集BGM链接
album_music = 'No BGM found'
# 图集BGM标题
album_music_title = 'No BGM found'
# 图集BGM作者
album_music_author = 'No BGM found'
# 图集BGM ID
album_music_id = 'No BGM found'
# 图集BGM MID
album_music_mid = 'No BGM found'
# 图集ID # 图集ID
album_aweme_id = str(js['item_list'][0]['statistics']['aweme_id']) album_aweme_id = str(js['item_list'][0]['statistics']['aweme_id'])
# 评论数量 # 评论数量
@ -162,24 +172,34 @@ class Scraper:
# 去水印后视频链接(2022年1月1日抖音APi获取到的URL会进行跳转需要在Location中获取直链) # 去水印后视频链接(2022年1月1日抖音APi获取到的URL会进行跳转需要在Location中获取直链)
r = requests.get(url=nwm_video_url, headers=headers, allow_redirects=False) r = requests.get(url=nwm_video_url, headers=headers, allow_redirects=False)
video_url = r.headers['Location'] video_url = r.headers['Location']
# 视频背景音频
if len(js['item_list'][0]['music']['play_url']['url_list']) > 0:
video_music = str(js['item_list'][0]['music']['play_url']['url_list'][0])
else:
# 部分视频的API数据中没有BGM链接返回None
video_music = "None"
# 视频作者签名 # 视频作者签名
video_author_signature = str(js['item_list'][0]['author']['signature']) video_author_signature = str(js['item_list'][0]['author']['signature'])
# 视频作者UID # 视频作者UID
video_author_uid = str(js['item_list'][0]['author']['uid']) video_author_uid = str(js['item_list'][0]['author']['uid'])
# 视频BGM标题 # 尝试获取视频背景音乐
video_music_title = str(js['item_list'][0]['music']['title']) try:
# 视频BGM作者 # 视频BGM链接
video_music_author = str(js['item_list'][0]['music']['author']) video_music = str(js['item_list'][0]['music']['play_url']['url_list'][0])
# 视频BGM ID # 视频BGM标题
video_music_id = str(js['item_list'][0]['music']['id']) video_music_title = str(js['item_list'][0]['music']['title'])
# 视频BGM MID # 视频BGM作者
video_music_mid = str(js['item_list'][0]['music']['mid']) video_music_author = str(js['item_list'][0]['music']['author'])
# 视频BGM ID
video_music_id = str(js['item_list'][0]['music']['id'])
# 视频BGM MID
video_music_mid = str(js['item_list'][0]['music']['mid'])
except:
# 出错代表无背景音乐
# 视频BGM链接
video_music = 'No BGM found'
# 视频BGM标题
video_music_title = 'No BGM found'
# 视频BGM作者
video_music_author = 'No BGM found'
# 视频BGM ID
video_music_id = 'No BGM found'
# 视频BGM MID
video_music_mid = 'No BGM found'
# 视频ID # 视频ID
video_aweme_id = str(js['item_list'][0]['statistics']['aweme_id']) video_aweme_id = str(js['item_list'][0]['statistics']['aweme_id'])
# 评论数量 # 评论数量
@ -385,6 +405,6 @@ if __name__ == '__main__':
tiktok_date = scraper.tiktok(tiktok_url) tiktok_date = scraper.tiktok(tiktok_url)
print(tiktok_date) print(tiktok_date)
print('') print('')
douyin_url = "https://www.douyin.com/video/7036277592986537252" douyin_url = "https://www.douyin.com/video/7055581212840086817"
douyin_date = scraper.douyin(douyin_url) douyin_date = scraper.douyin(douyin_url)
print(douyin_date) print(douyin_date)