From e68d96080fd83e40a6485d80f6a8ef9544100385 Mon Sep 17 00:00:00 2001 From: Evil0ctal Date: Wed, 6 Apr 2022 14:23:28 -0700 Subject: [PATCH] =?UTF-8?q?=20=F0=9F=97=B3=E4=BF=AE=E5=A4=8DDouyin?= =?UTF-8?q?=E9=83=A8=E5=88=86=E9=93=BE=E6=8E=A5=E5=9B=A0=E6=97=A0=E8=83=8C?= =?UTF-8?q?=E6=99=AF=E9=9F=B3=E4=B9=90=E5=AF=BC=E8=87=B4=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=A4=B1=E8=B4=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- API/scraper.py | 80 +++++++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/API/scraper.py b/API/scraper.py index ed541bc..ea06d67 100644 --- a/API/scraper.py +++ b/API/scraper.py @@ -2,7 +2,7 @@ # -*- encoding: utf-8 -*- # @Author: https://github.com/Evil0ctal/ # @Time: 2021/11/06 -# @Update: 2022/04/05 +# @Update: 2022/04/06 # @Function: # 核心代码,估值1块(๑•̀ㅂ•́)و✧ # 用于爬取Douyin/TikTok数据并以字典形式返回。 @@ -78,20 +78,30 @@ class Scraper: if album_author_id == "": # 如果作者未修改过抖音号,应使用此值以避免无法获取其抖音ID album_author_id = str(js['item_list'][0]['author']['short_id']) - # 图集BGM链接 - if len(js['item_list'][0]['music']['play_url']['url_list']) > 0: + # 尝试获取图集BGM信息 + try: + # 图集BGM链接 album_music = str(js['item_list'][0]['music']['play_url']['url_list'][0]) - else: - # 部分视频的API数据中没有BGM链接,返回None - album_music = "None" - # 图集BGM标题 - album_music_title = str(js['item_list'][0]['music']['title']) - # 图集BGM作者 - album_music_author = str(js['item_list'][0]['music']['author']) - # 图集BGM ID - album_music_id = str(js['item_list'][0]['music']['id']) - # 图集BGM MID - album_music_mid = str(js['item_list'][0]['music']['mid']) + # 图集BGM标题 + album_music_title = str(js['item_list'][0]['music']['title']) + # 图集BGM作者 + album_music_author = str(js['item_list'][0]['music']['author']) + # 图集BGM ID + album_music_id = str(js['item_list'][0]['music']['id']) + # 图集BGM MID + album_music_mid = str(js['item_list'][0]['music']['mid']) + except: + # 报错后代表无背景音乐 + # 图集BGM链接 + album_music = 'No BGM found' + # 图集BGM标题 + album_music_title = 'No BGM found' + # 图集BGM作者 + album_music_author = 'No BGM found' + # 图集BGM ID + album_music_id = 'No BGM found' + # 图集BGM MID + album_music_mid = 'No BGM found' # 图集ID album_aweme_id = str(js['item_list'][0]['statistics']['aweme_id']) # 评论数量 @@ -162,24 +172,34 @@ class Scraper: # 去水印后视频链接(2022年1月1日抖音APi获取到的URL会进行跳转,需要在Location中获取直链) r = requests.get(url=nwm_video_url, headers=headers, allow_redirects=False) video_url = r.headers['Location'] - # 视频背景音频 - if len(js['item_list'][0]['music']['play_url']['url_list']) > 0: - video_music = str(js['item_list'][0]['music']['play_url']['url_list'][0]) - else: - # 部分视频的API数据中没有BGM链接,返回None - video_music = "None" # 视频作者签名 video_author_signature = str(js['item_list'][0]['author']['signature']) # 视频作者UID video_author_uid = str(js['item_list'][0]['author']['uid']) - # 视频BGM标题 - video_music_title = str(js['item_list'][0]['music']['title']) - # 视频BGM作者 - video_music_author = str(js['item_list'][0]['music']['author']) - # 视频BGM ID - video_music_id = str(js['item_list'][0]['music']['id']) - # 视频BGM MID - video_music_mid = str(js['item_list'][0]['music']['mid']) + # 尝试获取视频背景音乐 + try: + # 视频BGM链接 + video_music = str(js['item_list'][0]['music']['play_url']['url_list'][0]) + # 视频BGM标题 + video_music_title = str(js['item_list'][0]['music']['title']) + # 视频BGM作者 + video_music_author = str(js['item_list'][0]['music']['author']) + # 视频BGM ID + video_music_id = str(js['item_list'][0]['music']['id']) + # 视频BGM MID + video_music_mid = str(js['item_list'][0]['music']['mid']) + except: + # 出错代表无背景音乐 + # 视频BGM链接 + video_music = 'No BGM found' + # 视频BGM标题 + video_music_title = 'No BGM found' + # 视频BGM作者 + video_music_author = 'No BGM found' + # 视频BGM ID + video_music_id = 'No BGM found' + # 视频BGM MID + video_music_mid = 'No BGM found' # 视频ID video_aweme_id = str(js['item_list'][0]['statistics']['aweme_id']) # 评论数量 @@ -381,10 +401,10 @@ class Scraper: if __name__ == '__main__': # 测试类 scraper = Scraper() - tiktok_url = "https://vm.tiktok.com/TTPdHbphWF/" + tiktok_url = "https://www.tiktok.com/@oregonzoo/video/7074995215647477034" tiktok_date = scraper.tiktok(tiktok_url) print(tiktok_date) print('') - douyin_url = "https://www.douyin.com/video/7036277592986537252" + douyin_url = "https://www.douyin.com/video/7055581212840086817" douyin_date = scraper.douyin(douyin_url) print(douyin_date)