From 2ca727cb02c485222b41468fba61d0677dae179c Mon Sep 17 00:00:00 2001 From: Evil0ctal Date: Wed, 6 Apr 2022 11:30:22 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=97=B3=E4=BF=AE=E5=A4=8DTikTok=E9=83=A8?= =?UTF-8?q?=E5=88=86=E7=9F=AD=E9=93=BE=E6=8E=A5=E8=A7=A3=E6=9E=90=E5=A4=B1?= =?UTF-8?q?=E8=B4=A5=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- API/scraper.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/API/scraper.py b/API/scraper.py index 7262fea..ed541bc 100644 --- a/API/scraper.py +++ b/API/scraper.py @@ -20,6 +20,7 @@ class Scraper: Scraper.douyin():抖音视频/图集解析,返回字典。 Scraper.tiktok():TikTok视频解析,返回字典。 """ + def __init__(self): self.headers = { 'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.66' @@ -248,6 +249,7 @@ class Scraper: # 从请求头中获取原始链接 response = requests.get(url=original_url, headers=headers, allow_redirects=False) true_link = response.headers['Location'].split("?")[0] + original_url = true_link # TikTok请求头返回的第二种链接类型 if '.html' in true_link: response = requests.get(url=true_link, headers=headers, allow_redirects=False) @@ -361,7 +363,7 @@ class Scraper: 'video_comment_count': video_comment_count, 'video_digg_count': video_digg_count, 'video_play_count': video_play_count, - 'video_share_count':video_share_count, + 'video_share_count': video_share_count, 'video_author_followerCount': video_author_followerCount, 'video_author_followingCount': video_author_followingCount, 'video_author_heartCount': video_author_heartCount, @@ -379,10 +381,10 @@ class Scraper: if __name__ == '__main__': # 测试类 scraper = Scraper() - tiktok_url = "https://www.tiktok.com/@oregonzoo/video/7074995215647477034" + tiktok_url = "https://vm.tiktok.com/TTPdHbphWF/" tiktok_date = scraper.tiktok(tiktok_url) print(tiktok_date) print('') douyin_url = "https://www.douyin.com/video/7036277592986537252" douyin_date = scraper.douyin(douyin_url) - print(douyin_date) \ No newline at end of file + print(douyin_date)