diff --git a/app/api/endpoints/tiktok_app.py b/app/api/endpoints/tiktok_app.py index 921c7af..33f3838 100644 --- a/app/api/endpoints/tiktok_app.py +++ b/app/api/endpoints/tiktok_app.py @@ -10,8 +10,7 @@ TikTokAPPCrawler = TikTokAPPCrawler() # 获取单个作品数据 @router.get("/fetch_one_video", response_model=ResponseModel, - summary="获取单个作品数据/Get single video data", - deprecated=True + summary="获取单个作品数据/Get single video data" ) async def fetch_one_video(request: Request, aweme_id: str = Query(example="7350810998023949599", description="作品id/Video id")): diff --git a/config.yaml b/config.yaml index f4d9d98..a9dad76 100644 --- a/config.yaml +++ b/config.yaml @@ -30,8 +30,8 @@ API: Redoc_URL: /redoc # API documentation URL | API文档URL # API Information - Version: V4.0.6 # API version | API版本 - Update_Time: 2024/08/19 # API update time | API更新时间 + Version: V4.0.7 # API version | API版本 + Update_Time: 2024/09/14 # API update time | API更新时间 Environment: Demo # API environment | API环境 # Download Configuration diff --git a/crawlers/hybrid/hybrid_crawler.py b/crawlers/hybrid/hybrid_crawler.py index dd1fb6c..6d0aa44 100644 --- a/crawlers/hybrid/hybrid_crawler.py +++ b/crawlers/hybrid/hybrid_crawler.py @@ -57,10 +57,14 @@ class HybridCrawler: elif "tiktok" in url: platform = "tiktok" aweme_id = await self.TikTokWebCrawler.get_aweme_id(url) - data = await self.TikTokWebCrawler.fetch_one_video(aweme_id) - data = data.get("itemInfo").get("itemStruct") + + # 2024-09-14: Switch to TikTokAPPCrawler instead of TikTokWebCrawler + # data = await self.TikTokWebCrawler.fetch_one_video(aweme_id) + # data = data.get("itemInfo").get("itemStruct") + + data = await self.TikTokAPPCrawler.fetch_one_video(aweme_id) # $.imagePost exists if aweme_type is photo - aweme_type = 150 if data.get("imagePost") else 1 + aweme_type = data.get("aweme_type") else: raise ValueError("hybrid_parsing_single_video: Cannot judge the video source from the URL.") @@ -85,6 +89,7 @@ class HybridCrawler: } # 判断链接类型/Judge link type url_type = url_type_code_dict.get(aweme_type, 'video') + # print(f"url_type: {url_type}") """ 以下为(视频||图片)数据处理的四个方法,如果你需要自定义数据处理请在这里修改. @@ -158,14 +163,17 @@ class HybridCrawler: # TikTok视频数据处理/TikTok video data processing if url_type == 'video': # 将信息储存在字典中/Store information in a dictionary - wm_video = data['video']['downloadAddr'] + # wm_video = data['video']['downloadAddr'] + wm_video = data['video']['download_addr']['url_list'][0] api_data = { 'video_data': { 'wm_video_url': wm_video, 'wm_video_url_HQ': wm_video, - 'nwm_video_url': data['video']['playAddr'], - 'nwm_video_url_HQ': data['video']['bitrateInfo'][0]['PlayAddr']['UrlList'][0] + # 'nwm_video_url': data['video']['playAddr'], + 'nwm_video_url': data['video']['play_addr']['url_list'][0], + # 'nwm_video_url_HQ': data['video']['bitrateInfo'][0]['PlayAddr']['UrlList'][0] + 'nwm_video_url_HQ': data['video']['bit_rate'][0]['play_addr']['url_list'][0] } } # TikTok图片数据处理/TikTok image data processing @@ -174,9 +182,9 @@ class HybridCrawler: no_watermark_image_list = [] # 有水印图片列表/With watermark image list watermark_image_list = [] - for i in data['imagePost']['images']: - no_watermark_image_list.append(i['imageURL']['urlList'][0]) - # watermark_image_list.append(i['owner_watermark_image']['url_list'][0]) + for i in data['image_post_info']['images']: + no_watermark_image_list.append(i['display_image']['url_list'][0]) + watermark_image_list.append(i['owner_watermark_image']['url_list'][0]) api_data = { 'image_data': { @@ -191,7 +199,7 @@ class HybridCrawler: async def main(self): # 测试混合解析单一视频接口/Test hybrid parsing single video endpoint # url = "https://v.douyin.com/L4FJNR3/" - url = "https://www.tiktok.com/@evil0ctal/video/7156033831819037994" + url = "https://www.tiktok.com/@taylorswift/video/7359655005701311786" # url = "https://www.tiktok.com/@minecraft/photo/7369296852669205791" minimal = True result = await self.hybrid_parsing_single_video(url, minimal=minimal) diff --git a/crawlers/tiktok/app/app_crawler.py b/crawlers/tiktok/app/app_crawler.py index 104b84f..ffb6ecb 100644 --- a/crawlers/tiktok/app/app_crawler.py +++ b/crawlers/tiktok/app/app_crawler.py @@ -69,6 +69,7 @@ class TikTokAPPCrawler: "User-Agent": tiktok_config["headers"]["User-Agent"], "Referer": tiktok_config["headers"]["Referer"], "Cookie": tiktok_config["headers"]["Cookie"], + "x-ladon": "Hello From Evil0ctal!", }, "proxies": {"http://": None, "https://": None}, } @@ -77,7 +78,7 @@ class TikTokAPPCrawler: """-------------------------------------------------------handler接口列表-------------------------------------------------------""" # 获取单个作品数据 - @deprecated("TikTok APP fetch_one_video is deprecated and will be removed in a future release. Use Web API instead. | TikTok APP fetch_one_video 已弃用,将在将来的版本中删除。请改用Web API。") + # @deprecated("TikTok APP fetch_one_video is deprecated and will be removed in a future release. Use Web API instead. | TikTok APP fetch_one_video 已弃用,将在将来的版本中删除。请改用Web API。") async def fetch_one_video(self, aweme_id: str): # 获取TikTok的实时Cookie kwargs = await self.get_tiktok_headers()