From d641f0340cedf23cb06e90cab37cc31f0d273bff Mon Sep 17 00:00:00 2001 From: oho Date: Sun, 17 Nov 2024 17:35:38 +0800 Subject: [PATCH] fix tiktok crawlers to respect proxies from config.yaml --- crawlers/tiktok/app/app_crawler.py | 3 ++- crawlers/tiktok/web/web_crawler.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/crawlers/tiktok/app/app_crawler.py b/crawlers/tiktok/app/app_crawler.py index 21cae73..7b347f1 100644 --- a/crawlers/tiktok/app/app_crawler.py +++ b/crawlers/tiktok/app/app_crawler.py @@ -74,7 +74,8 @@ class TikTokAPPCrawler: "Cookie": tiktok_config["headers"]["Cookie"], "x-ladon": "Hello From Evil0ctal!", }, - "proxies": {"http://": None, "https://": None}, + "proxies": {"http://": tiktok_config["proxies"]["http"], + "https://": tiktok_config["proxies"]["https"]} } return kwargs diff --git a/crawlers/tiktok/web/web_crawler.py b/crawlers/tiktok/web/web_crawler.py index c7e03ac..f5bcf21 100644 --- a/crawlers/tiktok/web/web_crawler.py +++ b/crawlers/tiktok/web/web_crawler.py @@ -89,7 +89,8 @@ class TikTokWebCrawler: "Referer": tiktok_config["headers"]["Referer"], "Cookie": tiktok_config["headers"]["Cookie"], }, - "proxies": {"http://": None, "https://": None}, + "proxies": {"http://": tiktok_config["proxies"]["http"], + "https://": tiktok_config["proxies"]["https"]} } return kwargs @@ -133,7 +134,7 @@ class TikTokWebCrawler: kwargs = await self.get_tiktok_headers() # proxies = {"http://": 'http://43.159.29.191:24144', "https://": 'http://43.159.29.191:24144'} # 创建一个基础爬虫 - base_crawler = BaseCrawler(proxies=None, crawler_headers=kwargs["headers"]) + base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) async with base_crawler as crawler: # 创建一个用户作品的BaseModel参数 params = UserPost(secUid=secUid, cursor=cursor, count=count, coverFormat=coverFormat) @@ -216,7 +217,7 @@ class TikTokWebCrawler: kwargs = await self.get_tiktok_headers() # proxies = {"http://": 'http://43.159.18.174:25263', "https://": 'http://43.159.18.174:25263'} # 创建一个基础爬虫 - base_crawler = BaseCrawler(proxies=None, crawler_headers=kwargs["headers"]) + base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) async with base_crawler as crawler: # 创建一个作品评论的BaseModel参数 params = PostComment(aweme_id=aweme_id, cursor=cursor, count=count, current_region=current_region)