From 58aa3286a5fb1ab3545958943f4e1773e588eb19 Mon Sep 17 00:00:00 2001
From: Evil0ctal <evil0ctal1985@gmail.com>
Date: Sun, 13 Nov 2022 16:09:53 -0800
Subject: [PATCH] Use AIOHTTP instead of HTTPX

---
 scraper.py | 126 +++++++++++++++++++++++++++--------------------------
 1 file changed, 65 insertions(+), 61 deletions(-)

diff --git a/scraper.py b/scraper.py
index 427f413..396baf1 100644
--- a/scraper.py
+++ b/scraper.py
@@ -2,7 +2,7 @@
 # -*- encoding: utf-8 -*-
 # @Author: https://github.com/Evil0ctal/
 # @Time: 2021/11/06
-# @Update: 2022/11/10
+# @Update: 2022/11/13
 # @Version: 3.1.0
 # @Function:
 # 核心代码，估值1块(๑•̀ㅂ•́)و✧
@@ -11,7 +11,8 @@
 
 
 import re
-import httpx
+import aiohttp
+import platform
 import asyncio
 import orjson
 import traceback
@@ -64,23 +65,25 @@ class Scraper:
         self.tiktok_api_headers = {
             'User-Agent': 'com.ss.android.ugc.trill/2613 (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)'
         }
-        self.app_config = configparser.ConfigParser()
-        self.app_config.read('config.ini', encoding='utf-8')
-        self.api_config = self.app_config['Scraper']
+        self.config = configparser.ConfigParser()
+        self.config.read('config.ini', encoding='utf-8')
         # 判断是否使用代理
-        if self.api_config['Proxy_switch'] == 'True':
+        if self.config['Scraper']['Proxy_switch'] == 'True':
             # 判断是否区别协议选择代理
-            if self.api_config['Use_different_protocols'] == 'False':
+            if self.config['Scraper']['Use_different_protocols'] == 'False':
                 self.proxies = {
-                    'all': self.api_config['All']
+                    'all': self.config['Scraper']['All']
                 }
             else:
                 self.proxies = {
-                    'http': self.api_config['Http_proxy'],
-                    'https': self.api_config['Https_proxy'],
+                    'http': self.config['Scraper']['Http_proxy'],
+                    'https': self.config['Scraper']['Https_proxy'],
                 }
         else:
             self.proxies = None
+        # 针对Windows系统的异步事件规则/Asynchronous event rules for Windows systems
+        if platform.system() == 'Windows':
+            asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
 
     """__________________________________________⬇️utils(实用程序)⬇️______________________________________"""
 
@@ -94,15 +97,15 @@ class Scraper:
             if len(url) > 0:
                 return url[0]
         except Exception as e:
-            print(e)
+            print('Error in get_url:', e)
             return None
 
     # 转换链接/convert url
     @retry(stop=stop_after_attempt(3), wait=wait_random(min=1, max=2))
     async def convert_share_urls(self, url: str) -> str or None:
         """
-        用于从短链接中获取长链接
-        :return: 长链接
+        用于将分享链接(短链接)转换为原始链接/Convert share links (short links) to original links
+        :return: 原始链接/Original link
         """
         # 检索字符串中的链接/Retrieve links from string
         url = self.get_url(url)
@@ -129,17 +132,15 @@ class Scraper:
                 url = re.compile(r'(https://v.douyin.com/)\w+', re.I).match(url).group()
                 print('正在通过抖音分享链接获取原始链接...')
                 try:
-                    async with httpx.AsyncClient(proxies=self.proxies) as client:
-                        response = await client.get(url, headers=self.headers, follow_redirects=False, timeout=10)
-                    if response.status_code == 302:
-                        # 视频链接302重定向'Location'字段
-                        # https://www.iesdouyin.com/share/video/7148345687535570206/
-                        # 用户主页链接302重定向'Location'字段
-                        # https://www.iesdouyin.com/share/user/MS4wLjABAAAAbLMPpOhVk441et7z7ECGcmGrK42KtoWOuR0_7pLZCcyFheA9__asY-kGfNAtYqXR
-                        url = response.headers['Location'].split('?')[0] if '?' in response.headers['Location'] else \
-                            response.headers['Location']
-                        print('获取原始链接成功, 原始链接为: {}'.format(url))
-                        return url
+                    async with aiohttp.ClientSession() as session:
+                        async with session.get(url, headers=self.headers, proxy=self.proxies, allow_redirects=False,
+                                               timeout=10) as response:
+                            if response.status == 302:
+                                url = response.headers['Location'].split('?')[0] if '?' in response.headers[
+                                    'Location'] else \
+                                    response.headers['Location']
+                                print('获取原始链接成功, 原始链接为: {}'.format(url))
+                                return url
                 except Exception as e:
                     print('获取原始链接失败！')
                     print(e)
@@ -162,17 +163,15 @@ class Scraper:
             else:
                 print('正在通过TikTok分享链接获取原始链接...')
                 try:
-                    async with httpx.AsyncClient(proxies=self.proxies) as client:
-                        response = await client.get(url, headers=self.headers, follow_redirects=False, timeout=10)
-                    if response.status_code == 301:
-                        # 视频链接302重定向'Location'字段
-                        # https://www.tiktok.com/@tiktok/video/6950000000000000000
-                        # 用户主页链接302重定向'Location'字段
-                        # https://www.tiktok.com/@tiktok
-                        url = response.headers['Location'].split('?')[0] if '?' in response.headers['Location'] else \
-                            response.headers['Location']
-                        print('获取原始链接成功, 原始链接为: {}'.format(url))
-                        return url
+                    async with aiohttp.ClientSession() as session:
+                        async with session.get(url, headers=self.headers, proxy=self.proxies, allow_redirects=False,
+                                               timeout=10) as response:
+                            if response.status == 301:
+                                url = response.headers['Location'].split('?')[0] if '?' in response.headers[
+                                    'Location'] else \
+                                    response.headers['Location']
+                                print('获取原始链接成功, 原始链接为: {}'.format(url))
+                                return url
                 except Exception as e:
                     print('获取原始链接失败！')
                     print(e)
@@ -230,15 +229,14 @@ class Scraper:
             api_url = f"https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={video_id}"
             # 访问API/Access API
             print("正在获取视频数据API: {}".format(api_url))
-            async with httpx.AsyncClient(proxies=self.proxies) as client:
-                response = await client.get(api_url, headers=self.headers, timeout=5)
-            # 获取返回的json数据/Get the returned json data
-            data = orjson.loads(response.text)
-            # 获取视频数据/Get video data
-            video_data = data['item_list'][0]
-            print('获取视频数据成功！')
-            # print("抖音API返回数据: {}".format(video_data))
-            return video_data
+            async with aiohttp.ClientSession() as session:
+                async with session.get(api_url, headers=self.headers, proxy=self.proxies, timeout=10) as response:
+                    response = await response.json()
+                    # 获取视频数据/Get video data
+                    video_data = response['item_list'][0]
+                    print('获取视频数据成功！')
+                    # print("抖音API返回数据: {}".format(video_data))
+                    return video_data
         except Exception as e:
             print('获取抖音视频数据失败！原因:{}'.format(e))
             return None
@@ -252,15 +250,16 @@ class Scraper:
             api_url = f"https://live.douyin.com/webcast/web/enter/?aid=6383&web_rid={web_rid}"
             # 访问API/Access API
             print("正在获取视频数据API: {}".format(api_url))
-            async with httpx.AsyncClient(proxies=self.proxies) as client:
-                response = await client.get(api_url, headers=self.douyin_cookies, timeout=5)
-            # 获取返回的json数据/Get the returned json data
-            data = orjson.loads(response.text)
-            # 获取视频数据/Get video data
-            video_data = data['data']
-            print('获取视频数据成功！')
-            # print("抖音API返回数据: {}".format(video_data))
-            return video_data
+            async with aiohttp.ClientSession() as session:
+                async with session.get(api_url, headers=self.douyin_cookies, proxy=self.proxies, timeout=10) as response:
+                    response = await response.json()
+                    # 获取返回的json数据/Get the returned json data
+                    data = orjson.loads(response.text)
+                    # 获取视频数据/Get video data
+                    video_data = data['data']
+                    print('获取视频数据成功！')
+                    # print("抖音API返回数据: {}".format(video_data))
+                    return video_data
         except Exception as e:
             print('获取抖音视频数据失败！原因:{}'.format(e))
             return None
@@ -306,13 +305,12 @@ class Scraper:
         try:
             api_url = f'https://api-h2.tiktokv.com/aweme/v1/feed/?aweme_id={video_id}&version_code=2613&aid=1180'
             print("正在获取视频数据API: {}".format(api_url))
-            async with httpx.AsyncClient(proxies=self.proxies) as client:
-                response = await client.get(api_url, headers=self.tiktok_api_headers, timeout=5)
-            if response.content != '':
-                data = orjson.loads(response.text)
-                video_data = data['aweme_list'][0]
-                print('获取视频信息成功！')
-                return video_data
+            async with aiohttp.ClientSession() as session:
+                async with session.get(api_url, headers=self.tiktok_api_headers, proxy=self.proxies, timeout=10) as response:
+                    response = await response.json()
+                    video_data = response['aweme_list'][0]
+                    print('获取视频信息成功！')
+                    return video_data
         except Exception as e:
             print('获取视频信息失败！原因:{}'.format(e))
             return None
@@ -519,8 +517,10 @@ class Scraper:
 """__________________________________________⬇️Test methods(测试方法)⬇️______________________________________"""
 
 
-async def async_test(douyin_url: str = None, tiktok_url: str = None):
+async def async_test(douyin_url: str = None, tiktok_url: str = None) -> None:
     # 异步测试/Async test
+    start_time = time.time()
+    print("正在进行异步测试...")
 
     print("正在测试异步获取抖音视频ID方法...")
     douyin_id = await api.get_douyin_video_id(douyin_url)
@@ -536,6 +536,10 @@ async def async_test(douyin_url: str = None, tiktok_url: str = None):
     douyin_hybrid_data = await api.hybrid_parsing(douyin_url)
     tiktok_hybrid_data = await api.hybrid_parsing(tiktok_url)
 
+    # 总耗时/Total time
+    total_time = round(time.time() - start_time, 2)
+    print("异步测试完成，总耗时: {}s".format(total_time))
+
 
 if __name__ == '__main__':
     api = Scraper()