From 4581d6cb6a5814d1ff3f961183e93a2317d1f729 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=98Xuenew=3Bgit=20config=20--global=20user=2Eemail?=
=?UTF-8?q?=2018188108851=40163=2Ecom=E2=80=99?= <“18188108851@163.com”>
Date: Mon, 25 Sep 2023 23:12:48 +0800
Subject: [PATCH] =?UTF-8?q?1,=E6=B7=BB=E5=8A=A0=E4=BA=86=E8=A5=BF=E7=93=9C?=
=?UTF-8?q?=E8=A7=A3=E6=9E=902=EF=BC=8C=E4=BF=AE=E6=94=B9=E4=BA=86readme/r?=
=?UTF-8?q?eadme.en3,BUG=20convert=5Fshare=5Furls=20=E8=BF=99=E9=87=8C?=
=?UTF-8?q?=E6=9C=89bug=E3=80=82=20=E5=A6=82=E6=9E=9C=E6=8A=96=E9=9F=B3?=
=?UTF-8?q?=E7=9A=84=E5=8F=A3=E4=BB=A4=E8=A7=A3=E6=9E=90=E7=9A=84=E5=87=BA?=
=?UTF-8?q?=E6=9D=A5=E5=85=B6=E4=BB=96=E7=9A=84=E9=83=BD=E6=98=AFnone?=
=?UTF-8?q?=EF=BC=8C=E8=BF=98=E6=B2=A1=E6=9C=89=E5=A5=BD=E7=9A=84=E8=A7=A3?=
=?UTF-8?q?=E5=86=B3=E6=96=B9=E6=B3=95=EF=BC=8C=E6=88=91=E6=83=B3=E7=AE=80?=
=?UTF-8?q?=E5=8D=95=E7=9A=84=E7=94=A8if=E5=88=A4=E6=96=AD=EF=BC=8C?=
=?UTF-8?q?=E4=BD=86=E8=BF=99=E6=A0=B7=E4=BC=9A=E4=B8=8D=E4=BC=9A=E6=9C=89?=
=?UTF-8?q?=E5=85=B6=E4=BB=96=E6=96=B0=E4=BD=86=E5=B9=B3=E5=8F=B0=E5=8A=A0?=
=?UTF-8?q?=E8=BF=9B=E6=9D=A5=E6=84=9F=E8=A7=89=E8=BF=99=E6=A0=B7=E5=A4=84?=
=?UTF-8?q?=E7=90=86=E6=9C=89=E7=82=B9=E5=A4=AA=E7=AE=80=E5=8D=95=E4=BA=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.idea/Douyin_TikTok_Download_API.iml | 12 ++
.idea/inspectionProfiles/Project_Default.xml | 37 +++++
.../inspectionProfiles/profiles_settings.xml | 6 +
.idea/misc.xml | 7 +
.idea/modules.xml | 8 +
.idea/vcs.xml | 6 +
.idea/workspace.xml | 147 ++++++++++++++++++
README.en.md | 8 +
README.md | 8 +
scraper.py | 146 ++++++++++++++++-
10 files changed, 380 insertions(+), 5 deletions(-)
create mode 100644 .idea/Douyin_TikTok_Download_API.iml
create mode 100644 .idea/inspectionProfiles/Project_Default.xml
create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
create mode 100644 .idea/misc.xml
create mode 100644 .idea/modules.xml
create mode 100644 .idea/vcs.xml
create mode 100644 .idea/workspace.xml
diff --git a/.idea/Douyin_TikTok_Download_API.iml b/.idea/Douyin_TikTok_Download_API.iml
new file mode 100644
index 0000000..8b8c395
--- /dev/null
+++ b/.idea/Douyin_TikTok_Download_API.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..8191630
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,37 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..8422fff
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..c5ff2c9
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 0000000..c793b68
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,147 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1695571223199
+
+
+ 1695571223199
+
+
+
+
+
+ 1695652986133
+
+
+
+ 1695652986133
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/README.en.md b/README.en.md
index 8c4eb12..ec010cf 100644
--- a/README.en.md
+++ b/README.en.md
@@ -81,6 +81,7 @@ _Download prohibited videos, perform data analysis, download without watermark o
- Douyin (overseas version of Douyin: TikTok) video/picture analysis
- Bilibili video analysis
+- Xigua video analysis
- Batch analysis on the web page (supports Douyin/TikTok mixed submission)
- Batch download of non-watermarked videos from the web parsing result page (removed for V3.X and above versions, please deploy V2.X version by yourself)
- API call to get link data
@@ -126,6 +127,13 @@ asyncio.run(hybrid_parsing(url=input("Paste Douyin/TikTok/Bilibili share URL her
> 💡Tip: Including but not limited to the following examples, if you encounter link parsing failure, please open a new one[issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues)
+- Xigua video link
+
+```text
+https://www.ixigua.com/7270448082586698281/
+https://m.ixigua.com/video/7274710134306112054/
+```
+
- Bilibili video link
```text
diff --git a/README.md b/README.md
index d9766a5..818e2c7 100644
--- a/README.md
+++ b/README.md
@@ -102,6 +102,7 @@
- 抖音(抖音海外版: TikTok)视频/图片解析
- Bilibili视频解析
+- 西瓜视频解析
- 网页端批量解析(支持抖音/TikTok混合提交)
- 网页端解析结果页批量下载无水印视频(V3.X以上版本移除,请自行部署V2.X版本)
- API调用获取链接数据
@@ -147,6 +148,13 @@ asyncio.run(hybrid_parsing(url=input("Paste Douyin/TikTok/Bilibili share URL her
> 💡提示:包含但不仅限于以下例子,如果遇到链接解析失败请开启一个新 [issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues)
+- 西瓜视频链接
+
+```text
+https://www.ixigua.com/7270448082586698281/
+https://m.ixigua.com/video/7274710134306112054/
+```
+
- Bilibili视频链接
```text
diff --git a/scraper.py b/scraper.py
index 43d0ff0..5e0bae7 100644
--- a/scraper.py
+++ b/scraper.py
@@ -10,7 +10,7 @@
# If this project is helpful to you, please give me a star, thank you!
# @备注:
# 核心代码,估值1块(๑•̀ㅂ•́)و✧
-# 用于爬取Douyin/TikTok/Bilibili的数据并以字典形式返回。
+# 用于爬取Douyin/TikTok/Bilibili/xigua的数据并以字典形式返回。
# 如果本项目对您有帮助,请给我一个star,谢谢!
import re
@@ -23,7 +23,9 @@ import asyncio
import traceback
import configparser
import urllib.parse
+import random
+from zlib import crc32
from typing import Union
from tenacity import *
@@ -47,6 +49,22 @@ class Scraper:
self.bilibili_api_headers = {
'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'
}
+ self.ixigua_api_headers = {
+ 'authority': 'ib.365yg.com',
+ 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
+ 'accept-language': 'zh-CN,zh;q=0.9',
+ 'cache-control': 'no-cache',
+ 'pragma': 'no-cache',
+ 'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
+ 'sec-ch-ua-mobile': '?0',
+ 'sec-ch-ua-platform': '"macOS"',
+ 'sec-fetch-dest': 'document',
+ 'sec-fetch-mode': 'navigate',
+ 'sec-fetch-site': 'none',
+ 'sec-fetch-user': '?1',
+ 'upgrade-insecure-requests': '1',
+ 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
+ }
# 判断配置文件是否存在/Check if the configuration file exists
if os.path.exists('config.ini'):
self.config = configparser.ConfigParser()
@@ -210,6 +228,37 @@ class Scraper:
else:
print('该链接为原始链接,无需转换,原始链接为: {}'.format(url))
return url
+ elif 'ixigua.com' in url:
+ """
+ 西瓜视频链接类型(不全):
+ 1. https://v.ixigua.com/ienrQ5bR/
+ 2. https://www.ixigua.com/7270448082586698281
+ 3. https://m.ixigua.com/video/7270448082586698281
+ 西瓜用户链接类型(不全):
+ 1. https://www.ixigua.com/home/3189050062678823
+ 西瓜直播链接类型(不全):
+ """
+ if 'v.ixigua.com' in url:
+ print('正在通过西瓜分享链接获取原始链接...')
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.get(url, headers=self.ixigua_api_headers, proxy=self.proxies, allow_redirects=False,
+ timeout=10) as response:
+ print("asdfasdf",response.headers)
+ if response.status == 302:
+ url = response.headers['Location'].split('?')[0] if '?' in response.headers[
+ 'Location'] else \
+ response.headers['Location']
+ print('获取原始链接成功, 原始链接为: {}'.format(url))
+ return url
+ except Exception as e:
+ print('获取原始链接失败!')
+ print(e)
+ # return None
+ raise e
+ else:
+ print('该链接为原始链接,无需转换,原始链接为: {}'.format(url))
+ return url
"""__________________________________________⬇️Douyin methods(抖音方法)⬇️______________________________________"""
@@ -365,7 +414,7 @@ class Scraper:
"""__________________________________________⬇️bilibili methods(Bilibili方法)⬇️______________________________________"""
- # 获取TikTok视频ID/Get TikTok video ID
+ # 获取bilibili视频ID/Get BiliBili video ID
async def get_bilibili_video_id(self, original_url: str) -> Union[str, None]:
"""
获取视频id
@@ -427,6 +476,73 @@ class Scraper:
except Exception as e:
raise ValueError(f'获取BiliBili视频数据出错了:{e}')
+
+ """__________________________________________⬇️xigua methods(xigua方法)⬇️______________________________________"""
+ # 获取西瓜拿播放地址的接口
+    def get_xigua_json_url(self, video_id):
+        """Build the ib.365yg.com play-address API URL for a video, including its CRC32 check value."""
+        # "r" is a random nonce (the text after "0." of a random float); "s" is the CRC32 of path + query
+        nonce = str(random.random())[2:]
+        checked_part = f"/video/urls/v/1/toutiao/mp4/{video_id}?r={nonce}"
+        checksum = crc32(checked_part.encode())
+        return f"https://ib.365yg.com{checked_part}&s={checksum}&nobase64=true"
+ # 获取西瓜视频ID/Get xigua video ID
+    async def get_ixigua_video_id(self, original_url: str) -> Union[str, None]:
+        """
+        Resolve a Xigua link and extract the numeric video ID from it.
+        :param original_url: share link (v.ixigua.com) or page link (www./m.ixigua.com)
+        :return: the video ID (digits only)
+        :raises ValueError: if the link cannot be resolved or contains no video ID
+        """
+        try:
+            # Short share links are expanded first; original links pass through unchanged
+            original_url = await self.convert_share_urls(original_url)
+            # convert_share_urls may return None (e.g. no redirect), hence the `or ''`.
+            # One raw-string pattern covers ixigua.com/<id> and ixigua.com/video/<id>.
+            match = re.search(r'ixigua\.com/(?:video/)?(\d+)', original_url or '')
+            if match is None:
+                raise ValueError(f'链接中未找到视频ID/No video ID in link: {original_url}')
+            return match.group(1)
+        except Exception as e:
+            raise ValueError(f'获取西瓜视频ID出错了:{e}') from e
+
+    @retry(stop=stop_after_attempt(4), wait=wait_fixed(7))
+    async def get_ixigua_video_data(self, video_id: str) -> Union[dict, None]:
+        """
+        Fetch the playable address of a single Xigua video.
+        :param video_id: numeric ID taken from the video page URL
+        :return: dict with the keys status, message, type, platform and video_url
+        :raises ValueError: on any failure; @retry re-runs the call (4 attempts, 7 s apart)
+        """
+        print('正在获取西瓜视频数据...')
+        try:
+            page_url = f'https://m.ixigua.com/video/{video_id}?wid_try=1'
+            # One session serves both requests instead of opening two
+            async with aiohttp.ClientSession() as session:
+                async with session.get(page_url, headers=self.ixigua_api_headers, proxy=self.proxies,
+                                       timeout=10) as response:
+                    page_html = await response.text()
+                # The play API is addressed by the "vid" embedded in the page, not by the URL ID
+                found = re.search(r'"vid":"([^"]+)",', page_html)
+                if found is None:
+                    raise ValueError('页面中未找到vid/"vid" not found in video page')
+                print('获取视频vid信息成功!')
+                play_url_api = self.get_xigua_json_url(found.group(1))
+                print(f"正在获取视频数据API: {play_url_api}")
+                async with session.get(play_url_api, headers=self.ixigua_api_headers, proxy=self.proxies,
+                                       timeout=10) as response:
+                    payload = await response.json()
+            main_url = payload.get("data", {}).get("video_list", {}).get("video_3", {}).get("main_url", "")
+            return {
+                'status': 'success',
+                'message': "更多接口请查看(More API see): https://api.tikhub.io/",
+                'type': 'video',
+                'platform': '西瓜',
+                'video_url': main_url,
+            }
+        except Exception as e:
+            raise ValueError(f'获取西瓜视频数据出错了:{e}') from e
+
"""__________________________________________⬇️Hybrid methods(混合方法)⬇️______________________________________"""
# 判断链接平台/Judge link platform
@@ -435,6 +551,8 @@ class Scraper:
url_platform = 'douyin'
elif 'bilibili' in video_url:
url_platform = 'bilibili'
+ elif 'xigua' in video_url:
+ url_platform = 'xigua'
elif 'tiktok' in video_url:
url_platform = 'tiktok'
else:
@@ -456,6 +574,7 @@ class Scraper:
video_id = await self.get_douyin_video_id(video_url) if url_platform == 'douyin' \
else await self.get_tiktok_video_id(video_url) if url_platform == 'tiktok' \
else await self.get_bilibili_video_id(video_url) if url_platform == 'bilibili' \
+ else await self.get_ixigua_video_id(video_url) if url_platform == 'xigua' \
else None
# 如果获取不到视频ID抛出异常/If the video ID cannot be obtained, an exception is thrown
@@ -467,6 +586,7 @@ class Scraper:
data = await self.get_douyin_video_data(video_id) if url_platform == 'douyin' \
else await self.get_tiktok_video_data(video_id) if url_platform == 'tiktok' \
else await self.get_bilibili_video_data(video_id) if url_platform == 'bilibili' \
+ else await self.get_ixigua_video_data(video_id) if url_platform == 'xigua' \
else None
if data:
@@ -475,6 +595,10 @@ class Scraper:
if url_platform == 'bilibili':
print("获取Bilibili视频数据成功!")
return data
+ # 如果是西瓜平台则返回视频数据/If it is a ixigua platform, return video data
+ if url_platform == 'xigua':
+ print("获取西瓜视频数据成功!")
+ return data
# 如果是抖音/TikTok平台则继续进行数据解析/If it is a Douyin/TikTok platform, continue to parse the data
print(f"获取**{url_platform}**视频数据成功,正在判断数据类型...")
@@ -654,11 +778,19 @@ class Scraper:
"""__________________________________________⬇️Test methods(测试方法)⬇️______________________________________"""
-async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili_url: str = None) -> None:
+async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili_url: str = None, _ixigua_url: str = None) -> None:
# 异步测试/Async test
start_time = time.time()
print("<异步测试/Async test>")
+ print('\n--------------------------------------------------')
+ print("正在测试异步获取西瓜视频ID方法...")
+ ixigua_id = await api.get_ixigua_video_id(_ixigua_url)
+ print(f"西瓜视频ID: {ixigua_id}")
+ print("正在测试异步获取西瓜视频数据方法...")
+ ixigua_data = await api.get_ixigua_video_data(ixigua_id)
+ print(f"西瓜视频数据: {str(ixigua_data)[:100]}")
+
print('\n--------------------------------------------------')
print("正在测试异步获取哔哩哔哩视频ID方法...")
bilibili_id = await api.get_bilibili_video_id(_bilibili_url)
@@ -688,7 +820,8 @@ async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili
douyin_hybrid_data = await api.hybrid_parsing(_douyin_url)
tiktok_hybrid_data = await api.hybrid_parsing(_tiktok_url)
bilibili_hybrid_data = await api.hybrid_parsing(_bilibili_url)
- print(f"抖音、TikTok、哔哩哔哩混合解析全部成功!")
+ xigua_hybrid_data = await api.hybrid_parsing(_ixigua_url)
+ print(f"抖音、TikTok、哔哩哔哩、西瓜混合解析全部成功!")
print('\n--------------------------------------------------')
# 总耗时/Total time
@@ -704,4 +837,7 @@ if __name__ == '__main__':
douyin_url = 'https://v.douyin.com/rLyrQxA/6.66'
tiktok_url = 'https://www.tiktok.com/@evil0ctal/video/7217027383390555438'
bilibili_url = "https://www.bilibili.com/video/BV1Th411x7ii/"
- asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url, _bilibili_url=bilibili_url))
+ ixigua_url = "https://www.ixigua.com/7270448082586698281"
+    # ixigua_url = "https://v.ixigua.com/ienrQ5bR/" # convert_share_urls 这里有bug 如果抖音的口令解析的出来其他的都是none
+ asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url, _bilibili_url=bilibili_url, _ixigua_url=ixigua_url))
+