Merge pull request #291 from Xuenew/xyy_kuaishou

This commit is contained in:
Evil0ctal 2023-10-13 16:41:43 -07:00 committed by GitHub
commit cf86d39de6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 157 additions and 23 deletions

View File

@ -103,6 +103,7 @@
- 抖音(抖音海外版: TikTok视频/图片解析 - 抖音(抖音海外版: TikTok视频/图片解析
- Bilibili视频解析 - Bilibili视频解析
- 西瓜视频解析 - 西瓜视频解析
- 快手视频解析
- 网页端批量解析(支持抖音/TikTok混合提交) - 网页端批量解析(支持抖音/TikTok混合提交)
- 网页端解析结果页批量下载无水印视频(V3.X以上版本移除请自行部署V2.X版本) - 网页端解析结果页批量下载无水印视频(V3.X以上版本移除请自行部署V2.X版本)
- API调用获取链接数据 - API调用获取链接数据
@ -148,6 +149,13 @@ asyncio.run(hybrid_parsing(url=input("Paste Douyin/TikTok/Bilibili share URL her
> 💡提示:包含但不仅限于以下例子,如果遇到链接解析失败请开启一个新 [issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues) > 💡提示:包含但不仅限于以下例子,如果遇到链接解析失败请开启一个新 [issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues)
- 快手视频链接
```text
https://www.kuaishou.com/short-video/3xiqjrezhqjyzxw/
https://v.kuaishou.com/75kDOJ/
```
- 西瓜视频链接 - 西瓜视频链接
```text ```text

View File

@ -25,6 +25,7 @@ import traceback
import configparser import configparser
import urllib.parse import urllib.parse
import random import random
import json
from zlib import crc32 from zlib import crc32
from typing import Union from typing import Union
@ -52,20 +53,37 @@ class Scraper:
'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36' 'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'
} }
self.ixigua_api_headers = { self.ixigua_api_headers = {
'authority': 'ib.365yg.com', 'authority': 'ib.365yg.com',
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'zh-CN,zh;q=0.9', 'accept-language': 'zh-CN,zh;q=0.9',
'cache-control': 'no-cache', 'cache-control': 'no-cache',
'pragma': 'no-cache', 'pragma': 'no-cache',
'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"', 'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
'sec-ch-ua-mobile': '?0', 'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"', 'sec-ch-ua-platform': '"macOS"',
'sec-fetch-dest': 'document', 'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate', 'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'none', 'sec-fetch-site': 'none',
'sec-fetch-user': '?1', 'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1', 'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36' 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
}
self.kuaishou_api_headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Cookie': 'kpf=PC_WEB; clientid=3; did=web_c5627223fe1e796669894e6cb74f1461; _ga=GA1.1.1139357938.1696318390; didv=1696329758000; _ga_0P9YPW1GQ3=GS1.1.1696659232.14.0.1696659232.0.0.0; kpn=KUAISHOU_VISION',
'Pragma': 'no-cache',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36',
'sec-ch-ua': '"Google Chrome";v="117", "Not;A=Brand";v="8", "Chromium";v="117"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"macOS"',
} }
# 判断配置文件是否存在/Check if the configuration file exists # 判断配置文件是否存在/Check if the configuration file exists
if os.path.exists('config.ini'): if os.path.exists('config.ini'):
@ -246,7 +264,6 @@ class Scraper:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.get(url, headers=self.ixigua_api_headers, proxy=self.proxies, allow_redirects=False, async with session.get(url, headers=self.ixigua_api_headers, proxy=self.proxies, allow_redirects=False,
timeout=10) as response: timeout=10) as response:
print("asdfasdf",response.headers)
if response.status == 302: if response.status == 302:
url = response.headers['Location'].split('?')[0] if '?' in response.headers[ url = response.headers['Location'].split('?')[0] if '?' in response.headers[
'Location'] else \ 'Location'] else \
@ -261,6 +278,40 @@ class Scraper:
else: else:
print('该链接为原始链接,无需转换,原始链接为: {}'.format(url)) print('该链接为原始链接,无需转换,原始链接为: {}'.format(url))
return url return url
# elif 'kuaishou.com' in url or 'v.kuaishou.com':
elif 'kuaishou.com' in url:
"""
快手视频链接类型(不全)
1. https://www.kuaishou.com/short-video/3xiqjrezhqjyzxw
2. https://v.kuaishou.com/75kDOJ
快手用户链接类型(不全)
1. https://www.kuaishou.com/profile/3xvgbyksme9f2p6
快手直播链接类型(不全)
1.https://live.kuaishou.com/u/3xv5uz3ui6iga5w
2.https://v.kuaishou.com/5Ch22V
"""
if 'v.kuaishou.com' in url:
print('正在通过快手分享链接获取原始链接...')
try:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=self.kuaishou_api_headers, proxy=self.proxies, allow_redirects=False,
timeout=10) as response:
if response.status == 302:
url = response.headers['Location'].split('?')[0] if '?' in response.headers[
'Location'] else \
response.headers['Location']
# https://v.m.chenzhongtech.com/fw/photo/3xburnkmj3auazc # 视频
# https://v.m.chenzhongtech.com/fw/live/3xi49whuspyupmq # 直播
print('获取原始链接成功, 原始链接为: {}'.format(url))
return url
except Exception as e:
print('获取原始链接失败!')
print(e)
# return None
raise e
else:
print('该链接为原始链接,无需转换,原始链接为: {}'.format(url))
return url
"""__________________________________________⬇Douyin methods(抖音方法)⬇______________________________________""" """__________________________________________⬇Douyin methods(抖音方法)⬇______________________________________"""
@ -480,7 +531,7 @@ class Scraper:
"""__________________________________________⬇xigua methods(xigua方法)⬇______________________________________""" """__________________________________________⬇xigua methods(xigua方法)⬇______________________________________"""
# 获取西瓜播放地址的接口 # 获取西瓜播放地址的接口
def get_xigua_json_url(self,video_id): def get_xigua_json_url(self,video_id):
# 获取json文件的地址 # 获取json文件的地址
r = str(random.random())[2:] r = str(random.random())[2:]
@ -545,6 +596,63 @@ class Scraper:
except Exception as e: except Exception as e:
raise ValueError(f'获取西瓜视频数据出错了:{e}') raise ValueError(f'获取西瓜视频数据出错了:{e}')
"""__________________________________________⬇kuaishou methods(kuaishou方法)⬇______________________________________"""
# Get the Kuaishou video ID
async def get_kuaishou_video_id(self, original_url: str) -> Union[str, None]:
    """
    Resolve a Kuaishou link and extract the video ID from it.

    The link is first passed through ``self.convert_share_urls``; the ID is
    then read from the path segment that follows ``/fw/photo/`` or
    ``short-video/``. Only the segment itself is returned: a trailing slash,
    query string or fragment is not part of the ID.

    :param original_url: Kuaishou video link (share link or original link)
    :return: The video ID
    :raises ValueError: If the link cannot be converted or does not contain
        a recognised video path (e.g. live or profile links)
    """
    try:
        # Convert link
        resolved_url = await self.convert_share_urls(original_url)
        if not resolved_url:
            # Fail with a readable message instead of a TypeError on `in None`.
            raise ValueError(f'链接转换失败/Failed to convert link: {original_url}')
        # Known shapes after conversion:
        #   https://v.m.chenzhongtech.com/fw/photo/3xburnkmj3auazc  -> video
        #   https://v.m.chenzhongtech.com/fw/live/3xi49whuspyupmq   -> live
        #   https://c.kuaishou.com/fw/user/3xuw52inejca9tm          -> author page
        # Markers are tried in the same priority order as before.
        for marker in ('/fw/photo/', 'short-video/'):
            # Stop at '/', '?' or '#' so "…/short-video/<id>/" and
            # "…/short-video/<id>?authorId=…" both yield a clean ID.
            found = re.search(re.escape(marker) + r'([^/?#]+)', resolved_url)
            if found:
                # Return video ID
                return found.group(1)
        raise ValueError(f'无法识别的快手视频链接/Unrecognized Kuaishou video link: {resolved_url}')
    except Exception as e:
        raise ValueError(f'获取快手视频ID出错了:{e}') from e
@retry(stop=stop_after_attempt(4), wait=wait_fixed(7))
async def get_kuaishou_video_data(self, video_id: str) -> Union[dict, None]:
    """
    Fetch the playback URL of a single Kuaishou video.

    Requests ``https://www.kuaishou.com/short-video/<video_id>`` using
    ``self.kuaishou_api_headers`` and ``self.proxies``, then reads the
    ``"photoH265Url"`` value embedded in the returned page text.

    The method is wrapped in ``retry(stop_after_attempt(4), wait_fixed(7))``,
    so any ValueError raised here triggers another attempt.

    :param video_id: Kuaishou video ID
    :return: Dict with the keys ``status``, ``message``, ``type``,
        ``platform`` and ``video_url``; ``None`` when the field is present
        but empty
    :raises ValueError: If the request fails or the page does not contain a
        ``photoH265Url`` field
    """
    print('正在获取快手视频数据...')
    try:
        # Construct the access link
        video_url = f'https://www.kuaishou.com/short-video/{video_id}'
        print(f"正在获取视频数据API: {video_url}")
        async with aiohttp.ClientSession() as session:
            async with session.get(video_url, headers=self.kuaishou_api_headers, proxy=self.proxies,
                                   timeout=10) as response:
                page_text = await response.text()
        found = re.search(r'"photoH265Url":"(.*?)"', page_text)
        if found is None:
            # Explicit message instead of an opaque "list index out of range".
            raise ValueError('页面中未找到photoH265Url字段/photoH265Url not found in page')
        escaped_url = found.group(1)
        if not escaped_url:
            # Field exists but is empty: nothing to return (still falsy for callers).
            return None
        # The page stores the URL with \uXXXX escapes (e.g. \u002F for "/"); decode them.
        play_url = escaped_url.encode().decode('raw_unicode-escape')
        return {
            'status': 'success',
            'message': "更多接口请查看(More API see): https://api.tikhub.io/",
            'type': 'video',
            'platform': '快手',
            'video_url': play_url,
        }
    except Exception as e:
        raise ValueError(f'获取快手视频数据出错了:{e}') from e
"""__________________________________________⬇Hybrid methods(混合方法)⬇______________________________________""" """__________________________________________⬇Hybrid methods(混合方法)⬇______________________________________"""
# 判断链接平台/Judge link platform # 判断链接平台/Judge link platform
@ -555,6 +663,8 @@ class Scraper:
url_platform = 'bilibili' url_platform = 'bilibili'
elif 'xigua' in video_url: elif 'xigua' in video_url:
url_platform = 'xigua' url_platform = 'xigua'
elif 'kuaishou' in video_url:
url_platform = 'kuaishou'
elif 'tiktok' in video_url: elif 'tiktok' in video_url:
url_platform = 'tiktok' url_platform = 'tiktok'
else: else:
@ -568,7 +678,7 @@ class Scraper:
# 如果不是指定平台抛出异常/If it is not the specified platform, an exception is thrown # 如果不是指定平台抛出异常/If it is not the specified platform, an exception is thrown
if not url_platform: if not url_platform:
raise ValueError(f"链接**{video_url}**不是抖音、Bilibili、TikTok链接") raise ValueError(f"链接**{video_url}**不是抖音、Bilibili、西瓜、快手、TikTok链接")
print(f"正在解析**{url_platform}**视频链接...") print(f"正在解析**{url_platform}**视频链接...")
@ -577,6 +687,7 @@ class Scraper:
else await self.get_tiktok_video_id(video_url) if url_platform == 'tiktok' \ else await self.get_tiktok_video_id(video_url) if url_platform == 'tiktok' \
else await self.get_bilibili_video_id(video_url) if url_platform == 'bilibili' \ else await self.get_bilibili_video_id(video_url) if url_platform == 'bilibili' \
else await self.get_ixigua_video_id(video_url) if url_platform == 'xigua' \ else await self.get_ixigua_video_id(video_url) if url_platform == 'xigua' \
else await self.get_kuaishou_video_id(video_url) if url_platform == 'kuaishou' \
else None else None
# 如果获取不到视频ID抛出异常/If the video ID cannot be obtained, an exception is thrown # 如果获取不到视频ID抛出异常/If the video ID cannot be obtained, an exception is thrown
@ -589,6 +700,7 @@ class Scraper:
else await self.get_tiktok_video_data(video_id) if url_platform == 'tiktok' \ else await self.get_tiktok_video_data(video_id) if url_platform == 'tiktok' \
else await self.get_bilibili_video_data(video_id) if url_platform == 'bilibili' \ else await self.get_bilibili_video_data(video_id) if url_platform == 'bilibili' \
else await self.get_ixigua_video_data(video_id) if url_platform == 'xigua' \ else await self.get_ixigua_video_data(video_id) if url_platform == 'xigua' \
else await self.get_kuaishou_video_data(video_id) if url_platform == 'kuaishou' \
else None else None
if data: if data:
@ -601,6 +713,10 @@ class Scraper:
if url_platform == 'xigua': if url_platform == 'xigua':
print("获取西瓜视频数据成功!") print("获取西瓜视频数据成功!")
return data return data
# 如果是快手平台则返回视频数据/If it is a kuaishou platform, return video data
if url_platform == 'kuaishou':
print("获取快手视频数据成功!")
return data
# 如果是抖音/TikTok平台则继续进行数据解析/If it is a Douyin/TikTok platform, continue to parse the data # 如果是抖音/TikTok平台则继续进行数据解析/If it is a Douyin/TikTok platform, continue to parse the data
print(f"获取**{url_platform}**视频数据成功,正在判断数据类型...") print(f"获取**{url_platform}**视频数据成功,正在判断数据类型...")
@ -780,11 +896,19 @@ class Scraper:
"""__________________________________________⬇Test methods(测试方法)⬇______________________________________""" """__________________________________________⬇Test methods(测试方法)⬇______________________________________"""
async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili_url: str = None, _ixigua_url: str = None) -> None: async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili_url: str = None, _ixigua_url: str = None, _kuaishou_url: str = None) -> None:
# 异步测试/Async test # 异步测试/Async test
start_time = time.time() start_time = time.time()
print("<异步测试/Async test>") print("<异步测试/Async test>")
print('\n--------------------------------------------------')
print("正在测试异步获取快手视频ID方法...")
kuaishou_id = await api.get_kuaishou_video_id(_kuaishou_url)
print(f"快手视频ID: {kuaishou_id}")
print("正在测试异步获取快手视频数据方法...")
kuaishou_data = await api.get_kuaishou_video_data(kuaishou_id)
print(f"快手视频数据: {str(kuaishou_data)}")
print('\n--------------------------------------------------') print('\n--------------------------------------------------')
print("正在测试异步获取西瓜视频ID方法...") print("正在测试异步获取西瓜视频ID方法...")
ixigua_id = await api.get_ixigua_video_id(_ixigua_url) ixigua_id = await api.get_ixigua_video_id(_ixigua_url)
@ -816,14 +940,15 @@ async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili
print("正在测试异步获取TikTok视频数据方法...") print("正在测试异步获取TikTok视频数据方法...")
tiktok_data = await api.get_tiktok_video_data(tiktok_id) tiktok_data = await api.get_tiktok_video_data(tiktok_id)
print(f"TikTok视频数据: {str(tiktok_data)[:100]}") print(f"TikTok视频数据: {str(tiktok_data)[:100]}")
#
print('\n--------------------------------------------------') print('\n--------------------------------------------------')
print("正在测试异步混合解析方法...") print("正在测试异步混合解析方法...")
douyin_hybrid_data = await api.hybrid_parsing(_douyin_url) douyin_hybrid_data = await api.hybrid_parsing(_douyin_url)
tiktok_hybrid_data = await api.hybrid_parsing(_tiktok_url) tiktok_hybrid_data = await api.hybrid_parsing(_tiktok_url)
bilibili_hybrid_data = await api.hybrid_parsing(_bilibili_url) bilibili_hybrid_data = await api.hybrid_parsing(_bilibili_url)
xigua_hybrid_data = await api.hybrid_parsing(_ixigua_url) xigua_hybrid_data = await api.hybrid_parsing(_ixigua_url)
print(f"抖音、TikTok、哔哩哔哩、西瓜混合解析全部成功") kuaishou_hybrid_data = await api.hybrid_parsing(_kuaishou_url)
print(f"抖音、TikTok、哔哩哔哩、西瓜、快手快手混合解析全部成功")
print('\n--------------------------------------------------') print('\n--------------------------------------------------')
# 总耗时/Total time # 总耗时/Total time
@ -839,7 +964,8 @@ if __name__ == '__main__':
douyin_url = 'https://v.douyin.com/rLyrQxA/6.66' douyin_url = 'https://v.douyin.com/rLyrQxA/6.66'
tiktok_url = 'https://www.tiktok.com/@evil0ctal/video/7217027383390555438' tiktok_url = 'https://www.tiktok.com/@evil0ctal/video/7217027383390555438'
bilibili_url = "https://www.bilibili.com/video/BV1Th411x7ii/" bilibili_url = "https://www.bilibili.com/video/BV1Th411x7ii/"
ixigua_url = "https://www.ixigua.com/7270448082586698281" ixigua_url = "https://www.ixigua.com/7270448082586698281" # 短链接 "https://v.ixigua.com/ienrQ5bR/"
# ixigua_url = "ttps://v.ixigua.com/ienrQ5bR/" # convert_share_urls 这里有bug 如果抖音的口令解析的出来其他的都是none kuaishou_url = "https://www.kuaishou.com/short-video/3xiqjrezhqjyzxw" # 短链接 https://v.kuaishou.com/75kDOJ
asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url, _bilibili_url=bilibili_url, _ixigua_url=ixigua_url))
asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url, _bilibili_url=bilibili_url, _ixigua_url=ixigua_url,_kuaishou_url=kuaishou_url))