mirror of
https://github.com/Evil0ctal/Douyin_TikTok_Download_API.git
synced 2025-04-08 00:05:44 +08:00
174 lines
7.5 KiB
Python
174 lines
7.5 KiB
Python
import asyncio
|
|
|
|
from crawlers.douyin.web.web_crawler import DouyinWebCrawler # 导入抖音Web爬虫
|
|
from crawlers.tiktok.web.web_crawler import TikTokWebCrawler # 导入TikTok Web爬虫
|
|
from crawlers.tiktok.app.app_crawler import TikTokAPPCrawler # 导入TikTok App爬虫
|
|
|
|
|
|
class HybridCrawler:
|
|
def __init__(self):
|
|
self.DouyinWebCrawler = DouyinWebCrawler()
|
|
self.TikTokWebCrawler = TikTokWebCrawler()
|
|
self.TikTokAPPCrawler = TikTokAPPCrawler()
|
|
|
|
async def hybrid_parsing_single_video(self, url: str, minimal: bool = False):
|
|
# 解析抖音视频/Parse Douyin video
|
|
if "douyin" in url:
|
|
platform = "douyin"
|
|
aweme_id = await self.DouyinWebCrawler.get_aweme_id(url)
|
|
data = await self.DouyinWebCrawler.fetch_one_video(aweme_id)
|
|
data = data.get("aweme_detail")
|
|
# $.aweme_detail.aweme_type
|
|
aweme_type = data.get("aweme_type")
|
|
# 解析TikTok视频/Parse TikTok video
|
|
elif "tiktok" in url:
|
|
platform = "tiktok"
|
|
aweme_id = await self.TikTokWebCrawler.get_aweme_id(url)
|
|
data = await self.TikTokAPPCrawler.fetch_one_video(aweme_id)
|
|
# $.aweme_type
|
|
aweme_type = data.get("aweme_type")
|
|
else:
|
|
raise ValueError("hybrid_parsing_single_video: Cannot judge the video source from the URL.")
|
|
|
|
# 检查是否需要返回最小数据/Check if minimal data is required
|
|
if not minimal:
|
|
return data
|
|
|
|
# 如果是最小数据,处理数据/If it is minimal data, process the data
|
|
url_type_code_dict = {
|
|
# common
|
|
0: 'video',
|
|
# Douyin
|
|
2: 'image',
|
|
4: 'video',
|
|
68: 'image',
|
|
# TikTok
|
|
51: 'video',
|
|
55: 'video',
|
|
58: 'video',
|
|
61: 'video',
|
|
150: 'image'
|
|
}
|
|
# 判断链接类型/Judge link type
|
|
url_type = url_type_code_dict.get(aweme_type, 'video')
|
|
|
|
"""
|
|
以下为(视频||图片)数据处理的四个方法,如果你需要自定义数据处理请在这里修改.
|
|
The following are four methods of (video || image) data processing.
|
|
If you need to customize data processing, please modify it here.
|
|
"""
|
|
|
|
"""
|
|
创建已知数据字典(索引相同),稍后使用.update()方法更新数据
|
|
Create a known data dictionary (index the same),
|
|
and then use the .update() method to update the data
|
|
"""
|
|
|
|
result_data = {
|
|
'type': url_type,
|
|
'platform': platform,
|
|
'aweme_id': aweme_id,
|
|
'desc': data.get("desc"),
|
|
'create_time': data.get("create_time"),
|
|
'author': data.get("author"),
|
|
'music': data.get("music"),
|
|
'statistics': data.get("statistics"),
|
|
'cover_data': {
|
|
'cover': data.get("video").get("cover"),
|
|
'origin_cover': data.get("video").get("origin_cover"),
|
|
'dynamic_cover': data.get("video").get("dynamic_cover")
|
|
},
|
|
'hashtags': data.get('text_extra'),
|
|
}
|
|
# 创建一个空变量,稍后使用.update()方法更新数据/Create an empty variable and use the .update() method to update the data
|
|
api_data = None
|
|
# 判断链接类型并处理数据/Judge link type and process data
|
|
# 抖音数据处理/Douyin data processing
|
|
if platform == 'douyin':
|
|
# 抖音视频数据处理/Douyin video data processing
|
|
if url_type == 'video':
|
|
# 将信息储存在字典中/Store information in a dictionary
|
|
uri = data['video']['play_addr']['uri']
|
|
wm_video_url_HQ = data['video']['play_addr']['url_list'][0]
|
|
wm_video_url = f"https://aweme.snssdk.com/aweme/v1/playwm/?video_id={uri}&radio=1080p&line=0"
|
|
nwm_video_url_HQ = wm_video_url_HQ.replace('playwm', 'play')
|
|
nwm_video_url = f"https://aweme.snssdk.com/aweme/v1/play/?video_id={uri}&ratio=1080p&line=0"
|
|
api_data = {
|
|
'video_data':
|
|
{
|
|
'wm_video_url': wm_video_url,
|
|
'wm_video_url_HQ': wm_video_url_HQ,
|
|
'nwm_video_url': nwm_video_url,
|
|
'nwm_video_url_HQ': nwm_video_url_HQ
|
|
}
|
|
}
|
|
# 抖音图片数据处理/Douyin image data processing
|
|
elif url_type == 'image':
|
|
# 无水印图片列表/No watermark image list
|
|
no_watermark_image_list = []
|
|
# 有水印图片列表/With watermark image list
|
|
watermark_image_list = []
|
|
# 遍历图片列表/Traverse image list
|
|
for i in data['images']:
|
|
no_watermark_image_list.append(i['url_list'][0])
|
|
watermark_image_list.append(i['download_url_list'][0])
|
|
api_data = {
|
|
'image_data':
|
|
{
|
|
'no_watermark_image_list': no_watermark_image_list,
|
|
'watermark_image_list': watermark_image_list
|
|
}
|
|
}
|
|
# TikTok数据处理/TikTok data processing
|
|
elif platform == 'tiktok':
|
|
# TikTok视频数据处理/TikTok video data processing
|
|
if url_type == 'video':
|
|
# 将信息储存在字典中/Store information in a dictionary
|
|
wm_video = data['video']['download_addr']['url_list'][0]
|
|
api_data = {
|
|
'video_data':
|
|
{
|
|
'wm_video_url': wm_video,
|
|
'wm_video_url_HQ': wm_video,
|
|
'nwm_video_url': data['video']['play_addr']['url_list'][0],
|
|
'nwm_video_url_HQ': data['video']['bit_rate'][0]['play_addr']['url_list'][0]
|
|
}
|
|
}
|
|
# TikTok图片数据处理/TikTok image data processing
|
|
elif url_type == 'image':
|
|
# 无水印图片列表/No watermark image list
|
|
no_watermark_image_list = []
|
|
# 有水印图片列表/With watermark image list
|
|
watermark_image_list = []
|
|
for i in data['image_post_info']['images']:
|
|
no_watermark_image_list.append(i['display_image']['url_list'][0])
|
|
watermark_image_list.append(i['owner_watermark_image']['url_list'][0])
|
|
api_data = {
|
|
'image_data':
|
|
{
|
|
'no_watermark_image_list': no_watermark_image_list,
|
|
'watermark_image_list': watermark_image_list
|
|
}
|
|
}
|
|
# 更新数据/Update data
|
|
result_data.update(api_data)
|
|
return result_data
|
|
|
|
async def main(self):
|
|
# 测试混合解析单一视频接口/Test hybrid parsing single video endpoint
|
|
# url = "https://v.douyin.com/L4FJNR3/"
|
|
url = "https://www.tiktok.com/@evil0ctal/video/7156033831819037994"
|
|
minimal = True
|
|
result = await self.hybrid_parsing_single_video(url, minimal=minimal)
|
|
print(result)
|
|
|
|
# 占位
|
|
pass
|
|
|
|
|
|
if __name__ == '__main__':
|
|
# 实例化混合爬虫/Instantiate hybrid crawler
|
|
hybird_crawler = HybridCrawler()
|
|
# 运行测试代码/Run test code
|
|
asyncio.run(hybird_crawler.main())
|