mirror of
https://github.com/Evil0ctal/Douyin_TikTok_Download_API.git
synced 2025-04-20 07:09:59 +08:00
😈 Add Douyin Web X-Bogus algorithm
This commit is contained in:
parent
9dc180631f
commit
f6513be80a
564
X-Bogus.js
Normal file
564
X-Bogus.js
Normal file
File diff suppressed because one or more lines are too long
@ -3,17 +3,21 @@ aiosignal==1.3.1
|
|||||||
anyio==3.6.2
|
anyio==3.6.2
|
||||||
async-timeout==4.0.2
|
async-timeout==4.0.2
|
||||||
attrs==22.2.0
|
attrs==22.2.0
|
||||||
|
Brotli==1.0.9
|
||||||
charset-normalizer==3.0.1
|
charset-normalizer==3.0.1
|
||||||
click==8.1.3
|
click==8.1.3
|
||||||
colorama==0.4.6
|
colorama==0.4.6
|
||||||
|
Deprecated==1.2.13
|
||||||
fastapi==0.92.0
|
fastapi==0.92.0
|
||||||
frozenlist==1.3.3
|
frozenlist==1.3.3
|
||||||
h11==0.14.0
|
h11==0.14.0
|
||||||
idna==3.4
|
idna==3.4
|
||||||
limits
|
limits==2.8.0
|
||||||
multidict==6.0.4
|
multidict==6.0.4
|
||||||
orjson==3.8.6
|
orjson==3.8.6
|
||||||
|
packaging==22.0
|
||||||
pydantic==1.10.5
|
pydantic==1.10.5
|
||||||
|
PyExecJS==1.5.1
|
||||||
pywebio==1.7.1
|
pywebio==1.7.1
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
slowapi==0.1.7
|
slowapi==0.1.7
|
||||||
@ -25,4 +29,5 @@ typing_extensions==4.5.0
|
|||||||
ua-parser==0.16.1
|
ua-parser==0.16.1
|
||||||
user-agents==2.2.0
|
user-agents==2.2.0
|
||||||
uvicorn==0.20.0
|
uvicorn==0.20.0
|
||||||
|
wrapt==1.15.0
|
||||||
yarl==1.8.2
|
yarl==1.8.2
|
||||||
|
87
scraper.py
87
scraper.py
@ -8,8 +8,7 @@
|
|||||||
# 核心代码,估值1块(๑•̀ㅂ•́)و✧
|
# 核心代码,估值1块(๑•̀ㅂ•́)و✧
|
||||||
# 用于爬取Douyin/TikTok数据并以字典形式返回。
|
# 用于爬取Douyin/TikTok数据并以字典形式返回。
|
||||||
# input link, output dictionary.
|
# input link, output dictionary.
|
||||||
|
import random
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
@ -23,7 +22,7 @@ import configparser
|
|||||||
|
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
# import execjs
|
import execjs
|
||||||
from tenacity import *
|
from tenacity import *
|
||||||
|
|
||||||
|
|
||||||
@ -66,10 +65,11 @@ class Scraper:
|
|||||||
self.headers = {
|
self.headers = {
|
||||||
'User-Agent': "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.66"
|
'User-Agent': "Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.66"
|
||||||
}
|
}
|
||||||
self.douyin_cookies = {
|
self.douyin_api_headers = {
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
|
'accept-encoding': 'gzip, deflate, br',
|
||||||
"Referer": "https://www.douyin.com/",
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||||
'Cookie': 'msToken=tsQyL2_m4XgtIij2GZfyu8XNXBfTGELdreF1jeIJTyktxMqf5MMIna8m1bv7zYz4pGLinNP2TvISbrzvFubLR8khwmAVLfImoWo3Ecnl_956MgOK9kOBdwM=; odin_tt=6db0a7d68fd2147ddaf4db0b911551e472d698d7b84a64a24cf07c49bdc5594b2fb7a42fd125332977218dd517a36ec3c658f84cebc6f806032eff34b36909607d5452f0f9d898810c369cd75fd5fb15; ttwid=1%7CfhiqLOzu_UksmD8_muF_TNvFyV909d0cw8CSRsmnbr0%7C1662368529%7C048a4e969ec3570e84a5faa3518aa7e16332cfc7fbcb789780135d33a34d94d2'
|
'referer': 'https://www.douyin.com/',
|
||||||
|
'cookie': 's_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF;'
|
||||||
}
|
}
|
||||||
self.tiktok_api_headers = {
|
self.tiktok_api_headers = {
|
||||||
'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'
|
'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'
|
||||||
@ -194,21 +194,35 @@ class Scraper:
|
|||||||
|
|
||||||
"""__________________________________________⬇️Douyin methods(抖音方法)⬇️______________________________________"""
|
"""__________________________________________⬇️Douyin methods(抖音方法)⬇️______________________________________"""
|
||||||
|
|
||||||
|
"""
|
||||||
|
Credits: https://github.com/Johnserf-Seed
|
||||||
|
[中文]
|
||||||
|
感谢John为本项目提供了非常多的帮助
|
||||||
|
大家可以去他的仓库点个star :)
|
||||||
|
顺便打个广告, 如果需要更稳定、快速、长期维护的抖音/TikTok API, 或者需要更多的数据(APP端),
|
||||||
|
请移步: https://api.tikhub.io
|
||||||
|
|
||||||
|
[English]
|
||||||
|
Thanks to John for providing a lot of help to this project
|
||||||
|
You can go to his repository and give him a star :)
|
||||||
|
By the way, if you need a more stable, faster, and long-term maintained Douyin/TikTok API, or need more data (app side),
|
||||||
|
Please go to: https://api.tikhub.io
|
||||||
|
"""
|
||||||
|
|
||||||
# 生成抖音X-Bogus签名/Generate Douyin X-Bogus signature
|
# 生成抖音X-Bogus签名/Generate Douyin X-Bogus signature
|
||||||
# 暂时不可用,欢迎PR/Temporarily unavailable, welcome PR
|
# 下面的代码不能保证稳定性,随时可能失效/The code below is not guaranteed to be stable and may stop working at any time
|
||||||
# def generate_x_bogus(self, url: str) -> str:
|
def generate_x_bogus_url(self, url: str) -> str:
|
||||||
# """
|
"""
|
||||||
# 生成抖音X-Bogus签名
|
生成抖音X-Bogus签名
|
||||||
# :param url: 视频链接参数
|
:param url: 视频链接
|
||||||
# :return: X-Bogus签名
|
:return: 包含X-Bogus签名的URL
|
||||||
# """
|
"""
|
||||||
# # 调用JavaScript函数
|
# 调用JavaScript函数
|
||||||
# query = urllib.parse.urlparse(url).query
|
query = urllib.parse.urlparse(url).query
|
||||||
# xbogus = execjs.compile(open('./X-Bogus.js').read()).call('sign', query, self.douyin_cookies)
|
xbogus = execjs.compile(open('./X-Bogus.js').read()).call('sign', query, self.headers['User-Agent'])
|
||||||
# print('生成的X-Bogus签名为: {}'.format(xbogus))
|
print('生成的X-Bogus签名为: {}'.format(xbogus))
|
||||||
# new_url = url + "&X-Bogus=" + xbogus
|
new_url = url + "&X-Bogus=" + xbogus
|
||||||
# print(new_url)
|
return new_url
|
||||||
# return new_url
|
|
||||||
|
|
||||||
# 获取抖音视频ID/Get Douyin video ID
|
# 获取抖音视频ID/Get Douyin video ID
|
||||||
async def get_douyin_video_id(self, original_url: str) -> Union[str, None]:
|
async def get_douyin_video_id(self, original_url: str) -> Union[str, None]:
|
||||||
@ -250,32 +264,27 @@ class Scraper:
|
|||||||
|
|
||||||
# 获取单个抖音视频数据/Get single Douyin video data
|
# 获取单个抖音视频数据/Get single Douyin video data
|
||||||
@retry(stop=stop_after_attempt(4), wait=wait_fixed(7))
|
@retry(stop=stop_after_attempt(4), wait=wait_fixed(7))
|
||||||
async def get_douyin_video_data(self, video_id: str) -> Union[dict, None]:
|
async def get_douyin_video_data(self, video_id: str, s_v_web_id: str = None) -> Union[dict, None]:
|
||||||
"""
|
"""
|
||||||
:param video_id: str - 抖音视频id
|
:param video_id: str - 抖音视频id
|
||||||
|
:param s_v_web_id: str - Example: "s_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF; "
|
||||||
:return:dict - 包含信息的字典
|
:return:dict - 包含信息的字典
|
||||||
"""
|
"""
|
||||||
print('正在获取抖音视频数据...')
|
print('正在获取抖音视频数据...')
|
||||||
|
if s_v_web_id:
|
||||||
|
self.douyin_api_headers['cookie'] = s_v_web_id
|
||||||
try:
|
try:
|
||||||
# 构造访问链接/Construct the access link
|
# 构造访问链接/Construct the access link
|
||||||
"""
|
api_url = f"https://www.douyin.com/aweme/v1/web/aweme/detail/?device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id={video_id}&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Evil0ctal&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=128&device_memory=2333&platform=PC&webid=7158288523463362079"
|
||||||
旧API已失效(2022年12月21日),请大家且用且珍惜。
|
api_url = self.generate_x_bogus_url(api_url)
|
||||||
api_url = f"https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={video_id}"
|
|
||||||
|
|
||||||
# 从安卓apk中提取到的新API,目前可用,支持视频,图集,笔记的解析(2022年12月25日)
|
|
||||||
api_url = f"https://www.iesdouyin.com/aweme/v1/web/aweme/detail/?aweme_id={video_id}&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333&Github=Evil0ctal&words=FXXK_U_ByteDance"
|
|
||||||
"""
|
|
||||||
|
|
||||||
# 先暂时使用这个API,后续会开源web端的API,我有可能会很忙来不及更新,欢迎大家PR(2023年3月6日)
|
|
||||||
api_url = f"https://api.tikhub.io/douyin_video_data/?video_id={video_id}"
|
|
||||||
|
|
||||||
# 访问API/Access API
|
# 访问API/Access API
|
||||||
print("正在获取视频数据API: {}".format(api_url))
|
print("正在获取视频数据API: {}".format(api_url))
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.get(api_url, headers=self.headers, proxy=self.proxies, timeout=10) as response:
|
async with session.get(api_url, headers=self.douyin_api_headers, proxy=self.proxies,
|
||||||
|
timeout=10) as response:
|
||||||
response = await response.json()
|
response = await response.json()
|
||||||
# 获取视频数据/Get video data
|
# 获取视频数据/Get video data
|
||||||
video_data = response['aweme_list'][0]
|
video_data = response['aweme_detail']
|
||||||
print('获取视频数据成功!')
|
print('获取视频数据成功!')
|
||||||
# print("抖音API返回数据: {}".format(video_data))
|
# print("抖音API返回数据: {}".format(video_data))
|
||||||
return video_data
|
return video_data
|
||||||
@ -295,7 +304,7 @@ class Scraper:
|
|||||||
# 访问API/Access API
|
# 访问API/Access API
|
||||||
print("正在获取视频数据API: {}".format(api_url))
|
print("正在获取视频数据API: {}".format(api_url))
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
async with session.get(api_url, headers=self.douyin_cookies, proxy=self.proxies,
|
async with session.get(api_url, headers=self.douyin_api_headers, proxy=self.proxies,
|
||||||
timeout=10) as response:
|
timeout=10) as response:
|
||||||
response = await response.json()
|
response = await response.json()
|
||||||
# 获取视频数据/Get video data
|
# 获取视频数据/Get video data
|
||||||
@ -662,6 +671,6 @@ if __name__ == '__main__':
|
|||||||
# 运行测试
|
# 运行测试
|
||||||
# params = "device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id=7153585499477757192&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Firefox&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=&platform=PC&webid=7158288523463362079"
|
# params = "device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id=7153585499477757192&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Firefox&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=&platform=PC&webid=7158288523463362079"
|
||||||
# api.generate_x_bogus(params)
|
# api.generate_x_bogus(params)
|
||||||
# douyin_url = 'https://v.douyin.com/rLyrQxA/6.66'
|
douyin_url = 'https://v.douyin.com/rLyrQxA/6.66'
|
||||||
# tiktok_url = 'https://vt.tiktok.com/ZSRwWXtdr/'
|
tiktok_url = 'https://vt.tiktok.com/ZSRwWXtdr/'
|
||||||
# asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url))
|
asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url))
|
||||||
|
@ -291,7 +291,7 @@ async def hybrid_parsing(request: Request, url: str, minimal: bool = False):
|
|||||||
# 获取抖音单个视频数据/Get Douyin single video data
|
# 获取抖音单个视频数据/Get Douyin single video data
|
||||||
@app.get("/douyin_video_data/", response_class=ORJSONResponse, response_model=API_Video_Response, tags=["Douyin"])
|
@app.get("/douyin_video_data/", response_class=ORJSONResponse, response_model=API_Video_Response, tags=["Douyin"])
|
||||||
@limiter.limit(Rate_Limit)
|
@limiter.limit(Rate_Limit)
|
||||||
async def get_douyin_video_data(request: Request, douyin_video_url: str = None, video_id: str = None):
|
async def get_douyin_video_data(request: Request, douyin_video_url: str = None, video_id: str = None, s_v_web_id: str = None):
|
||||||
"""
|
"""
|
||||||
## 用途/Usage
|
## 用途/Usage
|
||||||
- 获取抖音用户单个视频数据,参数是视频链接|分享口令
|
- 获取抖音用户单个视频数据,参数是视频链接|分享口令
|
||||||
@ -308,6 +308,11 @@ async def get_douyin_video_data(request: Request, douyin_video_url: str = None,
|
|||||||
- The video ID, can be obtained from the video link.
|
- The video ID, can be obtained from the video link.
|
||||||
- 例子/Example:
|
- 例子/Example:
|
||||||
`7153585499477757192`
|
`7153585499477757192`
|
||||||
|
#### s_v_web_id(选填/Optional):
|
||||||
|
- s_v_web_id,可以从浏览器访问抖音然后从cookie中获取。
|
||||||
|
- s_v_web_id can be obtained by opening Douyin in a browser and copying the value from the cookies.
|
||||||
|
- 例子/Example:
|
||||||
|
`s_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF;`
|
||||||
#### 备注/Note:
|
#### 备注/Note:
|
||||||
- 参数`douyin_video_url`和`video_id`二选一即可,如果都填写,优先使用`video_id`以获得更快的响应速度。
|
- 参数`douyin_video_url`和`video_id`二选一即可,如果都填写,优先使用`video_id`以获得更快的响应速度。
|
||||||
- Provide either `douyin_video_url` or `video_id`; if both are given, `video_id` takes precedence for a faster response.
|
- Provide either `douyin_video_url` or `video_id`; if both are given, `video_id` takes precedence for a faster response.
|
||||||
@ -330,7 +335,7 @@ async def get_douyin_video_data(request: Request, douyin_video_url: str = None,
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
print('获取到的video_id数据:{}'.format(video_id))
|
print('获取到的video_id数据:{}'.format(video_id))
|
||||||
if video_id is not None:
|
if video_id is not None:
|
||||||
video_data = await api.get_douyin_video_data(video_id=video_id)
|
video_data = await api.get_douyin_video_data(video_id=video_id, s_v_web_id=s_v_web_id)
|
||||||
if video_data is None:
|
if video_data is None:
|
||||||
result = {
|
result = {
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user