mirror of
https://github.com/Evil0ctal/Douyin_TikTok_Download_API.git
synced 2025-04-20 11:35:01 +08:00
1. 添加了西瓜解析 2. 修改了readme/readme.en 3. BUG: convert_share_urls 这里有bug。如果抖音的口令解析的出来其他的都是none,还没有好的解决方法,我想简单的用if判断,但这样会不会有其他新的平台加进来,感觉这样处理有点太简单了
This commit is contained in:
parent
0d75d270d9
commit
4581d6cb6a
12
.idea/Douyin_TikTok_Download_API.iml
generated
Normal file
12
.idea/Douyin_TikTok_Download_API.iml
generated
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$" />
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="PyDocumentationSettings">
|
||||||
|
<option name="format" value="PLAIN" />
|
||||||
|
<option name="myDocStringFormat" value="Plain" />
|
||||||
|
</component>
|
||||||
|
</module>
|
37
.idea/inspectionProfiles/Project_Default.xml
generated
Normal file
37
.idea/inspectionProfiles/Project_Default.xml
generated
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
|
||||||
|
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredPackages">
|
||||||
|
<value>
|
||||||
|
<list size="15">
|
||||||
|
<item index="0" class="java.lang.String" itemvalue="moviepy" />
|
||||||
|
<item index="1" class="java.lang.String" itemvalue="requests" />
|
||||||
|
<item index="2" class="java.lang.String" itemvalue="scipy" />
|
||||||
|
<item index="3" class="java.lang.String" itemvalue="matplotlib" />
|
||||||
|
<item index="4" class="java.lang.String" itemvalue="psycopg2" />
|
||||||
|
<item index="5" class="java.lang.String" itemvalue="pydub" />
|
||||||
|
<item index="6" class="java.lang.String" itemvalue="PyAudio" />
|
||||||
|
<item index="7" class="java.lang.String" itemvalue="numpy" />
|
||||||
|
<item index="8" class="java.lang.String" itemvalue="mysql-connector-python" />
|
||||||
|
<item index="9" class="java.lang.String" itemvalue="wincertstore" />
|
||||||
|
<item index="10" class="java.lang.String" itemvalue="retrying" />
|
||||||
|
<item index="11" class="java.lang.String" itemvalue="mitmproxy" />
|
||||||
|
<item index="12" class="java.lang.String" itemvalue="PyMySQL" />
|
||||||
|
<item index="13" class="java.lang.String" itemvalue="Requests" />
|
||||||
|
<item index="14" class="java.lang.String" itemvalue="redis" />
|
||||||
|
</list>
|
||||||
|
</value>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredErrors">
|
||||||
|
<list>
|
||||||
|
<option value="N803" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
<inspection_tool class="TsLint" enabled="true" level="WARNING" enabled_by_default="true" />
|
||||||
|
</profile>
|
||||||
|
</component>
|
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
7
.idea/misc.xml
generated
Normal file
7
.idea/misc.xml
generated
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="JavaScriptSettings">
|
||||||
|
<option name="languageLevel" value="ES6" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (Douyin_TikTok_Download_API)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/Douyin_TikTok_Download_API.iml" filepath="$PROJECT_DIR$/.idea/Douyin_TikTok_Download_API.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
147
.idea/workspace.xml
generated
Normal file
147
.idea/workspace.xml
generated
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="BranchesTreeState">
|
||||||
|
<expand>
|
||||||
|
<path>
|
||||||
|
<item name="ROOT" type="e8cecc67:BranchNodeDescriptor" />
|
||||||
|
<item name="LOCAL_ROOT" type="e8cecc67:BranchNodeDescriptor" />
|
||||||
|
</path>
|
||||||
|
<path>
|
||||||
|
<item name="ROOT" type="e8cecc67:BranchNodeDescriptor" />
|
||||||
|
<item name="REMOTE_ROOT" type="e8cecc67:BranchNodeDescriptor" />
|
||||||
|
</path>
|
||||||
|
<path>
|
||||||
|
<item name="ROOT" type="e8cecc67:BranchNodeDescriptor" />
|
||||||
|
<item name="REMOTE_ROOT" type="e8cecc67:BranchNodeDescriptor" />
|
||||||
|
<item name="GROUP_NODE:origin" type="e8cecc67:BranchNodeDescriptor" />
|
||||||
|
</path>
|
||||||
|
</expand>
|
||||||
|
<select />
|
||||||
|
</component>
|
||||||
|
<component name="ChangeListManager">
|
||||||
|
<list default="true" id="1927aa06-6b5f-4740-b2ae-70a32c544f86" name="Default Changelist" comment="" />
|
||||||
|
<option name="SHOW_DIALOG" value="false" />
|
||||||
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||||
|
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||||
|
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||||
|
</component>
|
||||||
|
<component name="Git.Settings">
|
||||||
|
<option name="RECENT_BRANCH_BY_REPOSITORY">
|
||||||
|
<map>
|
||||||
|
<entry key="$PROJECT_DIR$" value="xigua" />
|
||||||
|
</map>
|
||||||
|
</option>
|
||||||
|
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectId" id="2VqkuxviM0uPte5GQr4yI8Qa8JC" />
|
||||||
|
<component name="ProjectViewState">
|
||||||
|
<option name="hideEmptyMiddlePackages" value="true" />
|
||||||
|
<option name="showLibraryContents" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="PropertiesComponent">
|
||||||
|
<property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
|
||||||
|
<property name="RunOnceActivity.ShowReadmeOnStart" value="true" />
|
||||||
|
<property name="WebServerToolWindowFactoryState" value="false" />
|
||||||
|
</component>
|
||||||
|
<component name="RunManager">
|
||||||
|
<configuration name="scraper" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||||
|
<module name="Douyin_TikTok_Download_API" />
|
||||||
|
<option name="INTERPRETER_OPTIONS" value="" />
|
||||||
|
<option name="PARENT_ENVS" value="true" />
|
||||||
|
<envs>
|
||||||
|
<env name="PYTHONUNBUFFERED" value="1" />
|
||||||
|
</envs>
|
||||||
|
<option name="SDK_HOME" value="" />
|
||||||
|
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
||||||
|
<option name="IS_MODULE_SDK" value="true" />
|
||||||
|
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||||
|
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||||
|
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||||
|
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/scraper.py" />
|
||||||
|
<option name="PARAMETERS" value="" />
|
||||||
|
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||||
|
<option name="EMULATE_TERMINAL" value="false" />
|
||||||
|
<option name="MODULE_MODE" value="false" />
|
||||||
|
<option name="REDIRECT_INPUT" value="false" />
|
||||||
|
<option name="INPUT_FILE" value="" />
|
||||||
|
<method v="2" />
|
||||||
|
</configuration>
|
||||||
|
<recent_temporary>
|
||||||
|
<list>
|
||||||
|
<item itemvalue="Python.scraper" />
|
||||||
|
</list>
|
||||||
|
</recent_temporary>
|
||||||
|
</component>
|
||||||
|
<component name="SvnConfiguration">
|
||||||
|
<configuration />
|
||||||
|
</component>
|
||||||
|
<component name="TaskManager">
|
||||||
|
<task active="true" id="Default" summary="Default task">
|
||||||
|
<changelist id="1927aa06-6b5f-4740-b2ae-70a32c544f86" name="Default Changelist" comment="" />
|
||||||
|
<created>1695571223199</created>
|
||||||
|
<option name="number" value="Default" />
|
||||||
|
<option name="presentableId" value="Default" />
|
||||||
|
<updated>1695571223199</updated>
|
||||||
|
<workItem from="1695571224589" duration="834000" />
|
||||||
|
<workItem from="1695638899275" duration="6328000" />
|
||||||
|
<workItem from="1695654217519" duration="164000" />
|
||||||
|
</task>
|
||||||
|
<task id="LOCAL-00001" summary="1,添加西瓜视频解析 2,添加了readme/readme.en里关于西瓜样例 3, BUG convert_share_urls 这里有bug,如果抖音的口令解析的出来其他的都是none,这里我看你的逻辑我没动。我想就简单的判断是不是哔哩还是西瓜但这样会不会有点不好,将来有其他平台这里还得改,我在想有没有更简单的方案。">
|
||||||
|
<created>1695652986133</created>
|
||||||
|
<option name="number" value="00001" />
|
||||||
|
<option name="presentableId" value="LOCAL-00001" />
|
||||||
|
<option name="project" value="LOCAL" />
|
||||||
|
<updated>1695652986133</updated>
|
||||||
|
</task>
|
||||||
|
<option name="localTasksCounter" value="2" />
|
||||||
|
<servers />
|
||||||
|
</component>
|
||||||
|
<component name="TypeScriptGeneratedFilesManager">
|
||||||
|
<option name="version" value="2" />
|
||||||
|
</component>
|
||||||
|
<component name="Vcs.Log.Tabs.Properties">
|
||||||
|
<option name="TAB_STATES">
|
||||||
|
<map>
|
||||||
|
<entry key="MAIN">
|
||||||
|
<value>
|
||||||
|
<State />
|
||||||
|
</value>
|
||||||
|
</entry>
|
||||||
|
</map>
|
||||||
|
</option>
|
||||||
|
<option name="oldMeFiltersMigrated" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="VcsManagerConfiguration">
|
||||||
|
<MESSAGE value="1,添加西瓜视频解析 2,添加了readme/readme.en里关于西瓜样例 3, BUG convert_share_urls 这里有bug,如果抖音的口令解析的出来其他的都是none,这里我看你的逻辑我没动。我想就简单的判断是不是哔哩还是西瓜但这样会不会有点不好,将来有其他平台这里还得改,我在想有没有更简单的方案。" />
|
||||||
|
<option name="LAST_COMMIT_MESSAGE" value="1,添加西瓜视频解析 2,添加了readme/readme.en里关于西瓜样例 3, BUG convert_share_urls 这里有bug,如果抖音的口令解析的出来其他的都是none,这里我看你的逻辑我没动。我想就简单的判断是不是哔哩还是西瓜但这样会不会有点不好,将来有其他平台这里还得改,我在想有没有更简单的方案。" />
|
||||||
|
</component>
|
||||||
|
<component name="WindowStateProjectService">
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.bottom" timestamp="1695654184489">
|
||||||
|
<screen x="0" y="0" width="1792" height="1120" />
|
||||||
|
</state>
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.bottom/0.0.1792.1120@0.0.1792.1120" timestamp="1695654184489" />
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.center" timestamp="1695654184487">
|
||||||
|
<screen x="0" y="0" width="1792" height="1120" />
|
||||||
|
</state>
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.center/0.0.1792.1120@0.0.1792.1120" timestamp="1695654184487" />
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.left" timestamp="1695654184487">
|
||||||
|
<screen x="0" y="0" width="1792" height="1120" />
|
||||||
|
</state>
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.left/0.0.1792.1120@0.0.1792.1120" timestamp="1695654184487" />
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.right" timestamp="1695654184488">
|
||||||
|
<screen x="0" y="0" width="1792" height="1120" />
|
||||||
|
</state>
|
||||||
|
<state width="1750" height="405" key="GridCell.Tab.0.right/0.0.1792.1120@0.0.1792.1120" timestamp="1695654184488" />
|
||||||
|
<state x="581" y="280" key="RollbackChangesDialog" timestamp="1695654162351">
|
||||||
|
<screen x="0" y="0" width="1792" height="1120" />
|
||||||
|
</state>
|
||||||
|
<state x="581" y="280" key="RollbackChangesDialog/0.0.1792.1120@0.0.1792.1120" timestamp="1695654162351" />
|
||||||
|
<state x="496" y="297" key="Vcs.Push.Dialog.v2" timestamp="1695653575453">
|
||||||
|
<screen x="0" y="0" width="1792" height="1120" />
|
||||||
|
</state>
|
||||||
|
<state x="496" y="297" key="Vcs.Push.Dialog.v2/0.0.1792.1120@0.0.1792.1120" timestamp="1695653575453" />
|
||||||
|
</component>
|
||||||
|
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
||||||
|
<SUITE FILE_PATH="coverage/Douyin_TikTok_Download_API$scraper.coverage" NAME="scraper Coverage Results" MODIFIED="1695652653592" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="true" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$" />
|
||||||
|
</component>
|
||||||
|
</project>
|
@ -81,6 +81,7 @@ _Download prohibited videos, perform data analysis, download without watermark o
|
|||||||
|
|
||||||
- Douyin (overseas version of Douyin: TikTok) video/picture analysis
|
- Douyin (overseas version of Douyin: TikTok) video/picture analysis
|
||||||
- Bilibili video analysis
|
- Bilibili video analysis
|
||||||
|
- Xigua video analysis
|
||||||
- Batch analysis on the web page (supports Douyin/TikTok mixed submission)
|
- Batch analysis on the web page (supports Douyin/TikTok mixed submission)
|
||||||
- Batch download of non-watermarked videos from the web parsing result page (removed for V3.X and above versions, please deploy V2.X version by yourself)
|
- Batch download of non-watermarked videos from the web parsing result page (removed for V3.X and above versions, please deploy V2.X version by yourself)
|
||||||
- API call to get link data
|
- API call to get link data
|
||||||
@ -126,6 +127,13 @@ asyncio.run(hybrid_parsing(url=input("Paste Douyin/TikTok/Bilibili share URL her
|
|||||||
|
|
||||||
> 💡Tip: Supported links include, but are not limited to, the examples below. If a link fails to parse, please open a new [issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues)
|
> 💡Tip: Supported links include, but are not limited to, the examples below. If a link fails to parse, please open a new [issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues)
|
||||||
|
|
||||||
|
- Xigua video link
|
||||||
|
|
||||||
|
```text
|
||||||
|
https://www.ixigua.com/7270448082586698281/
|
||||||
|
https://m.ixigua.com/video/7274710134306112054/
|
||||||
|
```
|
||||||
|
|
||||||
- Bilibili video link
|
- Bilibili video link
|
||||||
|
|
||||||
```text
|
```text
|
||||||
|
@ -102,6 +102,7 @@
|
|||||||
|
|
||||||
- 抖音(抖音海外版: TikTok)视频/图片解析
|
- 抖音(抖音海外版: TikTok)视频/图片解析
|
||||||
- Bilibili视频解析
|
- Bilibili视频解析
|
||||||
|
- 西瓜视频解析
|
||||||
- 网页端批量解析(支持抖音/TikTok混合提交)
|
- 网页端批量解析(支持抖音/TikTok混合提交)
|
||||||
- 网页端解析结果页批量下载无水印视频(V3.X以上版本移除,请自行部署V2.X版本)
|
- 网页端解析结果页批量下载无水印视频(V3.X以上版本移除,请自行部署V2.X版本)
|
||||||
- API调用获取链接数据
|
- API调用获取链接数据
|
||||||
@ -147,6 +148,13 @@ asyncio.run(hybrid_parsing(url=input("Paste Douyin/TikTok/Bilibili share URL her
|
|||||||
|
|
||||||
> 💡提示:包含但不仅限于以下例子,如果遇到链接解析失败请开启一个新 [issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues)
|
> 💡提示:包含但不仅限于以下例子,如果遇到链接解析失败请开启一个新 [issue](https://github.com/Evil0ctal/Douyin_TikTok_Download_API/issues)
|
||||||
|
|
||||||
|
- 西瓜视频链接
|
||||||
|
|
||||||
|
```text
|
||||||
|
https://www.ixigua.com/7270448082586698281/
|
||||||
|
https://m.ixigua.com/video/7274710134306112054/
|
||||||
|
```
|
||||||
|
|
||||||
- Bilibili视频链接
|
- Bilibili视频链接
|
||||||
|
|
||||||
```text
|
```text
|
||||||
|
146
scraper.py
146
scraper.py
@ -10,7 +10,7 @@
|
|||||||
# If this project is helpful to you, please give me a star, thank you!
|
# If this project is helpful to you, please give me a star, thank you!
|
||||||
# @备注:
|
# @备注:
|
||||||
# 核心代码,估值1块(๑•̀ㅂ•́)و✧
|
# 核心代码,估值1块(๑•̀ㅂ•́)و✧
|
||||||
# 用于爬取Douyin/TikTok/Bilibili的数据并以字典形式返回。
|
# 用于爬取Douyin/TikTok/Bilibili/xigua的数据并以字典形式返回。
|
||||||
# 如果本项目对您有帮助,请给我一个star,谢谢!
|
# 如果本项目对您有帮助,请给我一个star,谢谢!
|
||||||
|
|
||||||
import re
|
import re
|
||||||
@ -23,7 +23,9 @@ import asyncio
|
|||||||
import traceback
|
import traceback
|
||||||
import configparser
|
import configparser
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import random
|
||||||
|
|
||||||
|
from zlib import crc32
|
||||||
from typing import Union
|
from typing import Union
|
||||||
from tenacity import *
|
from tenacity import *
|
||||||
|
|
||||||
@ -47,6 +49,22 @@ class Scraper:
|
|||||||
self.bilibili_api_headers = {
|
self.bilibili_api_headers = {
|
||||||
'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'
|
'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36'
|
||||||
}
|
}
|
||||||
|
self.ixigua_api_headers = {
|
||||||
|
'authority': 'ib.365yg.com',
|
||||||
|
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||||
|
'accept-language': 'zh-CN,zh;q=0.9',
|
||||||
|
'cache-control': 'no-cache',
|
||||||
|
'pragma': 'no-cache',
|
||||||
|
'sec-ch-ua': '"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"',
|
||||||
|
'sec-ch-ua-mobile': '?0',
|
||||||
|
'sec-ch-ua-platform': '"macOS"',
|
||||||
|
'sec-fetch-dest': 'document',
|
||||||
|
'sec-fetch-mode': 'navigate',
|
||||||
|
'sec-fetch-site': 'none',
|
||||||
|
'sec-fetch-user': '?1',
|
||||||
|
'upgrade-insecure-requests': '1',
|
||||||
|
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
|
||||||
|
}
|
||||||
# 判断配置文件是否存在/Check if the configuration file exists
|
# 判断配置文件是否存在/Check if the configuration file exists
|
||||||
if os.path.exists('config.ini'):
|
if os.path.exists('config.ini'):
|
||||||
self.config = configparser.ConfigParser()
|
self.config = configparser.ConfigParser()
|
||||||
@ -210,6 +228,37 @@ class Scraper:
|
|||||||
else:
|
else:
|
||||||
print('该链接为原始链接,无需转换,原始链接为: {}'.format(url))
|
print('该链接为原始链接,无需转换,原始链接为: {}'.format(url))
|
||||||
return url
|
return url
|
||||||
|
elif 'ixigua.com' in url:
|
||||||
|
"""
|
||||||
|
西瓜视频链接类型(不全):
|
||||||
|
1. https://v.ixigua.com/ienrQ5bR/
|
||||||
|
2. https://www.ixigua.com/7270448082586698281
|
||||||
|
3. https://m.ixigua.com/video/7270448082586698281
|
||||||
|
西瓜用户链接类型(不全):
|
||||||
|
1. https://www.ixigua.com/home/3189050062678823
|
||||||
|
西瓜直播链接类型(不全):
|
||||||
|
"""
|
||||||
|
if 'v.ixigua.com' in url:
|
||||||
|
print('正在通过西瓜分享链接获取原始链接...')
|
||||||
|
try:
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
async with session.get(url, headers=self.ixigua_api_headers, proxy=self.proxies, allow_redirects=False,
|
||||||
|
timeout=10) as response:
|
||||||
|
print("asdfasdf",response.headers)
|
||||||
|
if response.status == 302:
|
||||||
|
url = response.headers['Location'].split('?')[0] if '?' in response.headers[
|
||||||
|
'Location'] else \
|
||||||
|
response.headers['Location']
|
||||||
|
print('获取原始链接成功, 原始链接为: {}'.format(url))
|
||||||
|
return url
|
||||||
|
except Exception as e:
|
||||||
|
print('获取原始链接失败!')
|
||||||
|
print(e)
|
||||||
|
# return None
|
||||||
|
raise e
|
||||||
|
else:
|
||||||
|
print('该链接为原始链接,无需转换,原始链接为: {}'.format(url))
|
||||||
|
return url
|
||||||
|
|
||||||
"""__________________________________________⬇️Douyin methods(抖音方法)⬇️______________________________________"""
|
"""__________________________________________⬇️Douyin methods(抖音方法)⬇️______________________________________"""
|
||||||
|
|
||||||
@ -365,7 +414,7 @@ class Scraper:
|
|||||||
|
|
||||||
"""__________________________________________⬇️bilibili methods(Bilibili方法)⬇️______________________________________"""
|
"""__________________________________________⬇️bilibili methods(Bilibili方法)⬇️______________________________________"""
|
||||||
|
|
||||||
# 获取TikTok视频ID/Get TikTok video ID
|
# 获取bilibili视频ID/Get BiliBili video ID
|
||||||
async def get_bilibili_video_id(self, original_url: str) -> Union[str, None]:
|
async def get_bilibili_video_id(self, original_url: str) -> Union[str, None]:
|
||||||
"""
|
"""
|
||||||
获取视频id
|
获取视频id
|
||||||
@ -427,6 +476,73 @@ class Scraper:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f'获取BiliBili视频数据出错了:{e}')
|
raise ValueError(f'获取BiliBili视频数据出错了:{e}')
|
||||||
|
|
||||||
|
|
||||||
|
"""__________________________________________⬇️xigua methods(xigua方法)⬇️______________________________________"""
|
||||||
|
# 获取西瓜拿播放地址的接口
|
||||||
|
def get_xigua_json_url(self, video_id: str) -> str:
    """
    Build the signed URL of the Xigua play-address JSON API.

    The request path embeds ``video_id`` and a random ``r`` nonce; the ``s``
    parameter is the CRC32 checksum of that path (query string included up
    to and including ``r``).

    :param video_id: Video identifier inserted into the API path.
    :return: Full ``https://ib.365yg.com`` URL carrying ``r``, ``s`` and
        ``nobase64=true``.
    """
    # Fixed-point formatting guarantees a digits-only nonce.
    # str(random.random()) switches to scientific notation for values below
    # 1e-4 (e.g. '1e-05'), and slicing that leaks '-05' into the query string.
    r = "{:.16f}".format(random.random())[2:]
    url_part = "/video/urls/v/1/toutiao/mp4/{}?r={}".format(video_id, r)
    # The checksum must cover exactly the path that is sent to the server.
    s = crc32(url_part.encode())
    return "https://ib.365yg.com{}&s={}&nobase64=true".format(url_part, s)
|
||||||
|
# 获取西瓜视频ID/Get xigua video ID
|
||||||
|
async def get_ixigua_video_id(self, original_url: str) -> Union[str, None]:
    """
    Extract the numeric video ID from a Xigua video link.

    The link is first passed through ``self.convert_share_urls`` and the ID
    is then read from either ``ixigua.com/<id>`` or ``ixigua.com/video/<id>``.

    :param original_url: Video link (share link or full link).
    :return: The video ID as a string of digits.
    :raises ValueError: If the link cannot be converted or holds no video ID.
    """
    try:
        # Convert link: resolve share links to the full URL first.
        original_url = await self.convert_share_urls(original_url)
        # One raw-string pattern covers both the desktop form
        # (www.ixigua.com/<id>) and the mobile form (m.ixigua.com/video/<id>).
        match = re.search(r'ixigua\.com/(?:video/)?(\d+)', original_url)
        if match is None:
            # Fail with an explicit reason instead of an unbound-variable error.
            raise ValueError(f'no numeric video id found in URL: {original_url}')
        return match.group(1)
    except Exception as e:
        raise ValueError(f'获取西瓜视频ID出错了:{e}') from e
|
||||||
|
|
||||||
|
@retry(stop=stop_after_attempt(4), wait=wait_fixed(7))
async def get_ixigua_video_data(self, video_id: str) -> Union[dict, None]:
    """
    Fetch the play URL of a single Xigua video.

    Loads the mobile video page, reads the internal ``vid`` from its HTML,
    then queries the signed play-address API built by
    ``self.get_xigua_json_url``.

    :param video_id: Numeric video ID as it appears in the video link.
    :return: Dict with ``status``, ``message``, ``type``, ``platform`` and
        ``video_url`` keys.
    :raises ValueError: If any request or parsing step fails; the retry
        decorator then re-runs the call.
    """
    print('正在获取西瓜视频数据...')
    try:
        # Construct the access link for the mobile page that embeds the vid.
        page_url = f'https://m.ixigua.com/video/{video_id}?wid_try=1'
        # A single session serves both requests.
        async with aiohttp.ClientSession() as session:
            async with session.get(page_url, headers=self.ixigua_api_headers, proxy=self.proxies,
                                   timeout=10) as response:
                page_html = await response.text()
            match = re.search(r'"vid":"([^"]+)",', page_html)
            if match is None:
                # Explicit reason instead of "'NoneType' has no attribute 'group'".
                raise ValueError('vid not found in the video page')
            vid = match.group(1)
            print('获取视频vid信息成功!')
            play_url_api = self.get_xigua_json_url(vid)
            print(f"正在获取视频数据API: {play_url_api}")
            async with session.get(play_url_api, headers=self.ixigua_api_headers, proxy=self.proxies,
                                   timeout=10) as response:
                payload = await response.json()
        video_list = payload.get("data", {}).get("video_list", {})
        # Prefer the "video_3" entry, as before.
        main_url = video_list.get("video_3", {}).get("main_url", "")
        if not main_url:
            # NOTE(review): assumes the other entries are also named
            # "video_<n>" and carry "main_url" -- confirm against a live
            # API response. Fall back to whichever listed entry has a URL.
            for key in sorted(video_list, reverse=True):
                entry = video_list[key]
                if isinstance(entry, dict) and entry.get("main_url"):
                    main_url = entry["main_url"]
                    break
        return {
            'status': 'success',
            'message': "更多接口请查看(More API see): https://api.tikhub.io/",
            'type': 'video',
            'platform': '西瓜',
            'video_url': main_url,
        }
    except Exception as e:
        raise ValueError(f'获取西瓜视频数据出错了:{e}') from e
|
||||||
|
|
||||||
"""__________________________________________⬇️Hybrid methods(混合方法)⬇️______________________________________"""
|
"""__________________________________________⬇️Hybrid methods(混合方法)⬇️______________________________________"""
|
||||||
|
|
||||||
# 判断链接平台/Judge link platform
|
# 判断链接平台/Judge link platform
|
||||||
@ -435,6 +551,8 @@ class Scraper:
|
|||||||
url_platform = 'douyin'
|
url_platform = 'douyin'
|
||||||
elif 'bilibili' in video_url:
|
elif 'bilibili' in video_url:
|
||||||
url_platform = 'bilibili'
|
url_platform = 'bilibili'
|
||||||
|
elif 'xigua' in video_url:
|
||||||
|
url_platform = 'xigua'
|
||||||
elif 'tiktok' in video_url:
|
elif 'tiktok' in video_url:
|
||||||
url_platform = 'tiktok'
|
url_platform = 'tiktok'
|
||||||
else:
|
else:
|
||||||
@ -456,6 +574,7 @@ class Scraper:
|
|||||||
video_id = await self.get_douyin_video_id(video_url) if url_platform == 'douyin' \
|
video_id = await self.get_douyin_video_id(video_url) if url_platform == 'douyin' \
|
||||||
else await self.get_tiktok_video_id(video_url) if url_platform == 'tiktok' \
|
else await self.get_tiktok_video_id(video_url) if url_platform == 'tiktok' \
|
||||||
else await self.get_bilibili_video_id(video_url) if url_platform == 'bilibili' \
|
else await self.get_bilibili_video_id(video_url) if url_platform == 'bilibili' \
|
||||||
|
else await self.get_ixigua_video_id(video_url) if url_platform == 'xigua' \
|
||||||
else None
|
else None
|
||||||
|
|
||||||
# 如果获取不到视频ID抛出异常/If the video ID cannot be obtained, an exception is thrown
|
# 如果获取不到视频ID抛出异常/If the video ID cannot be obtained, an exception is thrown
|
||||||
@ -467,6 +586,7 @@ class Scraper:
|
|||||||
data = await self.get_douyin_video_data(video_id) if url_platform == 'douyin' \
|
data = await self.get_douyin_video_data(video_id) if url_platform == 'douyin' \
|
||||||
else await self.get_tiktok_video_data(video_id) if url_platform == 'tiktok' \
|
else await self.get_tiktok_video_data(video_id) if url_platform == 'tiktok' \
|
||||||
else await self.get_bilibili_video_data(video_id) if url_platform == 'bilibili' \
|
else await self.get_bilibili_video_data(video_id) if url_platform == 'bilibili' \
|
||||||
|
else await self.get_ixigua_video_data(video_id) if url_platform == 'xigua' \
|
||||||
else None
|
else None
|
||||||
|
|
||||||
if data:
|
if data:
|
||||||
@ -475,6 +595,10 @@ class Scraper:
|
|||||||
if url_platform == 'bilibili':
|
if url_platform == 'bilibili':
|
||||||
print("获取Bilibili视频数据成功!")
|
print("获取Bilibili视频数据成功!")
|
||||||
return data
|
return data
|
||||||
|
# 如果是西瓜平台则返回视频数据/If it is a ixigua platform, return video data
|
||||||
|
if url_platform == 'xigua':
|
||||||
|
print("获取西瓜视频数据成功!")
|
||||||
|
return data
|
||||||
|
|
||||||
# 如果是抖音/TikTok平台则继续进行数据解析/If it is a Douyin/TikTok platform, continue to parse the data
|
# 如果是抖音/TikTok平台则继续进行数据解析/If it is a Douyin/TikTok platform, continue to parse the data
|
||||||
print(f"获取**{url_platform}**视频数据成功,正在判断数据类型...")
|
print(f"获取**{url_platform}**视频数据成功,正在判断数据类型...")
|
||||||
@ -654,11 +778,19 @@ class Scraper:
|
|||||||
"""__________________________________________⬇️Test methods(测试方法)⬇️______________________________________"""
|
"""__________________________________________⬇️Test methods(测试方法)⬇️______________________________________"""
|
||||||
|
|
||||||
|
|
||||||
async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili_url: str = None) -> None:
|
async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili_url: str = None, _ixigua_url: str = None) -> None:
|
||||||
# 异步测试/Async test
|
# 异步测试/Async test
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
print("<异步测试/Async test>")
|
print("<异步测试/Async test>")
|
||||||
|
|
||||||
|
print('\n--------------------------------------------------')
|
||||||
|
print("正在测试异步获取西瓜视频ID方法...")
|
||||||
|
ixigua_id = await api.get_ixigua_video_id(_ixigua_url)
|
||||||
|
print(f"西瓜视频ID: {ixigua_id}")
|
||||||
|
print("正在测试异步获取西瓜视频数据方法...")
|
||||||
|
ixigua_data = await api.get_ixigua_video_data(ixigua_id)
|
||||||
|
print(f"西瓜视频数据: {str(ixigua_data)[:100]}")
|
||||||
|
|
||||||
print('\n--------------------------------------------------')
|
print('\n--------------------------------------------------')
|
||||||
print("正在测试异步获取哔哩哔哩视频ID方法...")
|
print("正在测试异步获取哔哩哔哩视频ID方法...")
|
||||||
bilibili_id = await api.get_bilibili_video_id(_bilibili_url)
|
bilibili_id = await api.get_bilibili_video_id(_bilibili_url)
|
||||||
@ -688,7 +820,8 @@ async def async_test(_douyin_url: str = None, _tiktok_url: str = None, _bilibili
|
|||||||
douyin_hybrid_data = await api.hybrid_parsing(_douyin_url)
|
douyin_hybrid_data = await api.hybrid_parsing(_douyin_url)
|
||||||
tiktok_hybrid_data = await api.hybrid_parsing(_tiktok_url)
|
tiktok_hybrid_data = await api.hybrid_parsing(_tiktok_url)
|
||||||
bilibili_hybrid_data = await api.hybrid_parsing(_bilibili_url)
|
bilibili_hybrid_data = await api.hybrid_parsing(_bilibili_url)
|
||||||
print(f"抖音、TikTok、哔哩哔哩混合解析全部成功!")
|
xigua_hybrid_data = await api.hybrid_parsing(_ixigua_url)
|
||||||
|
print(f"抖音、TikTok、哔哩哔哩、西瓜混合解析全部成功!")
|
||||||
|
|
||||||
print('\n--------------------------------------------------')
|
print('\n--------------------------------------------------')
|
||||||
# 总耗时/Total time
|
# 总耗时/Total time
|
||||||
@ -704,4 +837,7 @@ if __name__ == '__main__':
|
|||||||
douyin_url = 'https://v.douyin.com/rLyrQxA/6.66'
|
douyin_url = 'https://v.douyin.com/rLyrQxA/6.66'
|
||||||
tiktok_url = 'https://www.tiktok.com/@evil0ctal/video/7217027383390555438'
|
tiktok_url = 'https://www.tiktok.com/@evil0ctal/video/7217027383390555438'
|
||||||
bilibili_url = "https://www.bilibili.com/video/BV1Th411x7ii/"
|
bilibili_url = "https://www.bilibili.com/video/BV1Th411x7ii/"
|
||||||
asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url, _bilibili_url=bilibili_url))
|
ixigua_url = "https://www.ixigua.com/7270448082586698281"
|
||||||
|
# ixigua_url = "https://v.ixigua.com/ienrQ5bR/" # convert_share_urls 这里有bug 如果抖音的口令解析的出来其他的都是none
|
||||||
|
asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url, _bilibili_url=bilibili_url, _ixigua_url=ixigua_url))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user