mirror of
https://github.com/Evil0ctal/Douyin_TikTok_Download_API.git
synced 2025-04-22 09:03:54 +08:00
319 lines
14 KiB
Python
319 lines
14 KiB
Python
import asyncio # 异步I/O
|
||
import os # 系统操作
|
||
import time # 时间操作
|
||
import yaml # 配置文件
|
||
|
||
# 基础爬虫客户端和哔哩哔哩API端点
|
||
from crawlers.base_crawler import BaseCrawler
|
||
from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints
|
||
|
||
# 哔哩哔哩工具类
|
||
from crawlers.bilibili.web.utils import EndpointModels, bv2av, ResponseAnalyzer
|
||
|
||
|
||
# 配置文件路径
|
||
path = os.path.abspath(os.path.dirname(__file__))
|
||
|
||
# 读取配置文件
|
||
with open(f"{path}/config.yaml", "r", encoding="utf-8") as f:
|
||
config = yaml.safe_load(f)
|
||
|
||
|
||
class BilibiliWebCrawler:
|
||
|
||
# 从配置文件读取哔哩哔哩请求头
|
||
async def get_bilibili_headers(self):
|
||
bili_config = config['TokenManager']['bilibili']
|
||
kwargs = {
|
||
"headers": {
|
||
"accept-language": bili_config["headers"]["accept-language"],
|
||
"origin": bili_config["headers"]["origin"],
|
||
"referer": bili_config["headers"]["referer"],
|
||
"user-agent": bili_config["headers"]["user-agent"],
|
||
"cookie": bili_config["headers"]["cookie"],
|
||
},
|
||
"proxies": {"http://": bili_config["proxies"]["http"], "https://": bili_config["proxies"]["https"]},
|
||
}
|
||
return kwargs
|
||
|
||
"-------------------------------------------------------handler接口列表-------------------------------------------------------"
|
||
# 获取单个视频详情信息
|
||
async def fetch_one_video(self, bv_id: str) -> dict:
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = f"{BilibiliAPIEndpoints.POST_DETAIL}?bvid={bv_id}"
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 获取用户发布视频作品数据
|
||
async def fetch_user_post_videos(self, uid: str, pn: int) -> dict:
|
||
"""
|
||
:param uid: 用户uid
|
||
:param pn: 页码
|
||
:return:
|
||
"""
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = await EndpointModels().user_post_videos_endpoint(uid=uid, pn=pn)
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 获取用户所有收藏夹信息
|
||
async def fetch_collect_folders(self, uid: str) -> dict:
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = f"{BilibiliAPIEndpoints.COLLECT_FOLDERS}?up_mid={uid}"
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
# 分析响应结果
|
||
result_dict = await ResponseAnalyzer.collect_folders_analyze(response=response)
|
||
return result_dict
|
||
|
||
# 获取指定收藏夹内视频数据
|
||
async def fetch_folder_videos(self, folder_id: str, pn: int) -> dict:
|
||
"""
|
||
:param folder_id: 收藏夹id-- 可从<获取用户所有收藏夹信息>获得
|
||
:param pn: 页码
|
||
:return:
|
||
"""
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
# 发送请求,获取请求响应结果
|
||
async with base_crawler as crawler:
|
||
endpoint = f"{BilibiliAPIEndpoints.COLLECT_VIDEOS}?media_id={folder_id}&pn={pn}&ps=20&keyword=&order=mtime&type=0&tid=0&platform=web"
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 获取指定用户的信息
|
||
async def fetch_user_profile(self, uid: str) -> dict:
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = await EndpointModels().user_profile_endpoint(uid=uid)
|
||
response = await crawler.fetch_get_json(endpoint=endpoint)
|
||
return response
|
||
|
||
# 获取综合热门视频信息
|
||
async def fetch_com_popular(self, pn: int) -> dict:
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = await EndpointModels().com_popular_endpoint(pn=pn)
|
||
response = await crawler.fetch_get_json(endpoint=endpoint)
|
||
return response
|
||
|
||
# 获取指定视频的评论
|
||
async def fetch_video_comments(self, bv_id: str, pn: int) -> dict:
|
||
# 评论排序 -- 1:按点赞数排序. 0:按时间顺序排序
|
||
sort = 1
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = f"{BilibiliAPIEndpoints.VIDEO_COMMENTS}?type=1&oid={bv_id}&sort={sort}&nohot=0&ps=20&pn={pn}"
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 获取视频下指定评论的回复
|
||
async def fetch_comment_reply(self, bv_id: str, pn: int, rpid: str) -> dict:
|
||
"""
|
||
:param bv_id: 目标视频bv号
|
||
:param pn: 页码
|
||
:param rpid: 目标评论id,可通过fetch_video_comments获得
|
||
:return:
|
||
"""
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = f"{BilibiliAPIEndpoints.COMMENT_REPLY}?type=1&oid={bv_id}&root={rpid}&&ps=20&pn={pn}"
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 获取指定用户动态
|
||
async def fetch_user_dynamic(self, uid: str, offset: str) -> dict:
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = await EndpointModels().user_dynamic_endpoint(uid=uid, offset=offset)
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 获取指定直播间信息
|
||
async def fetch_live_room_detail(self, room_id: str) -> dict:
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = f"{BilibiliAPIEndpoints.LIVEROOM_DETAIL}?room_id={room_id}"
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 获取指定直播间视频流
|
||
# async def fetch_live_videos(self, room_id: str) -> dict:
|
||
# # 获取请求头信息
|
||
# kwargs = await self.get_bilibili_headers()
|
||
# # 创建基础爬虫对象
|
||
# base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
# async with base_crawler as crawler:
|
||
# # 创建请求endpoint
|
||
# endpoint = f"{BilibiliAPIEndpoints.LIVE_VIDEOS}?cid={room_id}&quality=4"
|
||
# # 发送请求,获取请求响应结果
|
||
# response = await crawler.fetch_get_json(endpoint)
|
||
# return response
|
||
|
||
"-------------------------------------------------------utils接口列表-------------------------------------------------------"
|
||
# 通过bv号获得视频aid号
|
||
async def get_aid(self, bv_id: str) -> int:
|
||
aid = await bv2av(bv_id=bv_id)
|
||
return aid
|
||
|
||
# 获取所有直播分区列表
|
||
async def fetch_all_live_areas(self) -> dict:
|
||
# 获取请求头信息
|
||
kwargs = await self.get_bilibili_headers()
|
||
# 创建基础爬虫对象
|
||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||
async with base_crawler as crawler:
|
||
# 创建请求endpoint
|
||
endpoint = BilibiliAPIEndpoints.LIVE_AREAS
|
||
# 发送请求,获取请求响应结果
|
||
response = await crawler.fetch_get_json(endpoint)
|
||
return response
|
||
|
||
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
|
||
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
|
||
async def uid_to_wrid(self, uid: str) -> dict:
|
||
result = await EndpointModels().get_wrid_wts_by_uid(uid=uid)
|
||
return result
|
||
|
||
"-------------------------------------------------------main-------------------------------------------------------"
|
||
async def main(self):
|
||
|
||
"-------------------------------------------------------handler接口列表-------------------------------------------------------"
|
||
# 获取单个作品数据
|
||
# bv_id = 'BV1M1421t7hT'
|
||
# result = await self.fetch_one_video(bv_id=bv_id)
|
||
# print(result)
|
||
|
||
# 获取用户发布作品数据
|
||
# uid = '178360345'
|
||
# pn = 1
|
||
# result = await self.fetch_user_post_videos(uid=uid, pn=pn)
|
||
# print(result)
|
||
|
||
# 获取用户所有收藏夹信息
|
||
# uid = '178360345'
|
||
# reslut = await self.fetch_collect_folders(uid=uid)
|
||
# print(reslut)
|
||
|
||
# 获取用户指定收藏夹内视频数据
|
||
# folder_id = '1756059545' # 收藏夹id,可从<获取用户所有收藏夹信息>获得
|
||
# pn = 1
|
||
# result = await self.fetch_folder_videos(folder_id=folder_id, pn=pn)
|
||
# print(result)
|
||
|
||
# 获取指定用户的信息
|
||
# uid = '178360345'
|
||
# result = await self.fetch_user_profile(uid=uid)
|
||
# print(result)
|
||
|
||
# 获取综合热门信息
|
||
# pn = 1 # 页码
|
||
# result = await self.fetch_com_popular(pn=pn)
|
||
# print(result)
|
||
|
||
# 获取指定视频的评论(不登录只能获取一页的评论)
|
||
# bv_id = "BV1M1421t7hT"
|
||
# pn = 1
|
||
# result = await self.fetch_video_comments(bv_id=bv_id, pn=pn)
|
||
# print(result)
|
||
|
||
# 获取视频下指定评论的回复(不登录只能获取一页的评论)
|
||
# bv_id = "BV1M1421t7hT"
|
||
# rpid = "237109455120"
|
||
# pn = 1
|
||
# result = await self.fetch_comment_reply(bv_id=bv_id, pn=pn, rpid=rpid)
|
||
# print(result)
|
||
|
||
# 获取指定用户动态
|
||
# uid = "16015678"
|
||
# offset = "953154282154098691" # 翻页索引,为空即从最新动态开始,可从获得到的动态数据里面获得
|
||
# result = await self.fetch_user_dynamic(uid=uid, offset=offset)
|
||
# print(result)
|
||
|
||
# 获取指定直播间信息
|
||
# room_id = "22816111"
|
||
# result = await self.fetch_live_room_detail(room_id=room_id)
|
||
# print(result)
|
||
|
||
# 获取直播间视频流
|
||
# room_id = "22816111"
|
||
# result = await self.fetch_user_live_videos_by_room_id(room_id=room_id)
|
||
# print(result)
|
||
|
||
"-------------------------------------------------------utils接口列表-------------------------------------------------------"
|
||
# 通过bv号获得视频aid号
|
||
# bv_id = 'BV1M1421t7hT'
|
||
# aid = await self.get_aid(bv_id=bv_id)
|
||
# print(aid)
|
||
|
||
# 获取所有直播分区列表
|
||
# result = await self.fetch_all_live_areas()
|
||
# print(result)
|
||
|
||
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
|
||
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
|
||
# uid = '178360345'
|
||
# w_rid = await self.uid_to_wrid(uid=uid)
|
||
# print(w_rid)
|
||
|
||
|
||
if __name__ == '__main__':
|
||
# 初始化
|
||
BilibiliWebCrawler = BilibiliWebCrawler()
|
||
|
||
# 开始时间
|
||
start = time.time()
|
||
|
||
asyncio.run(BilibiliWebCrawler.main())
|
||
|
||
# 结束时间
|
||
end = time.time()
|
||
print(f"耗时:{end - start}")
|