mirror of
https://github.com/Evil0ctal/Douyin_TikTok_Download_API.git
synced 2025-04-12 11:53:43 +08:00
Add files via upload
This commit is contained in:
parent
2efd3e3eca
commit
bd650f2ad9
17
README.en.md
17
README.en.md
@ -125,7 +125,7 @@ Part of TikHub's source code will be open sourced on Github, and it will sponsor
|
||||
- [iOS Shortcuts for quickly calling the API](https://apps.apple.com/cn/app/%E5%BF%AB%E6%8D%B7%E6%8C%87%E4%BB%A4/id915249334): download watermark-free videos/photo albums in-app
|
||||
- Complete API documentation ([Demo/Demonstration](https://api.douyin.wtf/docs))
|
||||
- Rich API interface:
|
||||
- Douyin web version API
|
||||
- Douyin web version API
|
||||
|
||||
- [x] Video data analysis
|
||||
- [x] Get user homepage work data
|
||||
@ -149,7 +149,7 @@ Part of TikHub's source code will be open sourced on Github, and it will sponsor
|
||||
- [x] Extract list work id
|
||||
- [x] Extract live broadcast room number from list
|
||||
- [x] Extract live broadcast room number from list
|
||||
- TikTok web version API
|
||||
- TikTok web version API
|
||||
|
||||
- [x] Video data analysis
|
||||
- [x] Get user homepage work data
|
||||
@ -171,7 +171,18 @@ Part of TikHub's source code will be open sourced on Github, and it will sponsor
|
||||
- [x] Extract list work id
|
||||
- [x] Get user unique_id
|
||||
- [x] Get list unique_id
|
||||
|
||||
- Bilibili web version API
|
||||
- [x] Get single video data
|
||||
- [x] Get user homepage video data
|
||||
- [x] Get user collection folders
|
||||
- [x] Get video data from a collection folder
|
||||
- [x] Get information of specified user
|
||||
- [x] Get comprehensive popular video information
|
||||
- [x] Get comments on the specified video
|
||||
- [x] Get replies to the specified comment
|
||||
- [x] Get dynamic information of specified user
|
||||
- [x] Get information of specified live room
|
||||
- [x] Get a list of all live areas
|
||||
* * *
|
||||
|
||||
## 📦Call the parsing library (obsolete and needs to be updated):
|
||||
|
29
README.md
29
README.md
@ -129,13 +129,15 @@ TikHub的部分源代码会开源在Github上,并且会赞助一些开源项
|
||||
│ └─web
|
||||
│ └─views
|
||||
└─crawlers
|
||||
├─douyin
|
||||
│ └─web
|
||||
├─hybrid
|
||||
├─tiktok
|
||||
│ ├─app
|
||||
│ └─web
|
||||
└─utils
|
||||
├─bilibili
|
||||
│ └─web
|
||||
├─douyin
|
||||
│ └─web
|
||||
├─hybrid
|
||||
├─tiktok
|
||||
│ ├─app
|
||||
│ └─web
|
||||
└─utils
|
||||
```
|
||||
|
||||
## ✨支持功能:
|
||||
@ -192,7 +194,18 @@ TikHub的部分源代码会开源在Github上,并且会赞助一些开源项
|
||||
- [x] 提取列表作品id
|
||||
- [x] 获取用户unique_id
|
||||
- [x] 获取列表unique_id
|
||||
|
||||
- 哔哩哔哩网页版API
|
||||
- [x] 获取单个视频详情信息
|
||||
- [x] 获取用户发布视频作品数据
|
||||
- [x] 获取用户所有收藏夹信息
|
||||
- [x] 获取指定收藏夹内视频数据
|
||||
- [x] 获取指定用户的信息
|
||||
- [x] 获取综合热门视频信息
|
||||
- [x] 获取指定视频的评论
|
||||
- [x] 获取视频下指定评论的回复
|
||||
- [x] 获取指定用户动态
|
||||
- [x] 获取指定直播间信息
|
||||
- [x] 获取所有直播分区列表
|
||||
---
|
||||
|
||||
## 📦调用解析库(已废弃需要更新):
|
||||
|
498
app/api/endpoints/bilibili_web.py
Normal file
498
app/api/endpoints/bilibili_web.py
Normal file
@ -0,0 +1,498 @@
|
||||
from fastapi import APIRouter, Body, Query, Request, HTTPException # 导入FastAPI组件
|
||||
from app.api.models.APIResponseModel import ResponseModel, ErrorResponseModel # 导入响应模型
|
||||
|
||||
from crawlers.bilibili.web.web_crawler import BilibiliWebCrawler # 导入哔哩哔哩web爬虫
|
||||
|
||||
|
||||
# Router that every Bilibili web endpoint in this module is registered on.
router = APIRouter()
# NOTE(review): this rebinds the imported class name to one shared instance, so the
# class itself is no longer reachable under this name inside this module.
BilibiliWebCrawler = BilibiliWebCrawler()
|
||||
|
||||
|
||||
# 获取单个视频详情信息
|
||||
@router.get(
    "/fetch_one_video",
    response_model=ResponseModel,
    summary="获取单个视频详情信息/Get single video data",
)
async def fetch_one_video(
        request: Request,
        bv_id: str = Query(example="BV1M1421t7hT", description="作品id/Video id"),
):
    """
    # [中文]
    ### 用途:
    - 获取单个视频详情信息
    ### 参数:
    - bv_id: 作品id
    ### 返回:
    - 视频详情信息

    # [English]
    ### Purpose:
    - Get single video data
    ### Parameters:
    - bv_id: Video id
    ### Return:
    - Video data

    # [示例/Example]
    bv_id = "BV1M1421t7hT"
    """
    try:
        video = await BilibiliWebCrawler.fetch_one_video(bv_id)
        return ResponseModel(code=200, router=request.url.path, data=video)
    except Exception:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        error_code = 400
        error_body = ErrorResponseModel(
            code=error_code,
            router=request.url.path,
            params=dict(request.query_params),
        ).dict()
        raise HTTPException(status_code=error_code, detail=error_body)
|
||||
|
||||
|
||||
# 获取用户发布视频作品数据
|
||||
@router.get(
    "/fetch_user_post_videos",
    response_model=ResponseModel,
    summary="获取用户主页作品数据/Get user homepage video data",
)
async def fetch_user_post_videos(
        request: Request,
        uid: str = Query(example="178360345", description="用户UID"),
        pn: int = Query(default=1, description="页码/Page number"),
):
    """
    # [中文]
    ### 用途:
    - 获取用户发布的视频数据
    ### 参数:
    - uid: 用户UID
    - pn: 页码
    ### 返回:
    - 用户发布的视频数据

    # [English]
    ### Purpose:
    - Get user post video data
    ### Parameters:
    - uid: User UID
    - pn: Page number
    ### Return:
    - User posted video data

    # [示例/Example]
    uid = "178360345"
    pn = 1
    """
    try:
        videos = await BilibiliWebCrawler.fetch_user_post_videos(uid, pn)
        return ResponseModel(code=200, router=request.url.path, data=videos)
    except Exception:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        error_code = 400
        error_body = ErrorResponseModel(
            code=error_code,
            router=request.url.path,
            params=dict(request.query_params),
        ).dict()
        raise HTTPException(status_code=error_code, detail=error_body)
|
||||
|
||||
|
||||
# 获取用户所有收藏夹信息
|
||||
@router.get("/fetch_collect_folders", response_model=ResponseModel,
            summary="获取用户所有收藏夹信息/Get user collection folders")
async def fetch_collect_folders(request: Request,
                                uid: str = Query(example="178360345", description="用户UID")):
    """
    # [中文]
    ### 用途:
    - 获取用户所有收藏夹信息
    ### 参数:
    - uid: 用户UID
    ### 返回:
    - 用户收藏夹信息

    # [English]
    ### Purpose:
    - Get user collection folders
    ### Parameters:
    - uid: User UID
    ### Return:
    - user collection folders

    # [示例/Example]
    uid = "178360345"
    """
    try:
        data = await BilibiliWebCrawler.fetch_collect_folders(uid)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# 获取指定收藏夹内视频数据
|
||||
@router.get("/fetch_user_collection_videos", response_model=ResponseModel,
            summary="获取指定收藏夹内视频数据/Gets video data from a collection folder")
async def fetch_user_collection_videos(request: Request,
                                       folder_id: str = Query(example="1756059545",
                                                              description="收藏夹id/collection folder id"),
                                       pn: int = Query(default=1, description="页码/Page number")
                                       ):
    """
    # [中文]
    ### 用途:
    - 获取指定收藏夹内视频数据
    ### 参数:
    - folder_id: 收藏夹id
    - pn: 页码
    ### 返回:
    - 指定收藏夹内视频数据

    # [English]
    ### Purpose:
    - Gets video data from a collection folder
    ### Parameters:
    - folder_id: collection folder id
    - pn: Page number
    ### Return:
    - video data from collection folder

    # [示例/Example]
    folder_id = "1756059545"
    pn = 1
    """
    try:
        data = await BilibiliWebCrawler.fetch_folder_videos(folder_id, pn)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# 获取指定用户的信息
|
||||
@router.get("/fetch_user_profile", response_model=ResponseModel,
            summary="获取指定用户的信息/Get information of specified user")
async def fetch_user_profile(request: Request,
                             uid: str = Query(example="178360345", description="用户UID")):
    """
    # [中文]
    ### 用途:
    - 获取指定用户的信息
    ### 参数:
    - uid: 用户UID
    ### 返回:
    - 指定用户的个人信息

    # [English]
    ### Purpose:
    - Get information of specified user
    ### Parameters:
    - uid: User UID
    ### Return:
    - information of specified user

    # [示例/Example]
    uid = "178360345"
    """
    # The handler is named after its route; it previously reused the name
    # fetch_collect_folders, which shadowed the real collection-folders handler.
    try:
        data = await BilibiliWebCrawler.fetch_user_profile(uid)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# 获取综合热门视频信息
|
||||
@router.get("/fetch_com_popular", response_model=ResponseModel,
            summary="获取综合热门视频信息/Get comprehensive popular video information")
async def fetch_com_popular(request: Request,
                            pn: int = Query(default=1, description="页码/Page number")):
    """
    # [中文]
    ### 用途:
    - 获取综合热门视频信息
    ### 参数:
    - pn: 页码
    ### 返回:
    - 综合热门视频信息

    # [English]
    ### Purpose:
    - Get comprehensive popular video information
    ### Parameters:
    - pn: Page number
    ### Return:
    - comprehensive popular video information

    # [示例/Example]
    pn = 1
    """
    # The handler is named after its route; it previously reused the name
    # fetch_collect_folders, which shadowed the real collection-folders handler.
    try:
        data = await BilibiliWebCrawler.fetch_com_popular(pn)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# 获取指定视频的评论
|
||||
@router.get("/fetch_video_comments", response_model=ResponseModel,
            summary="获取指定视频的评论/Get comments on the specified video")
async def fetch_video_comments(request: Request,
                               bv_id: str = Query(example="BV1M1421t7hT", description="作品id/Video id"),
                               pn: int = Query(default=1, description="页码/Page number")):
    """
    # [中文]
    ### 用途:
    - 获取指定视频的评论
    ### 参数:
    - bv_id: 作品id
    - pn: 页码
    ### 返回:
    - 指定视频的评论数据

    # [English]
    ### Purpose:
    - Get comments on the specified video
    ### Parameters:
    - bv_id: Video id
    - pn: Page number
    ### Return:
    - comments of the specified video

    # [示例/Example]
    bv_id = "BV1M1421t7hT"
    pn = 1
    """
    # The handler is named after its route; it previously reused the name
    # fetch_collect_folders, which shadowed the real collection-folders handler.
    try:
        data = await BilibiliWebCrawler.fetch_video_comments(bv_id, pn)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# 获取视频下指定评论的回复
|
||||
@router.get("/fetch_comment_reply", response_model=ResponseModel,
            summary="获取视频下指定评论的回复/Get reply to the specified comment")
async def fetch_comment_reply(request: Request,
                              bv_id: str = Query(example="BV1M1421t7hT", description="作品id/Video id"),
                              pn: int = Query(default=1, description="页码/Page number"),
                              rpid: str = Query(example="237109455120", description="回复id/Reply id")):
    """
    # [中文]
    ### 用途:
    - 获取视频下指定评论的回复
    ### 参数:
    - bv_id: 作品id
    - pn: 页码
    - rpid: 回复id
    ### 返回:
    - 指定评论的回复数据

    # [English]
    ### Purpose:
    - Get reply to the specified comment
    ### Parameters:
    - bv_id: Video id
    - pn: Page number
    - rpid: Reply id
    ### Return:
    - Reply of the specified comment

    # [示例/Example]
    bv_id = "BV1M1421t7hT"
    pn = 1
    rpid = "237109455120"
    """
    # The handler is named after its route; it previously reused the name
    # fetch_collect_folders, which shadowed the real collection-folders handler.
    try:
        data = await BilibiliWebCrawler.fetch_comment_reply(bv_id, pn, rpid)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# 获取指定用户动态
|
||||
@router.get("/fetch_user_dynamic", response_model=ResponseModel,
            summary="获取指定用户动态/Get dynamic information of specified user")
async def fetch_user_dynamic(request: Request,
                             uid: str = Query(example="16015678", description="用户UID"),
                             offset: str = Query(default="", example="953154282154098691",
                                                 description="开始索引/offset")):
    """
    # [中文]
    ### 用途:
    - 获取指定用户动态
    ### 参数:
    - uid: 用户UID
    - offset: 开始索引
    ### 返回:
    - 指定用户动态数据

    # [English]
    ### Purpose:
    - Get dynamic information of specified user
    ### Parameters:
    - uid: User UID
    - offset: offset
    ### Return:
    - dynamic information of specified user

    # [示例/Example]
    uid = "178360345"
    offset = "953154282154098691"
    """
    # The handler is named after its route; it previously reused the name
    # fetch_collect_folders, which shadowed the real collection-folders handler.
    try:
        data = await BilibiliWebCrawler.fetch_user_dynamic(uid, offset)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# 获取指定直播间信息
|
||||
@router.get("/fetch_live_room_detail", response_model=ResponseModel,
            summary="获取指定直播间信息/Get information of specified live room")
async def fetch_live_room_detail(request: Request,
                                 room_id: str = Query(example="22816111", description="直播间ID/Live room ID")):
    """
    # [中文]
    ### 用途:
    - 获取指定直播间信息
    ### 参数:
    - room_id: 直播间ID
    ### 返回:
    - 指定直播间信息

    # [English]
    ### Purpose:
    - Get information of specified live room
    ### Parameters:
    - room_id: Live room ID
    ### Return:
    - information of specified live room

    # [示例/Example]
    room_id = "22816111"
    """
    # The handler is named after its route; it previously reused the name
    # fetch_collect_folders, which shadowed the real collection-folders handler.
    try:
        data = await BilibiliWebCrawler.fetch_live_room_detail(room_id)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
|
||||
|
||||
# # 获取指定直播间视频流
|
||||
# @router.get("/fetch_live_videos", response_model=ResponseModel,
|
||||
# summary="获取直播间视频流/Get live video data of specified room")
|
||||
# async def fetch_collect_folders(request: Request,
|
||||
# room_id: str = Query(example="22816111", description="直播间ID/Live room ID")):
|
||||
# """
|
||||
# # [中文]
|
||||
# ### 用途:
|
||||
# - 获取指定直播间视频流
|
||||
# ### 参数:
|
||||
# - room_id: 直播间ID
|
||||
# ### 返回:
|
||||
# - 指定直播间视频流
|
||||
#
|
||||
# # [English]
|
||||
# ### Purpose:
|
||||
# - Get live video data of specified room
|
||||
# ### Parameters:
|
||||
# - room_id: Live room ID
|
||||
# ### Return:
|
||||
# - live video data of specified room
|
||||
#
|
||||
# # [示例/Example]
|
||||
# room_id = "22816111"
|
||||
# """
|
||||
# try:
|
||||
# data = await BilibiliWebCrawler.fetch_live_videos(room_id)
|
||||
# return ResponseModel(code=200,
|
||||
# router=request.url.path,
|
||||
# data=data)
|
||||
# except Exception as e:
|
||||
# status_code = 400
|
||||
# detail = ErrorResponseModel(code=status_code,
|
||||
# router=request.url.path,
|
||||
# params=dict(request.query_params),
|
||||
# )
|
||||
# raise HTTPException(status_code=status_code, detail=detail.dict())
|
||||
|
||||
|
||||
# 获取所有直播分区列表
|
||||
@router.get("/fetch_all_live_areas", response_model=ResponseModel,
            summary="获取所有直播分区列表/Get a list of all live areas")
async def fetch_all_live_areas(request: Request):
    """
    # [中文]
    ### 用途:
    - 获取所有直播分区列表
    ### 参数:
    ### 返回:
    - 所有直播分区列表

    # [English]
    ### Purpose:
    - Get a list of all live areas
    ### Parameters:
    ### Return:
    - list of all live areas

    # [示例/Example]
    """
    # The handler is named after its route; it previously reused the name
    # fetch_collect_folders, which shadowed the real collection-folders handler.
    try:
        data = await BilibiliWebCrawler.fetch_all_live_areas()
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Any failure while crawling or building the response is reported as HTTP 400,
        # echoing the route and the query parameters back to the caller.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain the original error so it stays visible in tracebacks.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
@ -3,6 +3,7 @@ from app.api.endpoints import (
|
||||
tiktok_web,
|
||||
tiktok_app,
|
||||
douyin_web,
|
||||
bilibili_web,
|
||||
hybrid_parsing, ios_shortcut, download,
|
||||
)
|
||||
|
||||
@ -15,6 +16,9 @@ router.include_router(tiktok_app.router, prefix="/tiktok/app", tags=["TikTok-App
|
||||
# Douyin routers
|
||||
router.include_router(douyin_web.router, prefix="/douyin/web", tags=["Douyin-Web-API"])
|
||||
|
||||
# Bilibili routers
|
||||
router.include_router(bilibili_web.router, prefix="/bilibili/web", tags=["Bilibili-Web-API"])
|
||||
|
||||
# Hybrid routers
|
||||
router.include_router(hybrid_parsing.router, prefix="/hybrid", tags=["Hybrid-API"])
|
||||
|
||||
|
@ -77,6 +77,10 @@ tags_metadata = [
|
||||
"name": "TikTok-App-API",
|
||||
"description": "**(TikTok-App-API数据接口/TikTok-App-API data endpoints)**",
|
||||
},
|
||||
{
|
||||
"name": "Bilibili-Web-API",
|
||||
"description": "**(Bilibili-Web-API数据接口/Bilibili-Web-API data endpoints)**",
|
||||
},
|
||||
{
|
||||
"name": "iOS-Shortcut",
|
||||
"description": "**(iOS快捷指令数据接口/iOS-Shortcut data endpoints)**",
|
||||
|
13
crawlers/bilibili/web/config.yaml
Normal file
13
crawlers/bilibili/web/config.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
TokenManager:
|
||||
bilibili:
|
||||
headers:
|
||||
'accept-language': zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6
|
||||
'origin': https://www.bilibili.com
|
||||
'referer': https://space.bilibili.com/
|
||||
'origin_2': https://space.bilibili.com
|
||||
'cookie': buvid3=D6E58E7B-E3A9-7CD3-7BE5-B5F255788A3020034infoc; b_nut=1723702120; _uuid=6E10D69A10-A711-9DA8-6833-1010262296C24B21337infoc; buvid_fp=6cf2ea8e143bbc49f3b7c0dcb2465fc2; buvid4=748EC8F0-82E2-1672-A286-8445DDB2A80C06110-023112304-; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjM5NjEzMjIsImlhdCI6MTcyMzcwMjA2MiwicGx0IjotMX0.IWOEMLCDKqWAX24rePU-1Qgm9Isf5CU8Tz0O-j6GHfo; bili_ticket_expires=1723961262; CURRENT_FNVAL=4048; rpdid=|(JluY|JJ|RR0J'u~kJ~|kkuY; b_lsid=E10B83DC4_191552166D6; header_theme_version=CLOSE; enable_web_push=DISABLE; home_feed_column=5; browser_resolution=1488-714; sid=873ujj7i
|
||||
'user-agent': Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36
|
||||
|
||||
proxies:
|
||||
http:
|
||||
https:
|
50
crawlers/bilibili/web/endpoints.py
Normal file
50
crawlers/bilibili/web/endpoints.py
Normal file
@ -0,0 +1,50 @@
|
||||
class BilibiliAPIEndpoints:
    """URL constants for the Bilibili web API and the Bilibili live API.

    Every endpoint is the matching domain constant followed by a fixed path.
    Section dividers are comments rather than bare string statements, so the
    first divider no longer ends up as the class docstring.
    """

    # ------------------------------ domains ------------------------------
    # Main Bilibili API domain
    BILIAPI_DOMAIN = "https://api.bilibili.com"

    # Bilibili live API domain
    LIVE_DOMAIN = "https://api.live.bilibili.com"

    # ------------------------------ endpoints ------------------------------
    # Video detail (Post Detail)
    POST_DETAIL = f"{BILIAPI_DOMAIN}/x/web-interface/view"

    # User media list (used to crawl all of a user's videos)
    USER_POST = f"{BILIAPI_DOMAIN}/x/v2/medialist/resource/list"

    # Collection folder list
    COLLECT_FOLDERS = f"{BILIAPI_DOMAIN}/x/v3/fav/folder/created/list-all"

    # Videos inside a collection folder
    COLLECT_VIDEOS = f"{BILIAPI_DOMAIN}/x/v3/fav/resource/list"

    # User profile information
    USER_DETAIL = f"{BILIAPI_DOMAIN}/x/space/wbi/acc/info"

    # Comprehensive popular videos
    COM_POPULAR = f"{BILIAPI_DOMAIN}/x/web-interface/popular"

    # Weekly must-watch
    WEEKLY_POPULAR = f"{BILIAPI_DOMAIN}/x/web-interface/popular/series/one"

    # All-time must-watch ("precious") list
    PRECIOUS_POPULAR = f"{BILIAPI_DOMAIN}/x/web-interface/popular/precious"

    # Video comments
    VIDEO_COMMENTS = f"{BILIAPI_DOMAIN}/x/v2/reply"

    # Replies to a comment
    COMMENT_REPLY = f"{BILIAPI_DOMAIN}/x/v2/reply/reply"

    # Live room information
    LIVEROOM_DETAIL = f"{LIVE_DOMAIN}/room/v1/Room/get_info"

    # Live area list
    LIVE_AREAS = f"{LIVE_DOMAIN}/room/v1/Area/getList"

    # Live room video stream
    LIVE_VIDEOS = f"{LIVE_DOMAIN}/room/v1/Room/playUrl"
|
||||
|
||||
|
201
crawlers/bilibili/web/utils.py
Normal file
201
crawlers/bilibili/web/utils.py
Normal file
@ -0,0 +1,201 @@
|
||||
import time
|
||||
from urllib.parse import urlencode
|
||||
import random
|
||||
from crawlers.bilibili.web import wrid
|
||||
from crawlers.utils.logger import logger
|
||||
|
||||
# 装饰器 检查是否正确生成endpoint
|
||||
def Check_gen(func):
    """Decorator that re-raises any failure of an endpoint generator as RuntimeError.

    The decorated functions in this module are coroutine functions. A plain
    synchronous wrapper only guards the creation of the coroutine object, so
    errors raised while the coroutine is awaited escape unwrapped. Here the
    wrapper is itself a coroutine function when ``func`` is one, so the
    ``await`` happens inside the ``try``. Plain functions are still supported.

    :param func: the function (sync or async) to guard.
    :return: a wrapper with the same call signature and metadata as ``func``.
    :raises RuntimeError: when ``func`` raises; the original error is chained
        as ``__cause__``.
    """
    import functools
    import inspect

    def _as_runtime_error(exc):
        # Message text is kept identical to the previous implementation.
        return RuntimeError("生成w_rid失败:{0}, 函数地址:{1}".format(exc, func.__name__))

    if inspect.iscoroutinefunction(func):
        @functools.wraps(func)
        async def checker(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception as e:
                raise _as_runtime_error(e) from e
    else:
        @functools.wraps(func)
        def checker(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                raise _as_runtime_error(e) from e

    return checker
|
||||
|
||||
class EndpointModels:
    # Builds fully-formed request URLs (including the w_rid / wts query
    # parameters) for the Bilibili endpoints that need a signature.
    def __init__(self):
        # Instantiate the WridManager used to compute w_rid
        self.wridmanager = WridManager()
        # Current timestamp, rounded to whole seconds
        self.wts = round(time.time())
        # A fixed inter value also yields results. If it stops working, see WridManager().get_inter
        self.inter = '{"ds":[],"wh":[3557,5674,5],"of":[154,308,154]}'

    # Example of obtaining w_rid: builds a dict containing w_rid and wts from a uid
    @Check_gen
    async def get_wrid_wts_by_uid(self, uid: str) -> dict:
        # NOTE(review): the wts value carries a fixed 32-character suffix; with these
        # keys 'wts' sorts last, so the suffix ends the encoded query — presumably it
        # is the signing key; confirm against wrid.get_wrid.
        params = {
            'dm_cover_img_str': 'QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ',
            'dm_img_inter': self.inter,
            'dm_img_list': [],
            'dm_img_str': 'V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ',
            'mid': uid,
            'platform': 'web',
            'token': '',
            'web_location': '1550101',
            'wts': f'{self.wts}ea1db124af3c7062474693fa704f4ff8'
        }
        # Compute the w_rid parameter
        w_rid = await self.wridmanager.get_wrid(params=params)
        reslut = {
            "w_rid": w_rid,
            "wts": self.wts
        }
        return reslut

    # Build the endpoint for fetching the videos a user has posted
    @Check_gen
    async def user_post_videos_endpoint(self, uid: str, pn: int, ps: int = 30) -> str:
        # Encode inter: drop spaces and percent-encode the braces.
        # NOTE(review): self.inter contains double quotes, so the "'" -> "%22"
        # replacement matches nothing here — confirm whether '"' was intended.
        new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D")
        # Build the request parameters that are signed
        params = {
            "dm_cover_img_str": "QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ",
            "dm_img_inter": self.inter,
            "dm_img_list": [],
            "dm_img_str": "V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ",
            "keyword": "",
            "mid": uid,
            "order": "pubdate",
            "order_avoided": "true",
            "platform": "web",
            "pn": pn,
            "ps": ps,
            "tid": "0",
            "web_location": "1550101",
            "wts": f"{self.wts}ea1db124af3c7062474693fa704f4ff8",
        }
        # Compute w_rid
        w_rid = await self.wridmanager.get_wrid(params=params)
        # Assemble the final URL from the pieces above and return it
        final_endpoint = f'https://api.bilibili.com/x/space/wbi/arc/search?mid={uid}&ps={ps}&tid=0&pn={pn}&keyword=&order=pubdate&platform=web&web_location=1550101&order_avoided=true&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&w_rid={w_rid}&wts={self.wts}'
        return final_endpoint

    # Build the endpoint for fetching a given user's profile
    @Check_gen
    async def user_profile_endpoint(self, uid: str) -> str:
        # Encode inter: drop spaces and percent-encode the braces.
        # NOTE(review): self.inter contains double quotes, so the "'" -> "%22"
        # replacement matches nothing here — confirm whether '"' was intended.
        new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D")
        # Build the request parameters that are signed
        params = {
            'dm_cover_img_str': 'QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ',
            'dm_img_inter': self.inter,
            'dm_img_list': [],
            'dm_img_str': 'V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ',
            'mid': uid,
            'platform': 'web',
            'token': '',
            'web_location': '1550101',
            'wts': f'{self.wts}ea1db124af3c7062474693fa704f4ff8'
        }
        # Compute w_rid
        w_rid = await self.wridmanager.get_wrid(params=params)
        # Assemble the final URL string from the pieces above and return it
        final_endpoint = f'https://api.bilibili.com/x/space/wbi/acc/info?mid={uid}&token=&platform=web&web_location=1550101&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&w_rid={w_rid}&wts={self.wts}'
        return final_endpoint

    # Build the endpoint for fetching the comprehensive popular videos
    @Check_gen
    async def com_popular_endpoint(self, pn: int) -> str:
        # Build the request parameters that are signed
        params = {
            "pn": pn,
            "ps": "20",
            "web_location": "333.934",
            "wts": f"{self.wts}ea1db124af3c7062474693fa704f4ff8",
        }
        # Compute w_rid
        w_rid = await self.wridmanager.get_wrid(params=params)
        # Assemble the final URL from the pieces above and return it
        final_endpoint = f"https://api.bilibili.com/x/web-interface/popular?ps=20&pn={pn}&web_location=333.934&w_rid={w_rid}&wts={self.wts}"
        return final_endpoint

    # Build the endpoint for fetching a given user's dynamics
    @Check_gen
    async def user_dynamic_endpoint(self, uid: str, offset: str) -> str:
        # Encode inter: drop spaces and percent-encode the braces.
        # NOTE(review): self.inter contains double quotes, so the "'" -> "%22"
        # replacement matches nothing here — confirm whether '"' was intended.
        new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D")
        # Build the request parameters that are signed.
        # NOTE(review): "dm_img_str" embeds "&features=..." and the x-bili-* values are
        # already percent-encoded; WridManager.get_encode_query passes every value
        # through urlencode, so these would be encoded again before signing — verify
        # that the resulting w_rid is accepted.
        # NOTE(review): here the fixed 32-character suffix is on "x-bili-web-req-json"
        # (the key that sorts last) instead of on "wts".
        params = {
            "dm_cover_img_str": "QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ",
            "dm_img_inter": self.inter,
            "dm_img_list": [],
            "dm_img_str": "V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&features=itemOpusStyle%2ClistOnlyfans%2CopusBigCover%2ConlyfansVote%2CdecorationCard%2CforwardListHidden%2CugcDelete",
            "host_mid": uid,
            "offset": offset,
            "platform": "web",
            "timezone_offset": "-480",
            "web_location": "333.999",
            "wts": self.wts,
            "x-bili-device-req-json": "%7B%22platform%22%3A%22web%22%2C%22device%22%3A%22pc%22%7D",
            "x-bili-web-req-json": "%7B%22spm_id%22%3A%22333.999%22%7Dea1db124af3c7062474693fa704f4ff8"
        }
        # Compute w_rid
        w_rid = await self.wridmanager.get_wrid(params=params)
        # Assemble the final URL from the pieces above and return it
        final_endpoint = f'https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space?offset={offset}&host_mid={uid}&timezone_offset=-480&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote,decorationCard,forwardListHidden,ugcDelete&web_location=333.999&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&x-bili-device-req-json=%7B%22platform%22:%22web%22,%22device%22:%22pc%22%7D&x-bili-web-req-json=%7B%22spm_id%22:%22333.999%22%7D&w_rid={w_rid}&wts={self.wts}'
        return final_endpoint
|
||||
|
||||
|
||||
class WridManager:
    """Helpers for the ``dm_img_inter`` payload and the ``w_rid`` signature."""

    def s(self) -> list:
        """Return the three-element ``wh`` list built from one random draw in [0, 113]."""
        jitter = random.randint(0, 113)
        first = 2 * 1488 + 2 * 311 + 3 * jitter
        second = 4 * 1488 - 311 + jitter
        return [first, second, jitter]

    def d(self) -> list:
        """Return the three-element ``of`` list built from one random draw in [0, 513]."""
        jitter = random.randint(0, 513)
        return [jitter, jitter * 2, jitter]

    def get_inter(self) -> dict:
        """Return a freshly randomised inter dict with keys ``ds``, ``wh`` and ``of``."""
        # Draw "wh" before "of" so the random sequence is consumed in the same order.
        wh_values = self.s()
        of_values = self.d()
        return {"ds": [], "wh": wh_values, "of": of_values}

    async def get_encode_query(self, params: dict) -> str:
        """Return ``params`` as a URL-encoded query string.

        Keys are emitted in sorted order; every value is converted with
        ``str`` and has the characters ``!'()*`` removed before encoding.
        """
        strip_table = str.maketrans("", "", "!'()*")
        cleaned = {}
        for key in sorted(params):
            cleaned[key] = str(params[key]).translate(strip_table)
        return urlencode(cleaned)

    async def get_wrid(self, params: dict) -> str:
        """Return the ``w_rid`` that ``wrid.get_wrid`` computes for the encoded ``params``."""
        query = await self.get_encode_query(params)
        return wrid.get_wrid(e=query)
|
||||
|
||||
async def bv2av(bv_id: str) -> int:
    """Convert a Bilibili BV id (for example ``"BV17x411w7KC"``) to its numeric av id.

    Six characters of the id are read as base-58 digits, least significant
    first. An offset is subtracted from that number and the result is XOR-ed
    with a fixed mask.

    :param bv_id: the BV id; must be at least 12 characters long.
    :return: the decoded av id.
    :raises ValueError: if ``bv_id`` is not a string, is too short, or holds a
        character outside the base-58 alphabet at a decoded position. Such
        input previously raised a bare IndexError or silently decoded the
        character as digit 0.
    """
    table = "fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF"
    # Character positions inside bv_id that carry the digits, least significant first.
    positions = (11, 10, 3, 8, 4, 6)
    xor = 177451812
    add_105 = 8728348608
    # Alternative offset used when the decoded number is below add_105.
    add_all = 8728348608 - (2 ** 31 - 1) - 1

    if not isinstance(bv_id, str) or len(bv_id) <= max(positions):
        raise ValueError(f"invalid BV id: {bv_id!r}")

    digit_of = {char: index for index, char in enumerate(table)}
    r = 0
    for power, position in enumerate(positions):
        char = bv_id[position]
        if char not in digit_of:
            raise ValueError(f"invalid character {char!r} in BV id: {bv_id!r}")
        r += digit_of[char] * (58 ** power)

    add = add_all if r < add_105 else add_105
    return (r - add) ^ xor
|
||||
|
||||
|
||||
# 响应分析
|
||||
class ResponseAnalyzer:
    """Post-processing helpers for raw responses."""

    # Collection-folder information of a user
    @classmethod
    async def collect_folders_analyze(cls, response: dict) -> dict:
        """Return ``response`` unchanged when its ``data`` entry is truthy.

        Otherwise log a warning and return a fallback dict with ``code`` 1 and
        a message saying the folders are empty or hidden.
        """
        if not response['data']:
            logger.warning("该用户收藏夹为空/用户设置为不可见")
            return {"code": 1, "message": "该用户收藏夹为空/用户设置为不可见"}
        return response
|
318
crawlers/bilibili/web/web_crawler.py
Normal file
318
crawlers/bilibili/web/web_crawler.py
Normal file
@ -0,0 +1,318 @@
|
||||
import asyncio # 异步I/O
|
||||
import os # 系统操作
|
||||
import time # 时间操作
|
||||
import yaml # 配置文件
|
||||
|
||||
# 基础爬虫客户端和哔哩哔哩API端点
|
||||
from crawlers.base_crawler import BaseCrawler
|
||||
from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints
|
||||
|
||||
# 哔哩哔哩工具类
|
||||
from crawlers.bilibili.web.utils import EndpointModels, bv2av, ResponseAnalyzer
|
||||
|
||||
|
||||
# Directory that contains this module; config.yaml is read from the same directory
path = os.path.abspath(os.path.dirname(__file__))

# Read the configuration file once, at import time
with open(f"{path}/config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)
|
||||
|
||||
|
||||
class BilibiliWebCrawler:
|
||||
|
||||
# 从配置文件读取哔哩哔哩请求头
|
||||
async def get_bilibili_headers(self):
    """Return the request settings taken from the loaded configuration.

    :return: a dict with ``headers`` (the accept-language, origin, referer,
        user-agent and cookie entries) and ``proxies`` (keyed by ``http://``
        and ``https://``).
    """
    bili_section = config['TokenManager']['bilibili']
    header_names = ("accept-language", "origin", "referer", "user-agent", "cookie")
    headers = {name: bili_section["headers"][name] for name in header_names}
    # Proxies are read after the headers so a missing key surfaces in the same order.
    proxies = {
        "http://": bili_section["proxies"]["http"],
        "https://": bili_section["proxies"]["https"],
    }
    return {"headers": headers, "proxies": proxies}
|
||||
|
||||
"-------------------------------------------------------handler接口列表-------------------------------------------------------"
|
||||
# 获取单个视频详情信息
|
||||
async def fetch_one_video(self, bv_id: str) -> dict:
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = f"{BilibiliAPIEndpoints.POST_DETAIL}?bvid={bv_id}"
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 获取用户发布视频作品数据
|
||||
async def fetch_user_post_videos(self, uid: str, pn: int) -> dict:
|
||||
"""
|
||||
:param uid: 用户uid
|
||||
:param pn: 页码
|
||||
:return:
|
||||
"""
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = await EndpointModels().user_post_videos_endpoint(uid=uid, pn=pn)
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 获取用户所有收藏夹信息
|
||||
async def fetch_collect_folders(self, uid: str) -> dict:
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = f"{BilibiliAPIEndpoints.COLLECT_FOLDERS}?up_mid={uid}"
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
# 分析响应结果
|
||||
result_dict = await ResponseAnalyzer.collect_folders_analyze(response=response)
|
||||
return result_dict
|
||||
|
||||
# 获取指定收藏夹内视频数据
|
||||
async def fetch_folder_videos(self, folder_id: str, pn: int) -> dict:
|
||||
"""
|
||||
:param folder_id: 收藏夹id-- 可从<获取用户所有收藏夹信息>获得
|
||||
:param pn: 页码
|
||||
:return:
|
||||
"""
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
# 发送请求,获取请求响应结果
|
||||
async with base_crawler as crawler:
|
||||
endpoint = f"{BilibiliAPIEndpoints.COLLECT_VIDEOS}?media_id={folder_id}&pn={pn}&ps=20&keyword=&order=mtime&type=0&tid=0&platform=web"
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 获取指定用户的信息
|
||||
async def fetch_user_profile(self, uid: str) -> dict:
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = await EndpointModels().user_profile_endpoint(uid=uid)
|
||||
response = await crawler.fetch_get_json(endpoint=endpoint)
|
||||
return response
|
||||
|
||||
# 获取综合热门视频信息
|
||||
async def fetch_com_popular(self, pn: int) -> dict:
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = await EndpointModels().com_popular_endpoint(pn=pn)
|
||||
response = await crawler.fetch_get_json(endpoint=endpoint)
|
||||
return response
|
||||
|
||||
# 获取指定视频的评论
|
||||
async def fetch_video_comments(self, bv_id: str, pn: int) -> dict:
|
||||
# 评论排序 -- 1:按点赞数排序. 0:按时间顺序排序
|
||||
sort = 1
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = f"{BilibiliAPIEndpoints.VIDEO_COMMENTS}?type=1&oid={bv_id}&sort={sort}&nohot=0&ps=20&pn={pn}"
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 获取视频下指定评论的回复
|
||||
async def fetch_comment_reply(self, bv_id: str, pn: int, rpid: str) -> dict:
|
||||
"""
|
||||
:param bv_id: 目标视频bv号
|
||||
:param pn: 页码
|
||||
:param rpid: 目标评论id,可通过fetch_video_comments获得
|
||||
:return:
|
||||
"""
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = f"{BilibiliAPIEndpoints.COMMENT_REPLY}?type=1&oid={bv_id}&root={rpid}&&ps=20&pn={pn}"
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 获取指定用户动态
|
||||
async def fetch_user_dynamic(self, uid: str, offset: str) -> dict:
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = await EndpointModels().user_dynamic_endpoint(uid=uid, offset=offset)
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 获取指定直播间信息
|
||||
async def fetch_live_room_detail(self, room_id: str) -> dict:
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = f"{BilibiliAPIEndpoints.LIVEROOM_DETAIL}?room_id={room_id}"
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 获取指定直播间视频流
|
||||
# async def fetch_live_videos(self, room_id: str) -> dict:
|
||||
# # 获取请求头信息
|
||||
# kwargs = await self.get_bilibili_headers()
|
||||
# # 创建基础爬虫对象
|
||||
# base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
# async with base_crawler as crawler:
|
||||
# # 创建请求endpoint
|
||||
# endpoint = f"{BilibiliAPIEndpoints.LIVE_VIDEOS}?cid={room_id}&quality=4"
|
||||
# # 发送请求,获取请求响应结果
|
||||
# response = await crawler.fetch_get_json(endpoint)
|
||||
# return response
|
||||
|
||||
"-------------------------------------------------------utils接口列表-------------------------------------------------------"
|
||||
# 通过bv号获得视频aid号
|
||||
async def get_aid(self, bv_id: str) -> int:
|
||||
aid = await bv2av(bv_id=bv_id)
|
||||
return aid
|
||||
|
||||
# 获取所有直播分区列表
|
||||
async def fetch_all_live_areas(self) -> dict:
|
||||
# 获取请求头信息
|
||||
kwargs = await self.get_bilibili_headers()
|
||||
# 创建基础爬虫对象
|
||||
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
|
||||
async with base_crawler as crawler:
|
||||
# 创建请求endpoint
|
||||
endpoint = BilibiliAPIEndpoints.LIVE_AREAS
|
||||
# 发送请求,获取请求响应结果
|
||||
response = await crawler.fetch_get_json(endpoint)
|
||||
return response
|
||||
|
||||
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
|
||||
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
|
||||
async def uid_to_wrid(self, uid: str) -> dict:
|
||||
result = await EndpointModels().get_wrid_wts_by_uid(uid=uid)
|
||||
return result
|
||||
|
||||
"-------------------------------------------------------main-------------------------------------------------------"
|
||||
async def main(self):
|
||||
|
||||
"-------------------------------------------------------handler接口列表-------------------------------------------------------"
|
||||
# 获取单个作品数据
|
||||
# bv_id = 'BV1M1421t7hT'
|
||||
# result = await self.fetch_one_video(bv_id=bv_id)
|
||||
# print(result)
|
||||
|
||||
# 获取用户发布作品数据
|
||||
# uid = '178360345'
|
||||
# pn = 1
|
||||
# result = await self.fetch_user_post_videos(uid=uid, pn=pn)
|
||||
# print(result)
|
||||
|
||||
# 获取用户所有收藏夹信息
|
||||
# uid = '178360345'
|
||||
# reslut = await self.fetch_collect_folders(uid=uid)
|
||||
# print(reslut)
|
||||
|
||||
# 获取用户指定收藏夹内视频数据
|
||||
# folder_id = '1756059545' # 收藏夹id,可从<获取用户所有收藏夹信息>获得
|
||||
# pn = 1
|
||||
# result = await self.fetch_folder_videos(folder_id=folder_id, pn=pn)
|
||||
# print(result)
|
||||
|
||||
# 获取指定用户的信息
|
||||
# uid = '178360345'
|
||||
# result = await self.fetch_user_profile(uid=uid)
|
||||
# print(result)
|
||||
|
||||
# 获取综合热门信息
|
||||
# pn = 1 # 页码
|
||||
# result = await self.fetch_com_popular(pn=pn)
|
||||
# print(result)
|
||||
|
||||
# 获取指定视频的评论(不登录只能获取一页的评论)
|
||||
# bv_id = "BV1M1421t7hT"
|
||||
# pn = 1
|
||||
# result = await self.fetch_video_comments(bv_id=bv_id, pn=pn)
|
||||
# print(result)
|
||||
|
||||
# 获取视频下指定评论的回复(不登录只能获取一页的评论)
|
||||
# bv_id = "BV1M1421t7hT"
|
||||
# rpid = "237109455120"
|
||||
# pn = 1
|
||||
# result = await self.fetch_comment_reply(bv_id=bv_id, pn=pn, rpid=rpid)
|
||||
# print(result)
|
||||
|
||||
# 获取指定用户动态
|
||||
# uid = "16015678"
|
||||
# offset = "953154282154098691" # 翻页索引,为空即从最新动态开始,可从获得到的动态数据里面获得
|
||||
# result = await self.fetch_user_dynamic(uid=uid, offset=offset)
|
||||
# print(result)
|
||||
|
||||
# 获取指定直播间信息
|
||||
# room_id = "22816111"
|
||||
# result = await self.fetch_live_room_detail(room_id=room_id)
|
||||
# print(result)
|
||||
|
||||
# 获取直播间视频流
|
||||
# room_id = "22816111"
|
||||
# result = await self.fetch_user_live_videos_by_room_id(room_id=room_id)
|
||||
# print(result)
|
||||
|
||||
"-------------------------------------------------------utils接口列表-------------------------------------------------------"
|
||||
# 通过bv号获得视频aid号
|
||||
# bv_id = 'BV1M1421t7hT'
|
||||
# aid = await self.get_aid(bv_id=bv_id)
|
||||
# print(aid)
|
||||
|
||||
# 获取所有直播分区列表
|
||||
# result = await self.fetch_all_live_areas()
|
||||
# print(result)
|
||||
|
||||
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
|
||||
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
|
||||
# uid = '178360345'
|
||||
# w_rid = await self.uid_to_wrid(uid=uid)
|
||||
# print(w_rid)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Create the crawler; use a distinct name so the class itself is not
    # rebound to an instance (the original reused the class name).
    crawler = BilibiliWebCrawler()

    # Start time
    start = time.time()

    asyncio.run(crawler.main())

    # End time
    end = time.time()
    # Prints the elapsed wall-clock seconds
    print(f"耗时:{end - start}")
|
204
crawlers/bilibili/web/wrid.py
Normal file
204
crawlers/bilibili/web/wrid.py
Normal file
@ -0,0 +1,204 @@
|
||||
import urllib.parse
|
||||
|
||||
def srotl(t, e):
    """Rotate the 32-bit value *t* left by *e* bits.

    Python integers are unbounded, so the operand is first reduced to its low
    32 bits (two's complement for negative values) and the result is masked
    back to 32 bits; without this the "rotation" leaks bits above bit 31 and
    sign-extends negative inputs.

    :param t: integer to rotate; only its low 32 bits are used
    :param e: rotation amount in bits (taken modulo 32)
    :return: the rotated value as an unsigned 32-bit integer
    """
    t &= 0xFFFFFFFF
    e %= 32
    return ((t << e) | (t >> (32 - e))) & 0xFFFFFFFF
|
||||
|
||||
def tendian(t):
    """Reverse the byte order of 32-bit words.

    For an ``int`` (expected to be a non-negative 32-bit value) the
    byte-swapped value is returned. Any other argument is treated as a
    mutable sequence: each element is converted recursively, written back
    in place, and the same sequence object is returned.
    """
    if not isinstance(t, int):
        for position, item in enumerate(t):
            t[position] = tendian(item)
        return t
    # Bytes 0 and 2 of the result come from the 8-bit rotation,
    # bytes 1 and 3 from the 24-bit rotation.
    even_bytes = srotl(t, 8) & 16711935
    odd_bytes = srotl(t, 24) & 4278255360
    return even_bytes | odd_bytes
|
||||
|
||||
# 没问题
|
||||
def tbytes_to_words(t):
    """Pack a sequence of byte values into 32-bit words, most significant byte first.

    A trailing partial word is left zero-padded in its low-order bytes.

    :param t: sequence of integers (byte values)
    :return: list of packed words; empty when *t* is empty
    """
    words = []
    for position, byte in enumerate(t):
        bit_offset = position * 8
        word_index = bit_offset >> 5
        # Start a new word whenever the previous one is full
        if word_index >= len(words):
            words.append(0)
        words[word_index] |= byte << (24 - bit_offset % 32)
    return words
|
||||
|
||||
def jbinstring_to_bytes(t):
    """Convert a "binary string" into a list of byte values.

    Each character contributes the low 8 bits of its code point.
    """
    return [ord(t[position]) & 255 for position in range(len(t))]
|
||||
|
||||
# 没问题
|
||||
def estring_to_bytes(t):
    """Return the UTF-8 encoding of the string *t* as a list of byte values.

    The previous ``unquote(quote(t))`` round trip simply returned *t*, so the
    following ``& 255`` truncation corrupted every non-ASCII character instead
    of UTF-8 encoding it. Encoding directly gives identical results for ASCII
    input and correct bytes for everything else.

    :param t: text to encode
    :return: list of integers in ``range(256)``
    """
    return list(t.encode("utf-8"))
|
||||
|
||||
def _ff(t, e, n, r, o, i, a):
|
||||
# 计算中间值 c
|
||||
c = t + ((e & n) | (~e & r)) + (o & 0xFFFFFFFF) + a
|
||||
# 将 c 转换为 32 位无符号整数
|
||||
c = c & 0xFFFFFFFF
|
||||
# 左移和右移操作
|
||||
c = (c << i | c >> (32 - i)) & 0xFFFFFFFF
|
||||
# 返回结果
|
||||
return (c + e) & 0xFFFFFFFF
|
||||
|
||||
def _gg(t, e, n, r, o, i, a):
|
||||
# 计算中间值 c
|
||||
c = t + ((e & r) | (n & ~r)) + (o & 0xFFFFFFFF) + a
|
||||
# 将 c 转换为 32 位无符号整数
|
||||
c = c & 0xFFFFFFFF
|
||||
# 左移和右移操作
|
||||
c = (c << i | c >> (32 - i)) & 0xFFFFFFFF
|
||||
# 返回结果
|
||||
return (c + e) & 0xFFFFFFFF
|
||||
|
||||
def _hh(t, e, n, r, o, i, a):
|
||||
# 计算中间值 c
|
||||
c = t + (e ^ n ^ r) + (o & 0xFFFFFFFF) + a
|
||||
# 将 c 转换为 32 位无符号整数
|
||||
c = c & 0xFFFFFFFF
|
||||
# 左移和右移操作
|
||||
c = (c << i | c >> (32 - i)) & 0xFFFFFFFF
|
||||
# 返回结果
|
||||
return (c + e) & 0xFFFFFFFF
|
||||
|
||||
def _ii(t, e, n, r, o, i, a):
|
||||
# 计算中间值 c
|
||||
c = t + (n ^ (e | ~r)) + (o & 0xFFFFFFFF) + a
|
||||
# 将 c 转换为 32 位无符号整数
|
||||
c = c & 0xFFFFFFFF
|
||||
# 左移和右移操作
|
||||
c = (c << i | c >> (32 - i)) & 0xFFFFFFFF
|
||||
# 返回结果
|
||||
return (c + e) & 0xFFFFFFFF
|
||||
|
||||
def o(i, a):
    """Hash the message *i* and return four 32-bit state words.

    The initial state, per-step constants, rotation amounts and message-word
    order used below are those of the MD5 algorithm.

    :param i: message; a ``str`` is converted with ``estring_to_bytes``, a
              list/tuple is copied into a list, a ``bytearray`` is used as is
    :param a: never read inside this function
    :return: list ``[s, l, f, p]`` after ``tendian`` has been applied to it
    """
    if isinstance(i, str):
        i = estring_to_bytes(i)
    elif isinstance(i, (list, tuple)):
        i = list(i)
    elif not isinstance(i, (list, bytearray)):
        # NOTE(review): any other type (including bytes) becomes a str here and
        # is then handed to tbytes_to_words, which shifts each element with
        # `<<` -- this looks unintended; confirm.
        i = str(i)
    # Message as packed 32-bit words, and its length in bits
    c = tbytes_to_words(i)
    u = 8 * len(i)
    # Initial values of the four state words
    s, l, f, p = 1732584193, -271733879, -1732584194, 271733878

    # Reverse the byte order inside every message word
    for d in range(len(c)):
        c[d] = (16711935 & (c[d] << 8 | c[d] >> 24)) | (4278255360 & (c[d] << 24 | c[d] >> 8))

    # Make sure the list c is long enough to hold the length word
    while len(c) <= (14 + ((u + 64 >> 9) << 4)):
        c.append(0)

    # Set the 0x80 marker byte right after the message and store the bit
    # length in word 14 of the last 16-word block
    c[u >> 5] |= 128 << (u % 32)
    c[14 + ((u + 64 >> 9) << 4)] = u

    h, v, y, m = _ff, _gg, _hh, _ii
    # Process the message one 16-word block at a time
    for d in range(0, len(c), 16):
        # Remember the state at the start of the block
        g, b, w, A = s, l, f, p
        # Extend the list before indexing so that c[d] .. c[d + 15] exist
        while len(c) <= d + 15:
            c.append(0)
        # Round 1 (steps using _ff)
        s = h(s, l, f, p, c[d + 0], 7, -680876936)
        p = h(p, s, l, f, c[d + 1], 12, -389564586)
        f = h(f, p, s, l, c[d + 2], 17, 606105819)
        l = h(l, f, p, s, c[d + 3], 22, -1044525330)
        s = h(s, l, f, p, c[d + 4], 7, -176418897)
        p = h(p, s, l, f, c[d + 5], 12, 1200080426)
        f = h(f, p, s, l, c[d + 6], 17, -1473231341)
        l = h(l, f, p, s, c[d + 7], 22, -45705983)
        s = h(s, l, f, p, c[d + 8], 7, 1770035416)
        p = h(p, s, l, f, c[d + 9], 12, -1958414417)
        f = h(f, p, s, l, c[d + 10], 17, -42063)
        l = h(l, f, p, s, c[d + 11], 22, -1990404162)
        s = h(s, l, f, p, c[d + 12], 7, 1804603682)
        p = h(p, s, l, f, c[d + 13], 12, -40341101)
        f = h(f, p, s, l, c[d + 14], 17, -1502002290)
        # The walrus performs the last step of round 1 (updating l) while the
        # arguments of the first round-2 step are being evaluated left to right
        s = v(s, l := h(l, f, p, s, c[d + 15], 22, 1236535329), f, p, c[d + 1], 5, -165796510)
        # Round 2 (steps using _gg)
        p = v(p, s, l, f, c[d + 6], 9, -1069501632)
        f = v(f, p, s, l, c[d + 11], 14, 643717713)
        l = v(l, f, p, s, c[d + 0], 20, -373897302)
        s = v(s, l, f, p, c[d + 5], 5, -701558691)
        p = v(p, s, l, f, c[d + 10], 9, 38016083)
        f = v(f, p, s, l, c[d + 15], 14, -660478335)
        l = v(l, f, p, s, c[d + 4], 20, -405537848)
        s = v(s, l, f, p, c[d + 9], 5, 568446438)
        p = v(p, s, l, f, c[d + 14], 9, -1019803690)
        f = v(f, p, s, l, c[d + 3], 14, -187363961)
        l = v(l, f, p, s, c[d + 8], 20, 1163531501)
        s = v(s, l, f, p, c[d + 13], 5, -1444681467)
        p = v(p, s, l, f, c[d + 2], 9, -51403784)
        f = v(f, p, s, l, c[d + 7], 14, 1735328473)
        # Last step of round 2 (walrus) followed by the first step of round 3
        s = y(s, l := v(l, f, p, s, c[d + 12], 20, -1926607734), f, p, c[d + 5], 4, -378558)
        # Round 3 (steps using _hh)
        p = y(p, s, l, f, c[d + 8], 11, -2022574463)
        f = y(f, p, s, l, c[d + 11], 16, 1839030562)
        l = y(l, f, p, s, c[d + 14], 23, -35309556)
        s = y(s, l, f, p, c[d + 1], 4, -1530992060)
        p = y(p, s, l, f, c[d + 4], 11, 1272893353)
        f = y(f, p, s, l, c[d + 7], 16, -155497632)
        l = y(l, f, p, s, c[d + 10], 23, -1094730640)
        s = y(s, l, f, p, c[d + 13], 4, 681279174)
        p = y(p, s, l, f, c[d + 0], 11, -358537222)
        f = y(f, p, s, l, c[d + 3], 16, -722521979)
        l = y(l, f, p, s, c[d + 6], 23, 76029189)
        s = y(s, l, f, p, c[d + 9], 4, -640364487)
        p = y(p, s, l, f, c[d + 12], 11, -421815835)
        f = y(f, p, s, l, c[d + 15], 16, 530742520)
        # Last step of round 3 (walrus) followed by the first step of round 4
        s = m(s, l := y(l, f, p, s, c[d + 2], 23, -995338651), f, p, c[d + 0], 6, -198630844)
        # Round 4 (steps using _ii)
        p = m(p, s, l, f, c[d + 7], 10, 1126891415)
        f = m(f, p, s, l, c[d + 14], 15, -1416354905)
        l = m(l, f, p, s, c[d + 5], 21, -57434055)
        s = m(s, l, f, p, c[d + 12], 6, 1700485571)
        p = m(p, s, l, f, c[d + 3], 10, -1894986606)
        f = m(f, p, s, l, c[d + 10], 15, -1051523)
        l = m(l, f, p, s, c[d + 1], 21, -2054922799)
        s = m(s, l, f, p, c[d + 8], 6, 1873313359)
        p = m(p, s, l, f, c[d + 15], 10, -30611744)
        f = m(f, p, s, l, c[d + 6], 15, -1560198380)
        l = m(l, f, p, s, c[d + 13], 21, 1309151649)
        s = m(s, l, f, p, c[d + 4], 6, -145523070)
        p = m(p, s, l, f, c[d + 11], 10, -1120210379)
        f = m(f, p, s, l, c[d + 2], 15, 718787259)
        l = m(l, f, p, s, c[d + 9], 21, -343485551)

        # Add the block's starting state back in, keeping 32 unsigned bits
        # (`>> 0` is a no-op here; `>>` binds tighter than `&`)
        s = (s + g) >> 0 & 0xFFFFFFFF
        l = (l + b) >> 0 & 0xFFFFFFFF
        f = (f + w) >> 0 & 0xFFFFFFFF
        p = (p + A) >> 0 & 0xFFFFFFFF

    # Byte-swap the four state words before returning them
    return tendian([s, l, f, p])
|
||||
|
||||
def twords_to_bytes(t):
    """Split 32-bit words into bytes, most significant byte first.

    :param t: sequence of integer words
    :return: list with four byte values per word
    """
    return [
        (t[word_index] >> shift) & 255
        for word_index in range(len(t))
        for shift in (24, 16, 8, 0)
    ]
|
||||
|
||||
def tbytes_to_hex(t):
    """Render a sequence of byte values as a lowercase hexadecimal string.

    Each value contributes its high nibble followed by its low nibble.
    """
    return ''.join(
        hex(nibble)[2:]
        for value in t
        for nibble in (value >> 4, value & 15)
    )
|
||||
|
||||
def get_wrid(e):
    """Return the lowercase hexadecimal MD5 digest of *e*.

    The hand-written routine in this module implements the MD5 algorithm, so
    the digest is taken from ``hashlib``; the hand-written path is kept only
    as a fallback for Python builds where ``hashlib.md5`` is missing or
    blocked.

    :param e: text to hash (encoded as UTF-8), or the raw bytes as
              ``bytes`` / ``bytearray`` / list / tuple of byte values
    :return: 32-character hex digest
    :raises TypeError: if *e* is of any other type
    """
    import hashlib  # local import so the module's import block stays unchanged

    if isinstance(e, str):
        data = e.encode("utf-8")
    elif isinstance(e, (bytes, bytearray, list, tuple)):
        data = bytes(e)
    else:
        raise TypeError(f"get_wrid() expects str or bytes-like input, got {type(e).__name__}")

    try:
        return hashlib.md5(data).hexdigest()
    except (ValueError, AttributeError):
        # hashlib.md5 unavailable (e.g. a FIPS-restricted build): fall back to
        # the pure-Python routine, feeding it the already-encoded bytes.
        return tbytes_to_hex(twords_to_bytes(o(list(data), None)))
|
||||
|
||||
# def test():
|
||||
# e = "dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter=%7B%22ds%22%3A%5B%5D%2C%22wh%22%3A%5B3697%2C5674%2C33%5D%2C%22of%22%3A%5B222%2C444%2C222%5D%7D&dm_img_list=%5B%5D&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&mid=3546666038725258&platform=web&token=&web_location=1550101&wts=1723867512ea1db124af3c7062474693fa704f4ff8"
|
||||
# n = None
|
||||
# x = o(e, n)
|
||||
# i = twords_to_bytes(x)
|
||||
# return tbytes_to_hex(i)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# # test()
|
||||
# encode_query = "dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter=%7B%22ds%22%3A%5B%5D%2C%22wh%22%3A%5B3697%2C5674%2C33%5D%2C%22of%22%3A%5B222%2C444%2C222%5D%7D&dm_img_list=%5B%5D&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&mid=3546666038725258&platform=web&token=&web_location=1550101&wts=1723867512ea1db124af3c7062474693fa704f4ff8"
|
||||
# wrid1 = main(encode_query)
|
||||
# print(wrid1)
|
||||
#
|
||||
# js1 = open('./wrid.js', 'r', encoding='utf-8').read()
|
||||
# wrid2 = execjs.compile(js1).call('main', encode_query)
|
||||
# print(wrid2)
|
||||
|
Loading…
x
Reference in New Issue
Block a user