Merge pull request #481 from Koyomi781/update

[update] 调整代码结构,增加一些接口
This commit is contained in:
Evil0ctal 2024-09-25 20:25:13 -07:00 committed by GitHub
commit ca05f11953
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 457 additions and 235 deletions

View File

@ -223,6 +223,7 @@ TikHub的部分源代码会开源在Github上并且会赞助一些开源项
- [x] 获取列表unique_id - [x] 获取列表unique_id
- 哔哩哔哩网页版API - 哔哩哔哩网页版API
- [x] 获取单个视频详情信息 - [x] 获取单个视频详情信息
- [x] 获取视频流地址
- [x] 获取用户发布视频作品数据 - [x] 获取用户发布视频作品数据
- [x] 获取用户所有收藏夹信息 - [x] 获取用户所有收藏夹信息
- [x] 获取指定收藏夹内视频数据 - [x] 获取指定收藏夹内视频数据
@ -231,8 +232,12 @@ TikHub的部分源代码会开源在Github上并且会赞助一些开源项
- [x] 获取指定视频的评论 - [x] 获取指定视频的评论
- [x] 获取视频下指定评论的回复 - [x] 获取视频下指定评论的回复
- [x] 获取指定用户动态 - [x] 获取指定用户动态
- [x] 获取视频实时弹幕
- [x] 获取指定直播间信息 - [x] 获取指定直播间信息
- [x] 获取直播间视频流
- [x] 获取指定分区正在直播的主播
- [x] 获取所有直播分区列表 - [x] 获取所有直播分区列表
- [x] 通过bv号获得视频分p信息
--- ---
## 📦调用解析库(已废弃需要更新): ## 📦调用解析库(已废弃需要更新):

View File

@ -46,6 +46,48 @@ async def fetch_one_video(request: Request,
raise HTTPException(status_code=status_code, detail=detail.dict()) raise HTTPException(status_code=status_code, detail=detail.dict())
# 获取视频流地址
@router.get("/fetch_video_playurl", response_model=ResponseModel, summary="获取视频流地址/Get video playurl")
async def fetch_one_video(request: Request,
                          bv_id: str = Query(example="BV1y7411Q7Eq", description="作品id/Video id"),
                          cid: str = Query(example="171776208", description="作品cid/Video cid")):
    """
    # [中文]
    ### 用途:
    - 获取视频流地址
    ### 参数:
    - bv_id: 作品id
    - cid: 作品cid
    ### 返回:
    - 视频流地址
    # [English]
    ### Purpose:
    - Get video playurl
    ### Parameters:
    - bv_id: Video id
    - cid: Video cid
    ### Return:
    - Video playurl
    # [示例/Example]
    bv_id = "BV1y7411Q7Eq"
    cid = "171776208"
    """
    # NOTE(review): the name ``fetch_one_video`` is reused by several handlers
    # in this module; each later definition rebinds the module-level name.
    try:
        data = await BilibiliWebCrawler.fetch_video_playurl(bv_id, cid)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Every crawler failure is reported to the client as a 400 carrying the
        # route and the received query parameters.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain explicitly so the original error stays attached as __cause__.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
# 获取用户发布视频作品数据 # 获取用户发布视频作品数据
@router.get("/fetch_user_post_videos", response_model=ResponseModel, @router.get("/fetch_user_post_videos", response_model=ResponseModel,
summary="获取用户主页作品数据/Get user homepage video data") summary="获取用户主页作品数据/Get user homepage video data")
@ -385,6 +427,44 @@ async def fetch_collect_folders(request: Request,
raise HTTPException(status_code=status_code, detail=detail.dict()) raise HTTPException(status_code=status_code, detail=detail.dict())
# 获取视频实时弹幕
@router.get("/fetch_video_danmaku", response_model=ResponseModel, summary="获取视频实时弹幕/Get Video Danmaku")
async def fetch_one_video(request: Request,
                          cid: str = Query(example="1639235405", description="作品cid/Video cid")):
    """
    # [中文]
    ### 用途:
    - 获取视频实时弹幕
    ### 参数:
    - cid: 作品cid
    ### 返回:
    - 视频实时弹幕
    # [English]
    ### Purpose:
    - Get Video Danmaku
    ### Parameters:
    - cid: Video cid
    ### Return:
    - Video Danmaku
    # [示例/Example]
    cid = "1639235405"
    """
    # NOTE(review): the name ``fetch_one_video`` is reused by several handlers
    # in this module; each later definition rebinds the module-level name.
    try:
        data = await BilibiliWebCrawler.fetch_video_danmaku(cid)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Every crawler failure is reported to the client as a 400 carrying the
        # route and the received query parameters.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain explicitly so the original error stays attached as __cause__.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
# 获取指定直播间信息 # 获取指定直播间信息
@router.get("/fetch_live_room_detail", response_model=ResponseModel, @router.get("/fetch_live_room_detail", response_model=ResponseModel,
summary="获取指定直播间信息/Get information of specified live room") summary="获取指定直播间信息/Get information of specified live room")
@ -424,43 +504,86 @@ async def fetch_collect_folders(request: Request,
raise HTTPException(status_code=status_code, detail=detail.dict()) raise HTTPException(status_code=status_code, detail=detail.dict())
# # 获取指定直播间视频流 # 获取指定直播间视频流
# @router.get("/fetch_live_videos", response_model=ResponseModel, @router.get("/fetch_live_videos", response_model=ResponseModel,
# summary="获取直播间视频流/Get live video data of specified room") summary="获取直播间视频流/Get live video data of specified room")
# async def fetch_collect_folders(request: Request, async def fetch_collect_folders(request: Request,
# room_id: str = Query(example="22816111", description="直播间ID/Live room ID")): room_id: str = Query(example="1815229528", description="直播间ID/Live room ID")):
# """ """
# # [中文] # [中文]
# ### 用途: ### 用途:
# - 获取指定直播间视频流 - 获取指定直播间视频流
# ### 参数: ### 参数:
# - room_id: 直播间ID - room_id: 直播间ID
# ### 返回: ### 返回:
# - 指定直播间视频流 - 指定直播间视频流
#
# # [English] # [English]
# ### Purpose: ### Purpose:
# - Get live video data of specified room - Get live video data of specified room
# ### Parameters: ### Parameters:
# - room_id: Live room ID - room_id: Live room ID
# ### Return: ### Return:
# - live video data of specified room - live video data of specified room
#
# # [示例/Example] # [示例/Example]
# room_id = "22816111" room_id = "1815229528"
# """ """
# try: try:
# data = await BilibiliWebCrawler.fetch_live_videos(room_id) data = await BilibiliWebCrawler.fetch_live_videos(room_id)
# return ResponseModel(code=200, return ResponseModel(code=200,
# router=request.url.path, router=request.url.path,
# data=data) data=data)
# except Exception as e: except Exception as e:
# status_code = 400 status_code = 400
# detail = ErrorResponseModel(code=status_code, detail = ErrorResponseModel(code=status_code,
# router=request.url.path, router=request.url.path,
# params=dict(request.query_params), params=dict(request.query_params),
# ) )
# raise HTTPException(status_code=status_code, detail=detail.dict()) raise HTTPException(status_code=status_code, detail=detail.dict())
# 获取指定分区正在直播的主播
@router.get("/fetch_live_streamers", response_model=ResponseModel,
            summary="获取指定分区正在直播的主播/Get live streamers of specified live area")
async def fetch_collect_folders(request: Request,
                                area_id: str = Query(example="9", description="直播分区id/Live area ID"),
                                pn: int = Query(default=1, description="页码/Page number")):
    """
    # [中文]
    ### 用途:
    - 获取指定分区正在直播的主播
    ### 参数:
    - area_id: 直播分区id
    - pn: 页码
    ### 返回:
    - 指定分区正在直播的主播
    # [English]
    ### Purpose:
    - Get live streamers of specified live area
    ### Parameters:
    - area_id: Live area ID
    - pn: Page number
    ### Return:
    - live streamers of specified live area
    # [示例/Example]
    area_id = "9"
    pn = 1
    """
    # NOTE(review): the name ``fetch_collect_folders`` is reused by several
    # handlers in this module; each later definition rebinds the module-level name.
    try:
        data = await BilibiliWebCrawler.fetch_live_streamers(area_id, pn)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Every crawler failure is reported to the client as a 400 carrying the
        # route and the received query parameters.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain explicitly so the original error stays attached as __cause__.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
# 获取所有直播分区列表 # 获取所有直播分区列表
@ -496,3 +619,79 @@ async def fetch_collect_folders(request: Request,):
params=dict(request.query_params), params=dict(request.query_params),
) )
raise HTTPException(status_code=status_code, detail=detail.dict()) raise HTTPException(status_code=status_code, detail=detail.dict())
# 通过bv号获得视频aid号
@router.get("/bv_to_aid", response_model=ResponseModel, summary="通过bv号获得视频aid号/Generate aid by bvid")
async def fetch_one_video(request: Request,
                          bv_id: str = Query(example="BV1M1421t7hT", description="作品id/Video id")):
    """
    # [中文]
    ### 用途:
    - 通过bv号获得视频aid号
    ### 参数:
    - bv_id: 作品id
    ### 返回:
    - 视频aid号
    # [English]
    ### Purpose:
    - Generate aid by bvid
    ### Parameters:
    - bv_id: Video id
    ### Return:
    - Video aid
    # [示例/Example]
    bv_id = "BV1M1421t7hT"
    """
    # NOTE(review): the name ``fetch_one_video`` is reused by several handlers
    # in this module; each later definition rebinds the module-level name.
    try:
        data = await BilibiliWebCrawler.bv_to_aid(bv_id)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Every failure is reported to the client as a 400 carrying the route
        # and the received query parameters.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain explicitly so the original error stays attached as __cause__.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
# 通过bv号获得视频分p信息
@router.get("/fetch_video_parts", response_model=ResponseModel, summary="通过bv号获得视频分p信息/Get Video Parts By bvid")
async def fetch_one_video(request: Request,
                          bv_id: str = Query(example="BV1vf421i7hV", description="作品id/Video id")):
    """
    # [中文]
    ### 用途:
    - 通过bv号获得视频分p信息
    ### 参数:
    - bv_id: 作品id
    ### 返回:
    - 视频分p信息
    # [English]
    ### Purpose:
    - Get Video Parts By bvid
    ### Parameters:
    - bv_id: Video id
    ### Return:
    - Video Parts
    # [示例/Example]
    bv_id = "BV1vf421i7hV"
    """
    # NOTE(review): the name ``fetch_one_video`` is reused by several handlers
    # in this module; each later definition rebinds the module-level name.
    try:
        data = await BilibiliWebCrawler.fetch_video_parts(bv_id)
        return ResponseModel(code=200,
                             router=request.url.path,
                             data=data)
    except Exception as e:
        # Every crawler failure is reported to the client as a 400 carrying the
        # route and the received query parameters.
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        # Chain explicitly so the original error stays attached as __cause__.
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e

View File

@ -5,7 +5,7 @@ TokenManager:
'origin': https://www.bilibili.com 'origin': https://www.bilibili.com
'referer': https://space.bilibili.com/ 'referer': https://space.bilibili.com/
'origin_2': https://space.bilibili.com 'origin_2': https://space.bilibili.com
'cookie': buvid3=D6E58E7B-E3A9-7CD3-7BE5-B5F255788A3020034infoc; b_nut=1723702120; _uuid=6E10D69A10-A711-9DA8-6833-1010262296C24B21337infoc; buvid_fp=6cf2ea8e143bbc49f3b7c0dcb2465fc2; buvid4=748EC8F0-82E2-1672-A286-8445DDB2A80C06110-023112304-; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjM5NjEzMjIsImlhdCI6MTcyMzcwMjA2MiwicGx0IjotMX0.IWOEMLCDKqWAX24rePU-1Qgm9Isf5CU8Tz0O-j6GHfo; bili_ticket_expires=1723961262; CURRENT_FNVAL=4048; rpdid=|(JluY|JJ|RR0J'u~kJ~|kkuY; b_lsid=E10B83DC4_191552166D6; header_theme_version=CLOSE; enable_web_push=DISABLE; home_feed_column=5; browser_resolution=1488-714; sid=873ujj7i 'cookie': buvid4=748EC8F0-82E2-1672-A286-8445DDB2A80C06110-023112304-; buvid3=73EF1E2E-B7A9-78DD-F2AE-9AB2B476E27638524infoc; b_nut=1727075638; _uuid=77AA4910F-5C8F-9647-7DA3-F583C8108BD7942063infoc; buvid_fp=75b22e5d0c3dbc642b1c80956c62c7da; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MjczNDI1NTYsImlhdCI6MTcyNzA4MzI5NiwicGx0IjotMX0.G3pvk6OC4FDWBL7GNgKkkVtUMl29UtNdgok_cANoKsw; bili_ticket_expires=1727342496; header_theme_version=CLOSE; enable_web_push=DISABLE; home_feed_column=5; browser_resolution=1488-712; b_lsid=5B4EDF8A_1921EAA1BDA
'user-agent': Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 'user-agent': Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36
proxies: proxies:

View File

@ -11,8 +11,11 @@ class BilibiliAPIEndpoints:
# 作品信息 (Post Detail) # 作品信息 (Post Detail)
POST_DETAIL = f"{BILIAPI_DOMAIN}/x/web-interface/view" POST_DETAIL = f"{BILIAPI_DOMAIN}/x/web-interface/view"
# 用户播放列表 (用于爬取用户所有视频数据) # 作品视频流
USER_POST = f"{BILIAPI_DOMAIN}/x/v2/medialist/resource/list" VIDEO_PLAYURL = f"{BILIAPI_DOMAIN}/x/player/wbi/playurl"
# 用户发布视频作品数据
USER_POST = f"{BILIAPI_DOMAIN}/x/space/wbi/arc/search"
# 收藏夹列表 # 收藏夹列表
COLLECT_FOLDERS = f"{BILIAPI_DOMAIN}/x/v3/fav/folder/created/list-all" COLLECT_FOLDERS = f"{BILIAPI_DOMAIN}/x/v3/fav/folder/created/list-all"
@ -35,9 +38,15 @@ class BilibiliAPIEndpoints:
# 视频评论 # 视频评论
VIDEO_COMMENTS = f"{BILIAPI_DOMAIN}/x/v2/reply" VIDEO_COMMENTS = f"{BILIAPI_DOMAIN}/x/v2/reply"
# 用户动态
USER_DYNAMIC = f"{BILIAPI_DOMAIN}/x/polymer/web-dynamic/v1/feed/space"
# 评论的回复 # 评论的回复
COMMENT_REPLY = f"{BILIAPI_DOMAIN}/x/v2/reply/reply" COMMENT_REPLY = f"{BILIAPI_DOMAIN}/x/v2/reply/reply"
# 视频分p信息
VIDEO_PARTS = f"{BILIAPI_DOMAIN}/x/player/pagelist"
# 直播间信息 # 直播间信息
LIVEROOM_DETAIL = f"{LIVE_DOMAIN}/room/v1/Room/get_info" LIVEROOM_DETAIL = f"{LIVE_DOMAIN}/room/v1/Room/get_info"
@ -47,4 +56,7 @@ class BilibiliAPIEndpoints:
# 直播间视频流 # 直播间视频流
LIVE_VIDEOS = f"{LIVE_DOMAIN}/room/v1/Room/playUrl" LIVE_VIDEOS = f"{LIVE_DOMAIN}/room/v1/Room/playUrl"
# 正在直播的主播
LIVE_STREAMER = f"{LIVE_DOMAIN}/xlive/web-interface/v1/second/getList"

View File

@ -0,0 +1,39 @@
import time

from pydantic import BaseModel, Field
class BaseRequestsModel(BaseModel):
    """Base class for request-parameter models; contributes the ``wts`` field."""

    # Current Unix time in whole seconds, rendered as a string.
    # A plain default (``str(round(time.time()))``) is evaluated once when the
    # class body runs, so every instance would share the import-time value.
    # ``default_factory`` evaluates it each time a model is instantiated.
    wts: str = Field(default_factory=lambda: str(round(time.time())))
class UserPostVideos(BaseRequestsModel):
    """Query parameters for the user-posted-videos request.

    Built by the crawler as ``UserPostVideos(mid=uid, pn=pn)`` and passed, as a
    dict, to ``EndpointGenerator.user_post_videos_endpoint``.
    """
    # Fixed JSON string sent as-is in the ``dm_img_inter`` field.
    dm_img_inter: str = '{"ds":[],"wh":[3557,5674,5],"of":[154,308,154]}'
    # Sent as an empty list by default.
    dm_img_list: list = []
    # User id whose videos are requested (the crawler passes its ``uid`` here).
    mid: str
    # Page number.
    pn: int
    # NOTE(review): presumably the page size — confirm against the API.
    ps: str = "20"
class UserProfile(BaseRequestsModel):
    """Query parameters for the user-profile request.

    Built by the crawler as ``UserProfile(mid=uid)`` and passed, as a dict, to
    ``EndpointGenerator.user_profile_endpoint``.
    """
    # User id whose profile is requested (the crawler passes its ``uid`` here).
    mid: str
class UserDynamic(BaseRequestsModel):
    """Query parameters for the user-dynamic (feed) request.

    Built by the crawler as ``UserDynamic(host_mid=uid, offset=offset)`` and
    passed, as a dict, to ``EndpointGenerator.user_dynamic_endpoint``.

    ``wts`` is inherited from ``BaseRequestsModel``; the former override here
    repeated the base declaration verbatim and has been removed.
    """

    # User id whose feed is requested (the crawler passes its ``uid`` here).
    host_mid: str
    # Paging cursor; an empty string starts from the newest entry.
    offset: str
class ComPopular(BaseRequestsModel):
    """Query parameters for the popular-videos request.

    Built by the crawler as ``ComPopular(pn=pn)`` and passed, as a dict, to
    ``EndpointGenerator.com_popular_endpoint``.
    """
    # Page number.
    pn: int
    # NOTE(review): presumably the page size — confirm against the API.
    ps: str = "20"
    # Fixed value sent in the ``web_location`` field.
    web_location: str = "333.934"
class PlayUrl(BaseRequestsModel):
    """Query parameters for the video play-URL request.

    Built by the crawler as ``PlayUrl(bvid=bv_id, cid=cid, qn=qn)`` and passed,
    as a dict, to ``EndpointGenerator.video_playurl_endpoint``.
    """
    # NOTE(review): presumably the requested quality code — confirm against the API.
    qn: str
    # Fixed value sent in the ``fnval`` field.
    fnval: str = '4048'
    # BV id of the video.
    bvid: str
    # cid of the video.
    cid: str

View File

@ -1,159 +1,58 @@
import time
from urllib.parse import urlencode from urllib.parse import urlencode
import random
from crawlers.bilibili.web import wrid from crawlers.bilibili.web import wrid
from crawlers.utils.logger import logger from crawlers.utils.logger import logger
from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints
# 装饰器 检查是否正确生成endpoint
def Check_gen(func):
def checker(*args, **kwargs):
try:
result = func(*args, **kwargs)
return result
except Exception as e:
raise RuntimeError("生成w_rid失败:{0}, 函数地址:{1}".format(e, func.__name__))
return checker class EndpointGenerator:
def __init__(self, params: dict):
class EndpointModels: self.params = params
def __init__(self):
# 实例化WridManager
self.wridmanager = WridManager()
# 当前时间戳
self.wts = round(time.time())
# 固定inter也能获得结果。如果失效见--WridManager().get_inter
self.inter = '{"ds":[],"wh":[3557,5674,5],"of":[154,308,154]}'
# 获取wrid示例 通过uid 生成包含w_rid和wts的字典
@Check_gen
async def get_wrid_wts_by_uid(self, uid: str) -> dict:
params = {
'dm_cover_img_str': 'QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ',
'dm_img_inter': self.inter,
'dm_img_list': [],
'dm_img_str': 'V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ',
'mid': uid,
'platform': 'web',
'token': '',
'web_location': '1550101',
'wts': f'{self.wts}ea1db124af3c7062474693fa704f4ff8'
}
# 获取w_rid参数
w_rid = await self.wridmanager.get_wrid(params=params)
reslut = {
"w_rid": w_rid,
"wts": self.wts
}
return reslut
# 获取用户发布视频作品数据 生成enpoint # 获取用户发布视频作品数据 生成enpoint
@Check_gen async def user_post_videos_endpoint(self) -> str:
async def user_post_videos_endpoint(self, uid: str, pn: int, ps: int = 30) -> str: # 添加w_rid
# 编码inter endpoint = await WridManager.wrid_model_endpoint(params=self.params)
new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D") # 拼接成最终结果并返回
# 构建请求参数 final_endpoint = BilibiliAPIEndpoints.USER_POST + '?' + endpoint
params = { return final_endpoint
"dm_cover_img_str": "QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ",
"dm_img_inter": self.inter, # 获取视频流地址 生成enpoint
"dm_img_list": [], async def video_playurl_endpoint(self) -> str:
"dm_img_str": "V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ", # 添加w_rid
"keyword": "", endpoint = await WridManager.wrid_model_endpoint(params=self.params)
"mid": uid, # 拼接成最终结果并返回
"order": "pubdate", final_endpoint = BilibiliAPIEndpoints.VIDEO_PLAYURL + '?' + endpoint
"order_avoided": "true",
"platform": "web",
"pn": pn,
"ps": ps,
"tid": "0",
"web_location": "1550101",
"wts": f"{self.wts}ea1db124af3c7062474693fa704f4ff8",
}
# 获取wrid
w_rid = await self.wridmanager.get_wrid(params=params)
# 将上面结果拼接成最终结果并返回
final_endpoint = f'https://api.bilibili.com/x/space/wbi/arc/search?mid={uid}&ps={ps}&tid=0&pn={pn}&keyword=&order=pubdate&platform=web&web_location=1550101&order_avoided=true&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&w_rid={w_rid}&wts={self.wts}'
return final_endpoint return final_endpoint
# 获取指定用户的信息 生成enpoint # 获取指定用户的信息 生成enpoint
@Check_gen async def user_profile_endpoint(self) -> str:
async def user_profile_endpoint(self, uid: str) -> str: # 添加w_rid
# 编码inter endpoint = await WridManager.wrid_model_endpoint(params=self.params)
new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D") # 拼接成最终结果并返回
# 构建请求参数 final_endpoint = BilibiliAPIEndpoints.USER_DETAIL + '?' + endpoint
params = {
'dm_cover_img_str': 'QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ',
'dm_img_inter': self.inter,
'dm_img_list': [],
'dm_img_str': 'V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ',
'mid': uid,
'platform': 'web',
'token': '',
'web_location': '1550101',
'wts': f'{self.wts}ea1db124af3c7062474693fa704f4ff8'
}
# 获取wrid
w_rid = await self.wridmanager.get_wrid(params=params)
# 将上面结果拼接成最终字符串并返回
final_endpoint = f'https://api.bilibili.com/x/space/wbi/acc/info?mid={uid}&token=&platform=web&web_location=1550101&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&w_rid={w_rid}&wts={self.wts}'
return final_endpoint return final_endpoint
# 获取综合热门视频信息 生成enpoint # 获取综合热门视频信息 生成enpoint
@Check_gen async def com_popular_endpoint(self) -> str:
async def com_popular_endpoint(self, pn: int) -> str: # 添加w_rid
# 构建请求参数 endpoint = await WridManager.wrid_model_endpoint(params=self.params)
params = { # 拼接成最终结果并返回
"pn": pn, final_endpoint = BilibiliAPIEndpoints.COM_POPULAR + '?' + endpoint
"ps": "20",
"web_location": "333.934",
"wts": f"{self.wts}ea1db124af3c7062474693fa704f4ff8",
}
# 获取wrid
w_rid = await self.wridmanager.get_wrid(params=params)
# 将上面结果拼接成最终结果并返回
final_endpoint = f"https://api.bilibili.com/x/web-interface/popular?ps=20&pn={pn}&web_location=333.934&w_rid={w_rid}&wts={self.wts}"
return final_endpoint return final_endpoint
# 获取指定用户动态 # 获取指定用户动态
@Check_gen async def user_dynamic_endpoint(self):
async def user_dynamic_endpoint(self, uid: str, offset: str): # 添加w_rid
# 编码inter endpoint = await WridManager.wrid_model_endpoint(params=self.params)
new_inter = self.inter.replace(" ", "").replace('{', "%7B").replace("'", "%22").replace("}", "%7D") # 拼接成最终结果并返回
# 构建请求参数 final_endpoint = BilibiliAPIEndpoints.USER_DYNAMIC + '?' + endpoint
params = {
"dm_cover_img_str": "QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ",
"dm_img_inter": self.inter,
"dm_img_list": [],
"dm_img_str": "V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&features=itemOpusStyle%2ClistOnlyfans%2CopusBigCover%2ConlyfansVote%2CdecorationCard%2CforwardListHidden%2CugcDelete",
"host_mid": uid,
"offset": offset,
"platform": "web",
"timezone_offset": "-480",
"web_location": "333.999",
"wts": self.wts,
"x-bili-device-req-json": "%7B%22platform%22%3A%22web%22%2C%22device%22%3A%22pc%22%7D",
"x-bili-web-req-json": "%7B%22spm_id%22%3A%22333.999%22%7Dea1db124af3c7062474693fa704f4ff8"
}
# 获取wrid
w_rid = await self.wridmanager.get_wrid(params=params)
# 将上面结果拼接成最终结果并返回
final_endpoint = f'https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space?offset={offset}&host_mid={uid}&timezone_offset=-480&platform=web&features=itemOpusStyle,listOnlyfans,opusBigCover,onlyfansVote,decorationCard,forwardListHidden,ugcDelete&web_location=333.999&dm_img_list=[]&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter={new_inter}&x-bili-device-req-json=%7B%22platform%22:%22web%22,%22device%22:%22pc%22%7D&x-bili-web-req-json=%7B%22spm_id%22:%22333.999%22%7D&w_rid={w_rid}&wts={self.wts}'
return final_endpoint return final_endpoint
class WridManager: class WridManager:
@classmethod
def s(self) -> list: async def get_encode_query(cls, params: dict) -> str:
x = random.randint(0, 113) params['wts'] = params['wts'] + "ea1db124af3c7062474693fa704f4ff8"
return [2 * 1488 + 2 * 311 + 3 * x, 4 * 1488 - 311 + x, x]
def d(self) -> list:
x = random.randint(0, 513)
return [x, 2 * x, x]
def get_inter(self) -> dict:
return {"ds": [], "wh": self.s(), "of": self.d()}
async def get_encode_query(self, params: dict) -> str:
params = dict(sorted(params.items())) # 按照 key 重排参数 params = dict(sorted(params.items())) # 按照 key 重排参数
# 过滤 value 中的 "!'()*" 字符 # 过滤 value 中的 "!'()*" 字符
params = { params = {
@ -164,12 +63,17 @@ class WridManager:
query = urlencode(params) # 序列化参数 query = urlencode(params) # 序列化参数
return query return query
async def get_wrid(self, params: dict) -> str: @classmethod
encode_query = await self.get_encode_query(params) async def wrid_model_endpoint(cls, params: dict) -> str:
wts = params["wts"]
encode_query = await cls.get_encode_query(params)
# 获取w_rid参数 # 获取w_rid参数
w_rid = wrid.get_wrid(e=encode_query) w_rid = wrid.get_wrid(e=encode_query)
return w_rid params["wts"] = wts
params["w_rid"] = w_rid
return "&".join(f"{k}={v}" for k, v in params.items())
# BV号转为对应av号
async def bv2av(bv_id: str) -> int: async def bv2av(bv_id: str) -> int:
table = "fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF" table = "fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF"
s = [11, 10, 3, 8, 4, 6, 2, 9, 5, 7] s = [11, 10, 3, 8, 4, 6, 2, 9, 5, 7]
@ -188,7 +92,6 @@ async def bv2av(bv_id:str) -> int:
aid = (r - add) ^ xor aid = (r - add) ^ xor
return aid return aid
# 响应分析 # 响应分析
class ResponseAnalyzer: class ResponseAnalyzer:
# 用户收藏夹信息 # 用户收藏夹信息

View File

@ -6,9 +6,10 @@ import yaml # 配置文件
# 基础爬虫客户端和哔哩哔哩API端点 # 基础爬虫客户端和哔哩哔哩API端点
from crawlers.base_crawler import BaseCrawler from crawlers.base_crawler import BaseCrawler
from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints
# 哔哩哔哩工具类 # 哔哩哔哩工具类
from crawlers.bilibili.web.utils import EndpointModels, bv2av, ResponseAnalyzer from crawlers.bilibili.web.utils import EndpointGenerator, bv2av, ResponseAnalyzer
# 数据请求模型
from crawlers.bilibili.web.models import UserPostVideos, UserProfile, ComPopular, UserDynamic, PlayUrl
# 配置文件路径 # 配置文件路径
@ -50,6 +51,22 @@ class BilibiliWebCrawler:
response = await crawler.fetch_get_json(endpoint) response = await crawler.fetch_get_json(endpoint)
return response return response
# 获取视频流地址
async def fetch_video_playurl(self, bv_id: str, cid: str, qn: str = "64") -> dict:
    """Fetch the play-URL response for one video.

    Args:
        bv_id: BV id of the video; sent as the ``bvid`` field.
        cid: cid of the video.
        qn: Value for the ``qn`` field; defaults to "64".

    Returns:
        The result of ``crawler.fetch_get_json`` for the signed play-URL endpoint.
    """
    # Headers and proxies for the request come from the crawler's own settings.
    settings = await self.get_bilibili_headers()
    client = BaseCrawler(proxies=settings["proxies"], crawler_headers=settings["headers"])
    async with client as crawler:
        # Build the query fields from the model, then let the generator sign
        # them and prepend the endpoint URL.
        query = PlayUrl(bvid=bv_id, cid=cid, qn=qn).dict()
        signed_url = await EndpointGenerator(query).video_playurl_endpoint()
        payload = await crawler.fetch_get_json(signed_url)
    return payload
# 获取用户发布视频作品数据 # 获取用户发布视频作品数据
async def fetch_user_post_videos(self, uid: str, pn: int) -> dict: async def fetch_user_post_videos(self, uid: str, pn: int) -> dict:
""" """
@ -62,8 +79,11 @@ class BilibiliWebCrawler:
# 创建基础爬虫对象 # 创建基础爬虫对象
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
async with base_crawler as crawler: async with base_crawler as crawler:
# 通过模型生成基本请求参数
params = UserPostVideos(mid=uid, pn=pn)
# 创建请求endpoint # 创建请求endpoint
endpoint = await EndpointModels().user_post_videos_endpoint(uid=uid, pn=pn) generator = EndpointGenerator(params.dict())
endpoint = await generator.user_post_videos_endpoint()
# 发送请求,获取请求响应结果 # 发送请求,获取请求响应结果
response = await crawler.fetch_get_json(endpoint) response = await crawler.fetch_get_json(endpoint)
return response return response
@ -107,9 +127,13 @@ class BilibiliWebCrawler:
# 创建基础爬虫对象 # 创建基础爬虫对象
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
async with base_crawler as crawler: async with base_crawler as crawler:
# 通过模型生成基本请求参数
params = UserProfile(mid=uid)
# 创建请求endpoint # 创建请求endpoint
endpoint = await EndpointModels().user_profile_endpoint(uid=uid) generator = EndpointGenerator(params.dict())
response = await crawler.fetch_get_json(endpoint=endpoint) endpoint = await generator.user_profile_endpoint()
# 发送请求,获取请求响应结果
response = await crawler.fetch_get_json(endpoint)
return response return response
# 获取综合热门视频信息 # 获取综合热门视频信息
@ -119,9 +143,13 @@ class BilibiliWebCrawler:
# 创建基础爬虫对象 # 创建基础爬虫对象
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
async with base_crawler as crawler: async with base_crawler as crawler:
# 通过模型生成基本请求参数
params = ComPopular(pn=pn)
# 创建请求endpoint # 创建请求endpoint
endpoint = await EndpointModels().com_popular_endpoint(pn=pn) generator = EndpointGenerator(params.dict())
response = await crawler.fetch_get_json(endpoint=endpoint) endpoint = await generator.com_popular_endpoint()
# 发送请求,获取请求响应结果
response = await crawler.fetch_get_json(endpoint)
return response return response
# 获取指定视频的评论 # 获取指定视频的评论
@ -165,12 +193,29 @@ class BilibiliWebCrawler:
# 创建基础爬虫对象 # 创建基础爬虫对象
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
async with base_crawler as crawler: async with base_crawler as crawler:
# 通过模型生成基本请求参数
params = UserDynamic(host_mid=uid, offset=offset)
# 创建请求endpoint # 创建请求endpoint
endpoint = await EndpointModels().user_dynamic_endpoint(uid=uid, offset=offset) generator = EndpointGenerator(params.dict())
endpoint = await generator.user_dynamic_endpoint()
print(endpoint)
# 发送请求,获取请求响应结果 # 发送请求,获取请求响应结果
response = await crawler.fetch_get_json(endpoint) response = await crawler.fetch_get_json(endpoint)
return response return response
# 获取视频实时弹幕
async def fetch_video_danmaku(self, cid: str):
    """Fetch the danmaku document for a video.

    Args:
        cid: cid of the video; interpolated into the request URL.

    Returns:
        The ``text`` attribute of the response from ``crawler.fetch_response``.
    """
    # Headers and proxies for the request come from the crawler's own settings.
    settings = await self.get_bilibili_headers()
    client = BaseCrawler(proxies=settings["proxies"], crawler_headers=settings["headers"])
    async with client as crawler:
        # This URL is written inline rather than taken from BilibiliAPIEndpoints.
        xml_response = await crawler.fetch_response(f"https://comment.bilibili.com/{cid}.xml")
    return xml_response.text
# 获取指定直播间信息 # 获取指定直播间信息
async def fetch_live_room_detail(self, room_id: str) -> dict: async def fetch_live_room_detail(self, room_id: str) -> dict:
# 获取请求头信息 # 获取请求头信息
@ -185,24 +230,50 @@ class BilibiliWebCrawler:
return response return response
# 获取指定直播间视频流 # 获取指定直播间视频流
# async def fetch_live_videos(self, room_id: str) -> dict: async def fetch_live_videos(self, room_id: str) -> dict:
# # 获取请求头信息 # 获取请求头信息
# kwargs = await self.get_bilibili_headers() kwargs = await self.get_bilibili_headers()
# # 创建基础爬虫对象 # 创建基础爬虫对象
# base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"]) base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
# async with base_crawler as crawler: async with base_crawler as crawler:
# # 创建请求endpoint # 创建请求endpoint
# endpoint = f"{BilibiliAPIEndpoints.LIVE_VIDEOS}?cid={room_id}&quality=4" endpoint = f"{BilibiliAPIEndpoints.LIVE_VIDEOS}?cid={room_id}&quality=4"
# # 发送请求,获取请求响应结果 # 发送请求,获取请求响应结果
# response = await crawler.fetch_get_json(endpoint) response = await crawler.fetch_get_json(endpoint)
# return response return response
# 获取指定分区正在直播的主播
async def fetch_live_streamers(self, area_id: str, pn: int):
    """Fetch the streamers currently live in one live area.

    Args:
        area_id: Live area id; sent as ``parent_area_id``.
        pn: Page number; sent as ``page``.

    Returns:
        The result of ``crawler.fetch_get_json`` for the live-streamer endpoint.
    """
    # Headers and proxies for the request come from the crawler's own settings.
    settings = await self.get_bilibili_headers()
    client = BaseCrawler(proxies=settings["proxies"], crawler_headers=settings["headers"])
    async with client as crawler:
        url = f"{BilibiliAPIEndpoints.LIVE_STREAMER}?platform=web&parent_area_id={area_id}&page={pn}"
        payload = await crawler.fetch_get_json(url)
    return payload
"-------------------------------------------------------utils接口列表-------------------------------------------------------" "-------------------------------------------------------utils接口列表-------------------------------------------------------"
# 通过bv号获得视频aid号 # 通过bv号获得视频aid号
async def get_aid(self, bv_id: str) -> int: async def bv_to_aid(self, bv_id: str) -> int:
aid = await bv2av(bv_id=bv_id) aid = await bv2av(bv_id=bv_id)
return aid return aid
# 通过bv号获得视频分p信息
async def fetch_video_parts(self, bv_id: str) -> dict:
    """Fetch the part list of a video by its BV id.

    Args:
        bv_id: BV id of the video; sent as the ``bvid`` query field.

    Returns:
        The result of ``crawler.fetch_get_json`` for the video-parts endpoint.
        The annotation was ``str``; sibling methods returning the same call's
        result are annotated ``dict``, so it is corrected to match.
    """
    # Headers and proxies for the request come from the crawler's own settings.
    kwargs = await self.get_bilibili_headers()
    base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
    async with base_crawler as crawler:
        # This endpoint is requested without a w_rid signature.
        endpoint = f"{BilibiliAPIEndpoints.VIDEO_PARTS}?bvid={bv_id}"
        response = await crawler.fetch_get_json(endpoint)
    return response
# 获取所有直播分区列表 # 获取所有直播分区列表
async def fetch_all_live_areas(self) -> dict: async def fetch_all_live_areas(self) -> dict:
# 获取请求头信息 # 获取请求头信息
@ -216,12 +287,6 @@ class BilibiliWebCrawler:
response = await crawler.fetch_get_json(endpoint) response = await crawler.fetch_get_json(endpoint)
return response return response
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
async def uid_to_wrid(self, uid: str) -> dict:
result = await EndpointModels().get_wrid_wts_by_uid(uid=uid)
return result
"-------------------------------------------------------main-------------------------------------------------------" "-------------------------------------------------------main-------------------------------------------------------"
async def main(self): async def main(self):
@ -231,8 +296,14 @@ class BilibiliWebCrawler:
# result = await self.fetch_one_video(bv_id=bv_id) # result = await self.fetch_one_video(bv_id=bv_id)
# print(result) # print(result)
# 获取视频流地址
# bv_id = 'BV1y7411Q7Eq'
# cid = '171776208'
# result = await self.fetch_video_playurl(bv_id=bv_id, cid=cid)
# print(result)
# 获取用户发布作品数据 # 获取用户发布作品数据
# uid = '178360345' # uid = '94510621'
# pn = 1 # pn = 1
# result = await self.fetch_user_post_videos(uid=uid, pn=pn) # result = await self.fetch_user_post_videos(uid=uid, pn=pn)
# print(result) # print(result)
@ -273,35 +344,46 @@ class BilibiliWebCrawler:
# 获取指定用户动态 # 获取指定用户动态
# uid = "16015678" # uid = "16015678"
# offset = "953154282154098691" # 翻页索引,为空即从最新动态开始,可从获得到的动态数据里面获得 # offset = "" # 翻页索引,为空即从最新动态开始
# result = await self.fetch_user_dynamic(uid=uid, offset=offset) # result = await self.fetch_user_dynamic(uid=uid, offset=offset)
# print(result) # print(result)
# 获取视频实时弹幕
# cid = "1639235405"
# result = await self.fetch_video_danmaku(cid=cid)
# print(result)
# 获取指定直播间信息 # 获取指定直播间信息
# room_id = "22816111" # room_id = "1815229528"
# result = await self.fetch_live_room_detail(room_id=room_id) # result = await self.fetch_live_room_detail(room_id=room_id)
# print(result) # print(result)
# 获取直播间视频流 # 获取直播间视频流
# room_id = "22816111" # room_id = "1815229528"
# result = await self.fetch_user_live_videos_by_room_id(room_id=room_id) # result = await self.fetch_live_videos(room_id=room_id)
# print(result) # print(result)
# 获取指定分区正在直播的主播
pn = 1
area_id = '9'
result = await self.fetch_live_streamers(area_id=area_id, pn=pn)
print(result)
"-------------------------------------------------------utils接口列表-------------------------------------------------------" "-------------------------------------------------------utils接口列表-------------------------------------------------------"
# 通过bv号获得视频aid号 # 通过bv号获得视频aid号
# bv_id = 'BV1M1421t7hT' # bv_id = 'BV1M1421t7hT'
# aid = await self.get_aid(bv_id=bv_id) # aid = await self.bv_to_aid(bv_id=bv_id)
# print(aid) # print(aid)
# 通过bv号获得视频分p信息
# bv_id = "BV1vf421i7hV"
# result = await self.fetch_video_parts(bv_id=bv_id)
# print(result)
# 获取所有直播分区列表 # 获取所有直播分区列表
# result = await self.fetch_all_live_areas() # result = await self.fetch_all_live_areas()
# print(result) # print(result)
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
# uid = '178360345'
# w_rid = await self.uid_to_wrid(uid=uid)
# print(w_rid)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -184,21 +184,3 @@ def get_wrid(e):
n = None n = None
i = twords_to_bytes(o(e, n)) i = twords_to_bytes(o(e, n))
return tbytes_to_hex(i) return tbytes_to_hex(i)
# def test():
# e = "dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter=%7B%22ds%22%3A%5B%5D%2C%22wh%22%3A%5B3697%2C5674%2C33%5D%2C%22of%22%3A%5B222%2C444%2C222%5D%7D&dm_img_list=%5B%5D&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&mid=3546666038725258&platform=web&token=&web_location=1550101&wts=1723867512ea1db124af3c7062474693fa704f4ff8"
# n = None
# x = o(e, n)
# i = twords_to_bytes(x)
# return tbytes_to_hex(i)
# if __name__ == '__main__':
# # test()
# encode_query = "dm_cover_img_str=QU5HTEUgKE5WSURJQSwgTlZJRElBIEdlRm9yY2UgUlRYIDMwNTAgTGFwdG9wIEdQVSAoMHgwMDAwMjVBMikgRGlyZWN0M0QxMSB2c181XzAgcHNfNV8wLCBEM0QxMSlHb29nbGUgSW5jLiAoTlZJRElBKQ&dm_img_inter=%7B%22ds%22%3A%5B%5D%2C%22wh%22%3A%5B3697%2C5674%2C33%5D%2C%22of%22%3A%5B222%2C444%2C222%5D%7D&dm_img_list=%5B%5D&dm_img_str=V2ViR0wgMS4wIChPcGVuR0wgRVMgMi4wIENocm9taXVtKQ&mid=3546666038725258&platform=web&token=&web_location=1550101&wts=1723867512ea1db124af3c7062474693fa704f4ff8"
# wrid1 = main(encode_query)
# print(wrid1)
#
# js1 = open('./wrid.js', 'r', encoding='utf-8').read()
# wrid2 = execjs.compile(js1).call('main', encode_query)
# print(wrid2)