import asyncio  # asynchronous I/O
import os  # filesystem operations
import time  # timing
import yaml  # config file parsing

# Base crawler client and Bilibili API endpoints
from crawlers.base_crawler import BaseCrawler
from crawlers.bilibili.web.endpoints import BilibiliAPIEndpoints
# Bilibili helper utilities
from crawlers.bilibili.web.utils import EndpointModels, bv2av, ResponseAnalyzer

# Directory of this file (config.yaml lives next to it)
path = os.path.abspath(os.path.dirname(__file__))
# Load the config file
with open(f"{path}/config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)
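
# Expected config.yaml layout, inferred from the key accesses in get_bilibili_headers()
# below. The values shown are placeholders, not real defaults:
#
# TokenManager:
#   bilibili:
#     headers:
#       accept-language: "zh-CN,zh;q=0.9"
#       origin: "https://www.bilibili.com"
#       referer: "https://www.bilibili.com"
#       user-agent: "<browser UA string>"
#       cookie: "<logged-in cookie; some endpoints work without it>"
#     proxies:
#       http: null
#       https: null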


class BilibiliWebCrawler:

    # Build Bilibili request headers and proxies from the config file
    async def get_bilibili_headers(self):
        bili_config = config['TokenManager']['bilibili']
        kwargs = {
            "headers": {
                "accept-language": bili_config["headers"]["accept-language"],
                "origin": bili_config["headers"]["origin"],
                "referer": bili_config["headers"]["referer"],
                "user-agent": bili_config["headers"]["user-agent"],
                "cookie": bili_config["headers"]["cookie"],
            },
            "proxies": {"http://": bili_config["proxies"]["http"], "https://": bili_config["proxies"]["https"]},
        }
        return kwargs
"-------------------------------------------------------handler接口列表-------------------------------------------------------"
# 获取单个视频详情信息
async def fetch_one_video(self, bv_id: str) -> dict:
# 获取请求头信息
kwargs = await self.get_bilibili_headers()
# 创建基础爬虫对象
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
async with base_crawler as crawler:
# 创建请求endpoint
endpoint = f"{BilibiliAPIEndpoints.POST_DETAIL}?bvid={bv_id}"
# 发送请求,获取请求响应结果
response = await crawler.fetch_get_json(endpoint)
return response

    # Fetch videos posted by a user
    async def fetch_user_post_videos(self, uid: str, pn: int) -> dict:
        """
        :param uid: user uid
        :param pn: page number
        :return:
        """
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = await EndpointModels().user_post_videos_endpoint(uid=uid, pn=pn)
            # Send the request and get the JSON response
            response = await crawler.fetch_get_json(endpoint)
            return response

    # Fetch all of a user's favorite folders
    async def fetch_collect_folders(self, uid: str) -> dict:
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = f"{BilibiliAPIEndpoints.COLLECT_FOLDERS}?up_mid={uid}"
            # Send the request and get the JSON response
            response = await crawler.fetch_get_json(endpoint)
            # Parse the response
            result_dict = await ResponseAnalyzer.collect_folders_analyze(response=response)
            return result_dict

    # Fetch videos inside a specific favorite folder
    async def fetch_folder_videos(self, folder_id: str, pn: int) -> dict:
        """
        :param folder_id: folder id -- can be obtained from fetch_collect_folders
        :param pn: page number
        :return:
        """
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        # Send the request and get the JSON response
        async with base_crawler as crawler:
            endpoint = f"{BilibiliAPIEndpoints.COLLECT_VIDEOS}?media_id={folder_id}&pn={pn}&ps=20&keyword=&order=mtime&type=0&tid=0&platform=web"
            response = await crawler.fetch_get_json(endpoint)
            return response

    # Fetch a user's profile info
    async def fetch_user_profile(self, uid: str) -> dict:
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = await EndpointModels().user_profile_endpoint(uid=uid)
            response = await crawler.fetch_get_json(endpoint=endpoint)
            return response

    # Fetch trending (popular) videos
    async def fetch_com_popular(self, pn: int) -> dict:
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = await EndpointModels().com_popular_endpoint(pn=pn)
            response = await crawler.fetch_get_json(endpoint=endpoint)
            return response

    # Fetch comments for a specific video
    async def fetch_video_comments(self, bv_id: str, pn: int) -> dict:
        # Comment sort order -- 1: by like count, 0: by time
        sort = 1
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = f"{BilibiliAPIEndpoints.VIDEO_COMMENTS}?type=1&oid={bv_id}&sort={sort}&nohot=0&ps=20&pn={pn}"
            # Send the request and get the JSON response
            response = await crawler.fetch_get_json(endpoint)
            return response

    # Fetch replies to a specific comment under a video
    async def fetch_comment_reply(self, bv_id: str, pn: int, rpid: str) -> dict:
        """
        :param bv_id: BV id of the target video
        :param pn: page number
        :param rpid: target comment id, can be obtained from fetch_video_comments
        :return:
        """
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = f"{BilibiliAPIEndpoints.COMMENT_REPLY}?type=1&oid={bv_id}&root={rpid}&ps=20&pn={pn}"
            # Send the request and get the JSON response
            response = await crawler.fetch_get_json(endpoint)
            return response

    # Fetch a user's dynamic (feed) posts
    async def fetch_user_dynamic(self, uid: str, offset: str) -> dict:
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = await EndpointModels().user_dynamic_endpoint(uid=uid, offset=offset)
            # Send the request and get the JSON response
            response = await crawler.fetch_get_json(endpoint)
            return response

    # Fetch info about a specific live room
    async def fetch_live_room_detail(self, room_id: str) -> dict:
        # Get request headers
        kwargs = await self.get_bilibili_headers()
        # Create the base crawler
        base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
        async with base_crawler as crawler:
            # Build the request endpoint
            endpoint = f"{BilibiliAPIEndpoints.LIVEROOM_DETAIL}?room_id={room_id}"
            # Send the request and get the JSON response
            response = await crawler.fetch_get_json(endpoint)
            return response

    # Fetch the video stream of a specific live room
    # async def fetch_live_videos(self, room_id: str) -> dict:
    #     # Get request headers
    #     kwargs = await self.get_bilibili_headers()
    #     # Create the base crawler
    #     base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
    #     async with base_crawler as crawler:
    #         # Build the request endpoint
    #         endpoint = f"{BilibiliAPIEndpoints.LIVE_VIDEOS}?cid={room_id}&quality=4"
    #         # Send the request and get the JSON response
    #         response = await crawler.fetch_get_json(endpoint)
    #         return response
"-------------------------------------------------------utils接口列表-------------------------------------------------------"
# 通过bv号获得视频aid号
async def get_aid(self, bv_id: str) -> int:
aid = await bv2av(bv_id=bv_id)
return aid
# 获取所有直播分区列表
async def fetch_all_live_areas(self) -> dict:
# 获取请求头信息
kwargs = await self.get_bilibili_headers()
# 创建基础爬虫对象
base_crawler = BaseCrawler(proxies=kwargs["proxies"], crawler_headers=kwargs["headers"])
async with base_crawler as crawler:
# 创建请求endpoint
endpoint = BilibiliAPIEndpoints.LIVE_AREAS
# 发送请求,获取请求响应结果
response = await crawler.fetch_get_json(endpoint)
return response
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
async def uid_to_wrid(self, uid: str) -> dict:
result = await EndpointModels().get_wrid_wts_by_uid(uid=uid)
return result
"-------------------------------------------------------main-------------------------------------------------------"
async def main(self):
"-------------------------------------------------------handler接口列表-------------------------------------------------------"
# 获取单个作品数据
# bv_id = 'BV1M1421t7hT'
# result = await self.fetch_one_video(bv_id=bv_id)
# print(result)
# 获取用户发布作品数据
# uid = '178360345'
# pn = 1
# result = await self.fetch_user_post_videos(uid=uid, pn=pn)
# print(result)
# 获取用户所有收藏夹信息
# uid = '178360345'
# reslut = await self.fetch_collect_folders(uid=uid)
# print(reslut)
# 获取用户指定收藏夹内视频数据
# folder_id = '1756059545' # 收藏夹id可从<获取用户所有收藏夹信息>获得
# pn = 1
# result = await self.fetch_folder_videos(folder_id=folder_id, pn=pn)
# print(result)
# 获取指定用户的信息
# uid = '178360345'
# result = await self.fetch_user_profile(uid=uid)
# print(result)
# 获取综合热门信息
# pn = 1 # 页码
# result = await self.fetch_com_popular(pn=pn)
# print(result)
# 获取指定视频的评论(不登录只能获取一页的评论)
# bv_id = "BV1M1421t7hT"
# pn = 1
# result = await self.fetch_video_comments(bv_id=bv_id, pn=pn)
# print(result)
# 获取视频下指定评论的回复(不登录只能获取一页的评论)
# bv_id = "BV1M1421t7hT"
# rpid = "237109455120"
# pn = 1
# result = await self.fetch_comment_reply(bv_id=bv_id, pn=pn, rpid=rpid)
# print(result)
# 获取指定用户动态
# uid = "16015678"
# offset = "953154282154098691" # 翻页索引,为空即从最新动态开始,可从获得到的动态数据里面获得
# result = await self.fetch_user_dynamic(uid=uid, offset=offset)
# print(result)
# 获取指定直播间信息
# room_id = "22816111"
# result = await self.fetch_live_room_detail(room_id=room_id)
# print(result)
# 获取直播间视频流
# room_id = "22816111"
# result = await self.fetch_user_live_videos_by_room_id(room_id=room_id)
# print(result)
"-------------------------------------------------------utils接口列表-------------------------------------------------------"
# 通过bv号获得视频aid号
# bv_id = 'BV1M1421t7hT'
# aid = await self.get_aid(bv_id=bv_id)
# print(aid)
# 获取所有直播分区列表
# result = await self.fetch_all_live_areas()
# print(result)
# 根据uid生成wts及其对应w_rid参数(包含dm_img_inter参数)
# (仅示例 不同接口所需要传进去的参数不同)(待改进)
# uid = '178360345'
# w_rid = await self.uid_to_wrid(uid=uid)
# print(w_rid)


if __name__ == '__main__':
    # Initialize the crawler (use a distinct variable name so the class is not shadowed)
    bilibili_web_crawler = BilibiliWebCrawler()
    # Start time
    start = time.time()
    asyncio.run(bilibili_web_crawler.main())
    # End time
    end = time.time()
    print(f"Elapsed time: {end - start}")