"""Challenge 10 on python-spider.com: the request is rejected on header order.

Notes carried over from the readme that accompanies this script:

* The browser receives data normally, but the same request replayed from
  Postman gets garbled JavaScript back instead of JSON.
* The order in which the browser *displays* request headers is not the
  order in which they are sent; the real order was captured by intercepting
  the request with Charles.
* The headers are assigned to a ``requests`` session so that they are sent
  in exactly the order of the dict below.
"""
import requests


def _build_headers(payload):
    """Return the request headers in the order captured from the browser.

    Args:
        payload: The form-encoded request body; used to compute
            ``content-length``.

    Returns:
        A dict whose insertion order is the header order to send.
    """
    return {
        # The original hard-coded '6', which only matches single-digit
        # pages ("page=1"); "page=10" is 7 bytes and "page=100" is 8.
        'content-length': str(len(payload.encode('utf-8'))),
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'x-requested-with': 'XMLHttpRequest',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
        'sec-ch-ua-platform': '"Windows"',
        'origin': 'https://www.python-spider.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.python-spider.com/challenge/10',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'cookie': ''
    }


def challenge10(page):
    """POST one page request to the challenge 10 API and return its JSON.

    Args:
        page: Page number, sent as the ``page`` form field.

    Returns:
        The decoded JSON body of the response (``run`` reads its ``data``
        list).

    Raises:
        requests.RequestException: If the HTTP request itself fails.
        ValueError: If the response body is not valid JSON (for example the
            garbled JavaScript described in the module docstring).
            NOTE(review): the exact exception class depends on the installed
            ``requests`` version -- confirm.
    """
    url = "https://www.python-spider.com/api/challenge10"
    payload = f"page={page}"
    # A fresh session per call, as before, but closed deterministically so
    # its connection pool is not leaked.
    with requests.Session() as session:
        # Replace the session's headers wholesale instead of passing
        # ``headers=`` per request: per the readme, this is what keeps the
        # headers in the captured order.
        session.headers = _build_headers(payload)
        response = session.request("POST", url, data=payload)
        return response.json()


def run():
    """Sum the ``value`` field of every item on pages 1-100.

    Prints each page's ``data`` list and the running total, then the final
    total once all pages have been fetched.
    """
    data_num = 0
    for page in range(1, 101):
        res_dict = challenge10(page)
        data_list = res_dict.get('data')
        print(data_list)
        data_num += sum(int(data.get('value')) for data in data_list)
        print(data_num)
    print(data_num)


if __name__ == '__main__':
    run()