猿人学第2题-headers请求顺序-sessionid

This commit is contained in:
luzhisheng 2022-04-06 14:24:53 +08:00
parent 2299bf9e5e
commit 41b850ad7f
3 changed files with 133 additions and 0 deletions

View File

@ -0,0 +1,48 @@
## 解题思路
有些网站会验证提交的参数值，且是同键名不同值的字段，这就是针对 Python 爬虫的反制，因为 Python 的字典里默认不能出现同键名不同值的项。想到这里，我突然想到 headers：有的网站会验证请求头的顺序，也就是需要有序的字典。早期 Python 的字典是无序的，不过从 Python 3.7 开始（CPython 3.6 中已是实现细节），字典会保持插入顺序。我记得给 requests 传入 headers=headers 参数时，requests 会自动对 headers 字段做一定的排序处理。
测试代码
import requests
class Headers:
    """Header container that exposes its fields through ``items()`` only.

    ``items()`` returns (name, value) pairs in exactly the order listed
    below: the regular fields first, then one ``cookie`` pair per cookie
    value.

    NOTE(review): the ``cookie`` name repeats eight times; how the HTTP
    client treats a repeated name is not visible here -- verify that every
    cookie value is actually sent.
    """

    # Non-cookie fields, in the order they are returned.
    _FIELDS = (
        ('content-length', '0'),
        ('pragma', 'no-cache'),
        ('cache-control', 'no-cache'),
        ('sec-ch-ua', '"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"'),
        ('sec-ch-ua-mobile', '?0'),
        ('user-agent',
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'),
        ('sec-ch-ua-platform', '"macOS"'),
        ('accept', '*/*'),
        ('origin', 'https://match.yuanrenxue.com'),
        ('sec-fetch-site', 'same-origin'),
        ('sec-fetch-mode', 'cors'),
        ('sec-fetch-dest', 'empty'),
        ('referer', 'https://match.yuanrenxue.com/match/3'),
        ('accept-encoding', 'gzip, deflate, br'),
        ('accept-language', 'zh-CN,zh;q=0.9'),
    )

    # Cookie values; each becomes its own ('cookie', value) pair.
    _COOKIES = (
        'Hm_lvt_c99546cf032aaa5a679230de9a95c7db=1648698333,1648863299',
        'Hm_lvt_9bcbda9cbf86757998a2339a0437208e=1648718340,1648863297',
        'no-alert3=true',
        'sessionid=6k0qhqvkp2jwtcph63e3k6ft7nwbl8ov',
        'm=155',
        'tk=9019357195599414472',
        'Hm_lpvt_9bcbda9cbf86757998a2339a0437208e=1649223546',
        'Hm_lpvt_c99546cf032aaa5a679230de9a95c7db=1649224147',
    )

    def items(self):
        """Return every (name, value) pair as one tuple, cookies last."""
        cookie_pairs = tuple(('cookie', value) for value in self._COOKIES)
        return self._FIELDS + cookie_pairs
# POST the ordered headers to the /jssm endpoint and show what comes back.
# NOTE: verify=False switches off TLS certificate verification for this call.
req = requests.post(
    'https://match.yuanrenxue.com/jssm',
    headers=Headers(),
    verify=False,
    timeout=10,  # fail instead of hanging forever if the server never answers
)
print(req.cookies)
print(req.headers)
输出结果
{'Server': 'nginx', 'Date': 'Wed, 06 Apr 2022 05:55:00 GMT', 'Content-Type': 'image/jpg', 'Content-Length': '0', 'Connection': 'keep-alive', 'Vary': 'Cookie', 'Set-Cookie': 'sessionid=663n3wmn8sdhth9jt101kx3v14x82zaq; expires=Wed, 06 Apr 2022 11:54:59 GMT; HttpOnly; Max-Age=21600; Path=/; SameSite=Lax'}
最后提示一下，用 Postman 测试请求也会存在此类问题

View File

@ -0,0 +1,36 @@
import requests
class Headers:
    """Minimal header holder: only ``items()`` is implemented.

    The (name, value) pairs are kept in one class-level tuple and handed
    back unchanged, so callers see them in exactly this order. The
    ``cookie`` name occurs once per cookie value.

    NOTE(review): what the HTTP client does with the repeated ``cookie``
    name cannot be told from this file -- confirm all values reach the wire.
    """

    _USER_AGENT = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        ' (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36'
    )

    # Order matters here; do not sort or deduplicate.
    _ORDERED_PAIRS = (
        ('content-length', '0'),
        ('pragma', 'no-cache'),
        ('cache-control', 'no-cache'),
        ('sec-ch-ua', '"Google Chrome";v="93", " Not;A Brand";v="99", "Chromium";v="93"'),
        ('sec-ch-ua-mobile', '?0'),
        ('user-agent', _USER_AGENT),
        ('sec-ch-ua-platform', '"macOS"'),
        ('accept', '*/*'),
        ('origin', 'https://match.yuanrenxue.com'),
        ('sec-fetch-site', 'same-origin'),
        ('sec-fetch-mode', 'cors'),
        ('sec-fetch-dest', 'empty'),
        ('referer', 'https://match.yuanrenxue.com/match/3'),
        ('accept-encoding', 'gzip, deflate, br'),
        ('accept-language', 'zh-CN,zh;q=0.9'),
        ('cookie', 'Hm_lvt_c99546cf032aaa5a679230de9a95c7db=1648698333,1648863299'),
        ('cookie', 'Hm_lvt_9bcbda9cbf86757998a2339a0437208e=1648718340,1648863297'),
        ('cookie', 'no-alert3=true'),
        ('cookie', 'sessionid=6k0qhqvkp2jwtcph63e3k6ft7nwbl8ov'),
        ('cookie', 'm=155'),
        ('cookie', 'tk=9019357195599414472'),
        ('cookie', 'Hm_lpvt_9bcbda9cbf86757998a2339a0437208e=1649223546'),
        ('cookie', 'Hm_lpvt_c99546cf032aaa5a679230de9a95c7db=1649224147'),
    )

    def items(self):
        """Return the stored (name, value) pairs as a tuple, in order."""
        return self._ORDERED_PAIRS
# POST the ordered headers to the /jssm endpoint and print the response headers.
req = requests.post(
    'https://match.yuanrenxue.com/jssm',
    headers=Headers(),
    timeout=10,  # fail instead of hanging forever if the server never answers
)
print(req.headers)

View File

@ -0,0 +1,49 @@
import requests
def get_cookie(page):
    """Obtain a fresh session cookie, then download one page of match data.

    A new ``requests.Session`` is created per call. It first POSTs to the
    ``/jssm`` endpoint and prints the cookies of that response, then GETs
    ``/api/match/3`` for ``page``. Every ``value`` found under the JSON
    response's ``data`` key is appended to the module-level ``data_list``.

    Args:
        page: Page number to request. Pages 4 and 5 are sent with an extra
            ``user-agent`` entry set to ``yuanrenxue.project``.

    Returns:
        The list of values collected for this page (the same values that
        were appended to ``data_list``).
    """
    url = 'https://match.yuanrenxue.com/jssm'
    headers = {
        'Host': 'match.yuanrenxue.com',
        'Connection': 'keep-alive',
        'Content-Length': '0',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'sec-ch-ua': '"Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/95.0.4638.10 Safari/537.36',
        'sec-ch-ua-platform': '"Windows"',
        'Accept': '*/*',
        'Origin': 'https://match.yuanrenxue.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://match.yuanrenxue.com/match/3',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    request_timeout = 10  # seconds; keeps a stalled server from hanging the run

    # The context manager closes the session (and its pooled connections)
    # even if one of the requests raises.
    with requests.Session() as session:
        # Assigning the dict replaces the session's default headers object.
        session.headers = headers
        cookies = {'Cookie': 'sessionid=;'}
        # NOTE: verify=False switches off TLS certificate verification here.
        response = session.post(url, verify=False, cookies=cookies, timeout=request_timeout)
        sessionid = requests.utils.dict_from_cookiejar(response.cookies)
        print(sessionid)

        if page in (4, 5):
            # Same dict object as session.headers. This adds a lowercase key
            # next to 'User-Agent' rather than overwriting it.
            # NOTE(review): presumably the client folds both into one header
            # when sending -- confirm.
            headers['user-agent'] = 'yuanrenxue.project'

        url_api = 'http://match.yuanrenxue.com/api/match/3?page={page}'.format(page=page)
        res = session.get(url=url_api, timeout=request_timeout).json()

    values = [entry['value'] for entry in res['data']]
    data_list.extend(values)
    return values
if __name__ == '__main__':
    from collections import Counter

    # get_cookie() appends the values of every page into this module-level list.
    data_list = []
    for page_number in range(1, 6):
        get_cookie(page_number)

    print('======================')
    if data_list:
        # Counter tallies in a single pass instead of calling list.count()
        # once per element. Ties go to the value encountered first, matching
        # max(data_list, key=data_list.count).
        # Assumes the collected values are hashable (e.g. JSON numbers or
        # strings) -- TODO confirm against the API response.
        most_frequent = Counter(data_list).most_common(1)[0][0]
        print('出現最多的值:', most_frequent)
    else:
        # Nothing was collected; report it instead of failing on an empty list.
        print('no data collected')
    print('======================')