# Mirror of https://github.com/ylw00/qxVm.git
# (synced 2025-04-22 14:09:44 +08:00; 108 lines, 3.7 KiB, Python)
import requests, re
|
||
from urllib.parse import unquote
|
||
|
||
|
||
import execjs
|
||
|
||
|
||
def _query_param(location, key):
    """Return the raw (still URL-encoded) value of query parameter *key*.

    The match is anchored on ``?``/``&`` (or the start of the string) so that
    a key such as ``ts`` cannot match the tail of a longer name, and the value
    ends at the next ``&``/``#`` or at the end of the string, so a parameter
    that happens to be last is still found.

    Raises:
        ValueError: if *key* is not present in *location*.
    """
    found = re.search(r'(?:^|[?&])' + re.escape(key) + r'=([^&#]*)', location)
    if found is None:
        raise ValueError(
            'parameter %r not found in redirect location: %s' % (key, location)
        )
    return found.group(1)


def get_index(url):
    """Request the list page and extract ``seed``, ``js-filename`` and ``ts``.

    The request is sent with redirects disabled; the three values are read
    from the query string of the ``location`` response header.

    Args:
        url: list-page URL to request.

    Returns:
        A ``(seed, name, ts)`` tuple of strings. ``seed`` is URL-decoded,
        ``name`` and ``ts`` are returned exactly as they appear in the header.

    Raises:
        ValueError: if the response has no ``location`` header, or one of the
            three parameters is missing from it.
    """
    headers = {
        'authority': 'www.zhipin.com',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'none',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'accept-language': 'zh-CN,zh;q=0.9',
    }
    # Redirects stay disabled: the parameters live in the redirect target
    # itself. The timeout keeps a stalled connection from hanging forever.
    res = requests.get(url, headers=headers, allow_redirects=False, timeout=10)
    print('===', res.text, '====')

    location = res.headers.get('location')
    if location is None:
        raise ValueError(
            'no "location" header in response (status %s); '
            'the server did not redirect' % res.status_code
        )

    seed = unquote(_query_param(location, 'seed'))
    name = _query_param(location, 'name')
    ts = _query_param(location, 'ts')

    print(res.headers)
    print("seed:", seed)
    print("js-filename:", name)
    print("ts:", ts)
    return seed, name, ts
|
||
|
||
|
||
def get_code(name):
    """Download the dynamic JS code identified by *name*.

    Args:
        name: the js-filename (without the ``.js`` suffix) to request from
            the ``security-js`` path.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            an error page is never handed on as if it were JS code.
    """
    headers = {
        'authority': 'www.zhipin.com',
        'cache-control': 'max-age=0',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'none',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-user': '?1',
        'sec-fetch-dest': 'document',
        'accept-language': 'zh-CN,zh;q=0.9',
    }
    url = 'https://www.zhipin.com/web/common/security-js/%s.js' % name
    # Bound the wait and fail loudly on HTTP errors.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    print("code.length:", len(response.text))
    return response.text
|
||
|
||
|
||
def get_cookie(seed, ts, code):
    """Compute ``zp_stoken`` from *seed*, *ts* and the downloaded JS *code*.

    The JS code, with a small ``get_form`` wrapper appended, is posted as JSON
    to the service at ``http://127.0.0.1:3300/boss``; the token is read from
    the ``"zptokn"`` key of the JSON reply.

    Args:
        seed: seed value taken from the redirect location.
        ts: timestamp value taken from the redirect location.
        code: JS source text returned by ``get_code``.

    Returns:
        The ``zp_stoken`` value returned by the service.

    Raises:
        requests.HTTPError: if the service answers with a 4xx/5xx status.
        KeyError: if the JSON reply has no ``"zptokn"`` key.
    """
    # Wrapper appended to the downloaded code; kept byte-for-byte.
    wrapper = """
function get_form(m, t) {
let o = window.ABC;
return (new o).z(m, t);
}"""
    data = {
        "seed": seed,
        "ts": ts,
        "js_code": code + wrapper,
    }
    # NOTE: the original author left a commented-out alternative that called
    # a module-level execjs context instead of this HTTP service.
    response = requests.post('http://127.0.0.1:3300/boss', json=data, timeout=30)
    response.raise_for_status()
    zp_stoken = response.json()["zptokn"]

    print("zp_stoken:", zp_stoken)
    return zp_stoken
|
||
|
||
|
||
def main(url):
    """Run the whole flow for one list-page URL and print the final page.

    Steps:
      1. Request the list page to obtain the parameters.
      2. Download the JS code.
      3. Obtain the cookie value.
      4. Request the list page again with the cookie and print the response.

    Args:
        url: list-page URL to fetch.
    """
    seed, name, ts = get_index(url)
    print(seed, name, ts)
    code = get_code(name)
    zp_stoken = get_cookie(seed, ts, code)

    headers = {
        'authority': 'www.zhipin.com',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-dest': 'document',
        'accept-language': 'zh-CN,zh;q=0.9',
        "cookie": '__zp_stoken__=' + zp_stoken,
    }
    # The timeout keeps a stalled connection from blocking the script forever.
    res = requests.get(url, headers=headers, timeout=10)
    print(res.text)
|
||
|
||
|
||
if __name__ == "__main__":
    # Compile the local boss.js into an execjs context. No live code visible
    # in this file reads `ctx`, but it is kept as a module-level name.
    with open("boss.js", "r", encoding="utf-8") as js_file:
        js_source = js_file.read()
    ctx = execjs.compile(js_source)

    # List-page URL
    url = 'https://www.zhipin.com/job_detail/?query=java&city=101280600&industry=&position='
    main(url)
|