mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-22 02:05:24 +08:00
代码更新
This commit is contained in:
parent
21536e5c24
commit
b58ade53d9
@ -46,11 +46,11 @@ class extractor(Baes):
|
|||||||
offerUnit = globalData.get('tempModel').get('offerUnit')
|
offerUnit = globalData.get('tempModel').get('offerUnit')
|
||||||
images = globalData.get('images')
|
images = globalData.get('images')
|
||||||
|
|
||||||
for image in images:
|
# for image in images:
|
||||||
fullPathImageURI = image.get('fullPathImageURI')
|
# fullPathImageURI = image.get('fullPathImageURI')
|
||||||
download_img(fullPathImageURI, offerId)
|
# download_img(fullPathImageURI, offerId)
|
||||||
print(f"【{datetime.now()}】图片下载{fullPathImageURI}")
|
# print(f"【{datetime.now()}】图片下载{fullPathImageURI}")
|
||||||
time.sleep(1)
|
# time.sleep(1)
|
||||||
|
|
||||||
a_590893001984 = data.get('590893001984')
|
a_590893001984 = data.get('590893001984')
|
||||||
if not a_590893001984:
|
if not a_590893001984:
|
||||||
@ -72,8 +72,9 @@ class extractor(Baes):
|
|||||||
detailUrl = globalData.get('detailModel').get('detailUrl')
|
detailUrl = globalData.get('detailModel').get('detailUrl')
|
||||||
|
|
||||||
item = {
|
item = {
|
||||||
|
"sign": self.generate_sign("https://detail.1688.com/offer/{}.html".format(offerId)),
|
||||||
"company_name": companyName,
|
"company_name": companyName,
|
||||||
"company_name_url": "https://detail.1688.com/offer/{}.html".format(offerId),
|
"url": "https://detail.1688.com/offer/{}.html".format(offerId),
|
||||||
"title": title,
|
"title": title,
|
||||||
"sub_categorys": sub_categorys,
|
"sub_categorys": sub_categorys,
|
||||||
"sub_colour_categorys": sub_colour_categorys,
|
"sub_colour_categorys": sub_colour_categorys,
|
||||||
@ -86,8 +87,7 @@ class extractor(Baes):
|
|||||||
"unit_weight": unitWeight
|
"unit_weight": unitWeight
|
||||||
}
|
}
|
||||||
print(json.dumps(item))
|
print(json.dumps(item))
|
||||||
exit()
|
self.col.insert_item('CLEAN_CONTENT', item)
|
||||||
|
|
||||||
print(f"【{datetime.now()}】解析{offerId}")
|
print(f"【{datetime.now()}】解析{offerId}")
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ import os
|
|||||||
|
|
||||||
|
|
||||||
MONGODB_CONF = {
|
MONGODB_CONF = {
|
||||||
'host': '192.168.5.151',
|
'host': '127.0.0.1',
|
||||||
'port': 27017,
|
'port': 27017,
|
||||||
'username': '',
|
'username': '',
|
||||||
'pwd': "",
|
'pwd': "",
|
||||||
|
@ -10,15 +10,14 @@ class Film(Baes):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.col = MongoDao()
|
self.col = MongoDao()
|
||||||
self.url = "https://dearbei.1688.com/page/offerlist.htm?spm=a2615.7691456.autotrace-paginator.2&pageNum={}"
|
self.url = "https://shop1456245592469.1688.com/page/offerlist.htm?spm=a2615.7691456.autotrace-paginator." \
|
||||||
|
"4.79f525026COu37&pageNum={}"
|
||||||
super(Film, self).__init__()
|
super(Film, self).__init__()
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
for i in range(15, 24):
|
for i in range(1, 33):
|
||||||
cookie2 = "1bdee7e6f5206d15ccfabb2cc828a2d1"
|
cookie2 = "1bd7858d65500ba53956ab164308ad2a"
|
||||||
x5sec = "7b226b796c696e3b32223a2232386636646266333930343734353861333765356163386535" \
|
x5sec = "7b226b796c696e3b32223a223932613866656331376631373065326331363635306638303764646635666438434f766276497347454c36483635715739704f7968674561437a59324f4449794d5463344e4473784d4f57426e355542227d"
|
||||||
"35636232343339434a757676346f47454c434b357258693249655973674561437a59324f44497" \
|
|
||||||
"4d5463344e4473784d4f57426e355542227d"
|
|
||||||
url = self.url.format(i).replace('detail', 'm')
|
url = self.url.format(i).replace('detail', 'm')
|
||||||
headers = {
|
headers = {
|
||||||
'cookie': f"cookie2={cookie2};x5sec={x5sec}"
|
'cookie': f"cookie2={cookie2};x5sec={x5sec}"
|
||||||
@ -26,21 +25,23 @@ class Film(Baes):
|
|||||||
response = requests.request("GET", url, headers=headers)
|
response = requests.request("GET", url, headers=headers)
|
||||||
|
|
||||||
if '系统自动生成,请勿修改 100%' in response.text:
|
if '系统自动生成,请勿修改 100%' in response.text:
|
||||||
print(f"【{datetime.now()}】报错{i}")
|
print(f"系统自动生成,请勿修改 100%【{datetime.now()}】报错{i}")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
if '全球领先的采购批发平台,批发网' in response.text:
|
if '全球领先的采购批发平台,批发网' in response.text:
|
||||||
print(f"【{datetime.now()}】报错{i}")
|
print(f"全球领先的采购批发平台,批发网【{datetime.now()}】报错{i}")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
sel = Selector(text=response.text, type='html')
|
sel = Selector(text=response.text, type='html')
|
||||||
urls = sel.xpath('//ul[@class="offer-list-row"]//div[@class="image"]/a/@href').extract()
|
urls = sel.xpath('//ul[@class="offer-list-row"]//div[@class="image"]/a/@href').extract()
|
||||||
|
shop_name = sel.xpath('//div[@class="name-wrap"]//a/text()').extract_first()
|
||||||
|
|
||||||
for url in urls:
|
for url in urls:
|
||||||
item = {
|
item = {
|
||||||
"sign": self.generate_sign(url),
|
"sign": self.generate_sign(url),
|
||||||
"url": url,
|
"url": url,
|
||||||
"stauts": "0"
|
"stauts": "0",
|
||||||
|
"shop_name": shop_name
|
||||||
}
|
}
|
||||||
self.col.insert_item('RAW_URLS', item)
|
self.col.insert_item('RAW_URLS', item)
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
@ -16,20 +16,18 @@ class 企业产品详情页面(Baes):
|
|||||||
for s in res:
|
for s in res:
|
||||||
url = s.get('url').replace('detail', 'm')
|
url = s.get('url').replace('detail', 'm')
|
||||||
sign = s.get('sign')
|
sign = s.get('sign')
|
||||||
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a2236653736" \
|
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223532323634333863383734346532666230393835646164396366336533376664434d756776597347454a5056377266593972325539414561437a59324f4449794d5463344e4473784d4f584d6a3462342f2f2f2f2f77453d227d"
|
||||||
"323835663332623033396233366663613833323639396433326236364350372b76346" \
|
|
||||||
"f47454b7a58673776446d357578685145773563795068766a2f2f2f2f2f41513d3d227d"
|
|
||||||
headers = {
|
headers = {
|
||||||
'cookie': f"x5sec={x5sec}"
|
'Cookie': f"x5sec={x5sec}"
|
||||||
}
|
}
|
||||||
response = requests.request("GET", url, headers=headers)
|
response = requests.request("GET", url, headers=headers)
|
||||||
|
|
||||||
if '系统自动生成,请勿修改 100%' in response.text:
|
if '系统自动生成,请勿修改 100%' in response.text:
|
||||||
print(f"【{datetime.now()}】报错{url}")
|
print(f"系统自动生成,请勿修改 100%【{datetime.now()}】报错{url}")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
if '全球领先的采购批发平台,批发网' in response.text:
|
if '全球领先的采购批发平台,批发网' in response.text:
|
||||||
print(f"【{datetime.now()}】报错{url}")
|
print(f"全球领先的采购批发平台,批发网【{datetime.now()}】报错{url}")
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
item = {
|
item = {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user