1688代码更新

This commit is contained in:
aiguigu 2021-11-03 08:20:24 +08:00
parent 363a490400
commit bc1a2ed3fa
3 changed files with 21 additions and 6 deletions

View File

@@ -10,14 +10,15 @@ class Film(Baes):
def __init__(self):
self.col = MongoDao()
self.url = "https://shop1456245592469.1688.com/page/offerlist.htm?spm=a2615.7691456.autotrace-paginator." \
domain = "https://hymxfs.1688.com/"
self.url = f"{domain}page/offerlist.htm?spm=a2615.7691456.autotrace-paginator." \
"4.79f525026COu37&pageNum={}"
super(Film, self).__init__()
def run(self):
for i in range(1, 33):
cookie2 = "1bd7858d65500ba53956ab164308ad2a"
x5sec = "7b226b796c696e3b32223a223932613866656331376631373065326331363635306638303764646635666438434f766276497347454c36483635715739704f7968674561437a59324f4449794d5463344e4473784d4f57426e355542227d"
cookie2 = "181121407f591d0971aa4a0751559b75"
x5sec = "7b226b796c696e3b32223a223736366266373939656335633166326666653261393931656464613964306339434d6e57386f7347455047366974503932497a5232674561437a51344f5463774e7a6b774e7a73784b414977355947666c51453d227d"
url = self.url.format(i).replace('detail', 'm')
headers = {
'cookie': f"cookie2={cookie2};x5sec={x5sec}"

View File

@@ -16,7 +16,7 @@ class 企业产品详情页面(Baes):
for s in res:
url = s.get('url').replace('detail', 'm')
sign = s.get('sign')
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223532323634333863383734346532666230393835646164396366336533376664434d756776597347454a5056377266593972325539414561437a59324f4449794d5463344e4473784d4f584d6a3462342f2f2f2f2f77453d227d"
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a22313336323861633166303531646664306233326164313139386263343465313343505867386f7347454f7963717172677a49437643686f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
headers = {
'Cookie': f"x5sec={x5sec}"
}

View File

@@ -16,7 +16,7 @@ class 导出到本地csv数据(Baes):
self.writer = pd.ExcelWriter(pd_path, options={'strings_to_urls': False})
def export(self):
res = self.col.find_item('CLEAN_CONTENT', {}, None)
res = self.col.find_item('CLEAN_CONTENT', {"company_name" : "坂戈实力旗舰店"}, None)
# 初始化df
df = pd.DataFrame(columns={
@@ -69,6 +69,20 @@ class 导出到本地csv数据(Baes):
url_path = urlparse(fullPathImageURI).path
carousel_id.append(url_path.split("/")[-1])
pp = ''
hh = ''
bz = ''
cz = ''
cc = ''
ys = ''
yszlzy = ''
sfjk = ''
zx = ''
zyxy = ''
zyxsdq = ''
yksqdzypp = ''
sfkjckzgzy = ''
propsList = s.get('propsList')
for props in propsList:
if props.get('name') == "品牌":
@@ -174,7 +188,7 @@ class 导出到本地csv数据(Baes):
imageUrl_id = ''
specAttrs = sub_category.get('specAttrs')
for sub_colour_category in sub_colour_categorys:
if specAttrs == sub_colour_category.get('name'):
if sub_colour_category.get('name') in specAttrs:
imageUrl = sub_colour_category.get('imageUrl') or ''
if imageUrl:
url_path = urlparse(imageUrl).path