diff --git a/1688/spider/1688企业产品列表页面.py b/1688/spider/1688企业产品列表页面.py index b0f22f0..fe11330 100644 --- a/1688/spider/1688企业产品列表页面.py +++ b/1688/spider/1688企业产品列表页面.py @@ -10,14 +10,15 @@ class Film(Baes): def __init__(self): self.col = MongoDao() - self.url = "https://shop1456245592469.1688.com/page/offerlist.htm?spm=a2615.7691456.autotrace-paginator." \ + domain = "https://hymxfs.1688.com/" + self.url = f"{domain}page/offerlist.htm?spm=a2615.7691456.autotrace-paginator." \ "4.79f525026COu37&pageNum={}" super(Film, self).__init__() def run(self): for i in range(1, 33): - cookie2 = "1bd7858d65500ba53956ab164308ad2a" - x5sec = "7b226b796c696e3b32223a223932613866656331376631373065326331363635306638303764646635666438434f766276497347454c36483635715739704f7968674561437a59324f4449794d5463344e4473784d4f57426e355542227d" + cookie2 = "181121407f591d0971aa4a0751559b75" + x5sec = "7b226b796c696e3b32223a223736366266373939656335633166326666653261393931656464613964306339434d6e57386f7347455047366974503932497a5232674561437a51344f5463774e7a6b774e7a73784b414977355947666c51453d227d" url = self.url.format(i).replace('detail', 'm') headers = { 'cookie': f"cookie2={cookie2};x5sec={x5sec}" diff --git a/1688/spider/1688企业产品详情页面.py b/1688/spider/1688企业产品详情页面.py index ab4c970..6a64888 100644 --- a/1688/spider/1688企业产品详情页面.py +++ b/1688/spider/1688企业产品详情页面.py @@ -16,7 +16,7 @@ class 企业产品详情页面(Baes): for s in res: url = s.get('url').replace('detail', 'm') sign = s.get('sign') - x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223532323634333863383734346532666230393835646164396366336533376664434d756776597347454a5056377266593972325539414561437a59324f4449794d5463344e4473784d4f584d6a3462342f2f2f2f2f77453d227d" + x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a22313336323861633166303531646664306233326164313139386263343465313343505867386f7347454f7963717172677a49437643686f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d" headers = { 'Cookie': f"x5sec={x5sec}" } diff --git a/1688/spider/导出到本地csv数据.py b/1688/spider/导出到本地csv数据.py index 9df1f5a..61267af 100644 --- a/1688/spider/导出到本地csv数据.py +++ b/1688/spider/导出到本地csv数据.py @@ -16,7 +16,7 @@ class 导出到本地csv数据(Baes): self.writer = pd.ExcelWriter(pd_path, options={'strings_to_urls': False}) def export(self): - res = self.col.find_item('CLEAN_CONTENT', {}, None) + res = self.col.find_item('CLEAN_CONTENT', {"company_name" : "坂戈实力旗舰店"}, None) # 初始化df df = pd.DataFrame(columns={ @@ -69,6 +69,20 @@ class 导出到本地csv数据(Baes): url_path = urlparse(fullPathImageURI).path carousel_id.append(url_path.split("/")[-1]) + pp = '' + hh = '' + bz = '' + cz = '' + cc = '' + ys = '' + yszlzy = '' + sfjk = '' + zx = '' + zyxy = '' + zyxsdq = '' + yksqdzypp = '' + sfkjckzgzy = '' + propsList = s.get('propsList') for props in propsList: if props.get('name') == "品牌": @@ -174,7 +188,7 @@ class 导出到本地csv数据(Baes): imageUrl_id = '' specAttrs = sub_category.get('specAttrs') for sub_colour_category in sub_colour_categorys: - if specAttrs == sub_colour_category.get('name'): + if sub_colour_category.get('name') in specAttrs: imageUrl = sub_colour_category.get('imageUrl') or '' if imageUrl: url_path = urlparse(imageUrl).path