From b58ade53d90e1823e5bbb4c2f96751771fa9406a Mon Sep 17 00:00:00 2001
From: aiguigu <aiyingfeng110@qq.com>
Date: Wed, 20 Oct 2021 09:28:21 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 1688/clean/extractor.py             | 16 ++++++++--------
 1688/settings.py                    |  2 +-
 1688/spider/1688企业产品列表页面.py | 19 ++++++++++---------
 1688/spider/1688企业产品详情页面.py | 10 ++++------
 4 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/1688/clean/extractor.py b/1688/clean/extractor.py
index e00a738..cfec631 100644
--- a/1688/clean/extractor.py
+++ b/1688/clean/extractor.py
@@ -46,11 +46,11 @@ class extractor(Baes):
             offerUnit = globalData.get('tempModel').get('offerUnit')
             images = globalData.get('images')
 
-            for image in images:
-                fullPathImageURI = image.get('fullPathImageURI')
-                download_img(fullPathImageURI, offerId)
-                print(f"【{datetime.now()}】图片下载{fullPathImageURI}")
-                time.sleep(1)
+            # for image in images:
+            #     fullPathImageURI = image.get('fullPathImageURI')
+            #     download_img(fullPathImageURI, offerId)
+            #     print(f"【{datetime.now()}】图片下载{fullPathImageURI}")
+            #     time.sleep(1)
 
             a_590893001984 = data.get('590893001984')
             if not a_590893001984:
@@ -72,8 +72,9 @@ class extractor(Baes):
             detailUrl = globalData.get('detailModel').get('detailUrl')
 
             item = {
+                "sign": self.generate_sign("https://detail.1688.com/offer/{}.html".format(offerId)),
                 "company_name": companyName,
-                "company_name_url": "https://detail.1688.com/offer/{}.html".format(offerId),
+                "url": "https://detail.1688.com/offer/{}.html".format(offerId),
                 "title": title,
                 "sub_categorys": sub_categorys,
                 "sub_colour_categorys": sub_colour_categorys,
@@ -86,8 +87,7 @@ class extractor(Baes):
                 "unit_weight": unitWeight
             }
             print(json.dumps(item))
-            exit()
-
+            self.col.insert_item('CLEAN_CONTENT', item)
             print(f"【{datetime.now()}】解析{offerId}")
 
 
diff --git a/1688/settings.py b/1688/settings.py
index e123fc9..54b5073 100644
--- a/1688/settings.py
+++ b/1688/settings.py
@@ -2,7 +2,7 @@ import os
 
 
 MONGODB_CONF = {
-    'host': '192.168.5.151',
+    'host': '127.0.0.1',
     'port': 27017,
     'username': '',
     'pwd': "",
diff --git a/1688/spider/1688企业产品列表页面.py b/1688/spider/1688企业产品列表页面.py
index 8cb1874..b0f22f0 100644
--- a/1688/spider/1688企业产品列表页面.py
+++ b/1688/spider/1688企业产品列表页面.py
@@ -10,15 +10,14 @@ class Film(Baes):
 
     def __init__(self):
         self.col = MongoDao()
-        self.url = "https://dearbei.1688.com/page/offerlist.htm?spm=a2615.7691456.autotrace-paginator.2&pageNum={}"
+        self.url = "https://shop1456245592469.1688.com/page/offerlist.htm?spm=a2615.7691456.autotrace-paginator." \
+                   "4.79f525026COu37&pageNum={}"
         super(Film, self).__init__()
 
     def run(self):
-        for i in range(15, 24):
-            cookie2 = "1bdee7e6f5206d15ccfabb2cc828a2d1"
-            x5sec = "7b226b796c696e3b32223a2232386636646266333930343734353861333765356163386535" \
-                    "35636232343339434a757676346f47454c434b357258693249655973674561437a59324f44497" \
-                    "4d5463344e4473784d4f57426e355542227d"
+        for i in range(1, 33):
+            cookie2 = "1bd7858d65500ba53956ab164308ad2a"
+            x5sec = "7b226b796c696e3b32223a223932613866656331376631373065326331363635306638303764646635666438434f766276497347454c36483635715739704f7968674561437a59324f4449794d5463344e4473784d4f57426e355542227d"
             url = self.url.format(i).replace('detail', 'm')
             headers = {
                 'cookie': f"cookie2={cookie2};x5sec={x5sec}"
@@ -26,21 +25,23 @@ class Film(Baes):
             response = requests.request("GET", url, headers=headers)
 
             if '系统自动生成，请勿修改 100%' in response.text:
-                print(f"【{datetime.now()}】报错{i}")
+                print(f"系统自动生成，请勿修改 100%【{datetime.now()}】报错{i}")
                 exit()
 
             if '全球领先的采购批发平台,批发网' in response.text:
-                print(f"【{datetime.now()}】报错{i}")
+                print(f"全球领先的采购批发平台,批发网【{datetime.now()}】报错{i}")
                 exit()
 
             sel = Selector(text=response.text, type='html')
             urls = sel.xpath('//ul[@class="offer-list-row"]//div[@class="image"]/a/@href').extract()
+            shop_name = sel.xpath('//div[@class="name-wrap"]//a/text()').extract_first()
 
             for url in urls:
                 item = {
                     "sign": self.generate_sign(url),
                     "url": url,
-                    "stauts": "0"
+                    "stauts": "0",
+                    "shop_name": shop_name
                 }
                 self.col.insert_item('RAW_URLS', item)
             time.sleep(10)
diff --git a/1688/spider/1688企业产品详情页面.py b/1688/spider/1688企业产品详情页面.py
index 3b9e1a0..ab4c970 100644
--- a/1688/spider/1688企业产品详情页面.py
+++ b/1688/spider/1688企业产品详情页面.py
@@ -16,20 +16,18 @@ class 企业产品详情页面(Baes):
         for s in res:
             url = s.get('url').replace('detail', 'm')
             sign = s.get('sign')
-            x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a2236653736" \
-                    "323835663332623033396233366663613833323639396433326236364350372b76346" \
-                    "f47454b7a58673776446d357578685145773563795068766a2f2f2f2f2f41513d3d227d"
+            x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223532323634333863383734346532666230393835646164396366336533376664434d756776597347454a5056377266593972325539414561437a59324f4449794d5463344e4473784d4f584d6a3462342f2f2f2f2f77453d227d"
             headers = {
-                'cookie': f"x5sec={x5sec}"
+                'Cookie': f"x5sec={x5sec}"
             }
             response = requests.request("GET", url, headers=headers)
 
             if '系统自动生成，请勿修改 100%' in response.text:
-                print(f"【{datetime.now()}】报错{url}")
+                print(f"系统自动生成，请勿修改 100%【{datetime.now()}】报错{url}")
                 exit()
 
             if '全球领先的采购批发平台,批发网' in response.text:
-                print(f"【{datetime.now()}】报错{url}")
+                print(f"全球领先的采购批发平台,批发网【{datetime.now()}】报错{url}")
                 exit()
 
             item = {