From 168f011c247da03f1ac4eb59fcb0a9951beda321 Mon Sep 17 00:00:00 2001 From: aiguigu Date: Tue, 30 Nov 2021 02:44:39 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AF=BC=E5=87=BA=E5=88=B0=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E5=90=8Ejson=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../1688企业产品详情内容.py} | 0 1688/spider/图片下载.py | 0 .../清洗数据json格式.py} | 27 ++++++++++--------- 3 files changed, 15 insertions(+), 12 deletions(-) rename 1688/{clean/__init__.py => spider/1688企业产品详情内容.py} (100%) create mode 100644 1688/spider/图片下载.py rename 1688/{clean/extractor.py => spider/清洗数据json格式.py} (86%) diff --git a/1688/clean/__init__.py b/1688/spider/1688企业产品详情内容.py similarity index 100% rename from 1688/clean/__init__.py rename to 1688/spider/1688企业产品详情内容.py diff --git a/1688/spider/图片下载.py b/1688/spider/图片下载.py new file mode 100644 index 0000000..e69de29 diff --git a/1688/clean/extractor.py b/1688/spider/清洗数据json格式.py similarity index 86% rename from 1688/clean/extractor.py rename to 1688/spider/清洗数据json格式.py index 205b792..a51f436 100644 --- a/1688/clean/extractor.py +++ b/1688/spider/清洗数据json格式.py @@ -2,8 +2,6 @@ from dao.mongo_dao import MongoDao from scrapy.selector import Selector from spider.baes import Baes from datetime import datetime -from tool.download_img import download_img -import time import json import re @@ -47,8 +45,12 @@ class extractor(Baes): prop = skuProp.get('prop') value = skuProp.get('value') for val in value: + if val.get('imageUrl'): + val_imageUrl = val.get('imageUrl').replace('.jpg', '.32x32.jpg') + else: + val_imageUrl = '' item_dict = { - 'OptionImageUrl': val.get('imageUrl') or '', + 'OptionImageUrl': val_imageUrl, 'name': prop, 'optionValue': val.get('name') } @@ -57,18 +59,17 @@ class extractor(Baes): else: list_dict = [] - orderParam = globalData.get('orderParamModel').get('orderParam').get('skuParam').get('skuRangePrices') companyName = globalData.get('tempModel').get('companyName') sellerLoginId = globalData.get('tempModel').get('sellerLoginId') offerUnit = globalData.get('tempModel').get('offerUnit') saledCount = globalData.get('tempModel').get('saledCount') - images = globalData.get('images') - - # for image in images: - # fullPathImageURI = image.get('fullPathImageURI') - # download_img(fullPathImageURI, offerId) - # print(f"【{datetime.now()}】图片下载{fullPathImageURI}") - # time.sleep(1) + images = [] + images_item = globalData.get('images') + for image in images_item: + image_item = { + "imageURI": image.get('imageURI').split('/')[-1] + } + images.append(image_item) a_590893001984 = data.get('590893001984') if not a_590893001984: @@ -110,7 +111,9 @@ class extractor(Baes): "propsList": propsList, "detailUrl": detailUrl, "unit_weight": "", - "logistics": logistics + "logistics": logistics, + "download_img_status": 0, + "detail_url_status": 0 } self.col.insert_item('CLEAN_CONTENT', item)