增加导出程序

This commit is contained in:
luzhisheng 2021-10-20 11:39:41 +08:00
parent b58ade53d9
commit 56b82b0a80
3 changed files with 43 additions and 10 deletions

View File

@ -25,6 +25,8 @@ class extractor(Baes):
globalData = json_dict.get('globalData')
offerId = globalData.get('tempModel').get('offerId')
print(f"{datetime.now()}】解析 {offerId}")
data = json_dict.get('data')
skuInfoMap = globalData.get('skuModel').get('skuInfoMap')
@ -37,8 +39,11 @@ class extractor(Baes):
}
sub_categorys.append(sub_categorys_dict)
value = globalData.get('skuModel').get('skuProps')[0].get('value')
sub_colour_categorys = value
if globalData.get('skuModel').get('skuProps'):
value = globalData.get('skuModel').get('skuProps')[0].get('value')
sub_colour_categorys = value
else:
sub_colour_categorys = []
orderParam = globalData.get('orderParamModel').get('orderParam').get('skuParam').get('skuRangePrices')
companyName = globalData.get('tempModel').get('companyName')
@ -46,11 +51,11 @@ class extractor(Baes):
offerUnit = globalData.get('tempModel').get('offerUnit')
images = globalData.get('images')
# for image in images:
# fullPathImageURI = image.get('fullPathImageURI')
# download_img(fullPathImageURI, offerId)
# print(f"【{datetime.now()}】图片下载{fullPathImageURI}")
# time.sleep(1)
for image in images:
fullPathImageURI = image.get('fullPathImageURI')
download_img(fullPathImageURI, offerId)
print(f"{datetime.now()}】图片下载{fullPathImageURI}")
time.sleep(1)
a_590893001984 = data.get('590893001984')
if not a_590893001984:
@ -86,9 +91,7 @@ class extractor(Baes):
"detailUrl": detailUrl,
"unit_weight": unitWeight
}
print(json.dumps(item))
self.col.insert_item('CLEAN_CONTENT', item)
print(f"{datetime.now()}】解析{offerId}")
if __name__ == '__main__':

View File

@ -2,7 +2,7 @@ import os
MONGODB_CONF = {
'host': '127.0.0.1',
'host': '192.168.1.107',
'port': 27017,
'username': '',
'pwd': "",

View File

@ -0,0 +1,30 @@
from dao.mongo_dao import MongoDao
from spider.baes import Baes
from datetime import datetime
import time
import json
class 导出到本地json数据(Baes):
    """Export documents from the ``CLEAN_CONTENT`` collection to a local file.

    Each document is written as one JSON object per line to a file under
    ``../docs/`` whose name carries the current local date.
    """

    def __init__(self):
        """Create the ``MongoDao`` accessor, then run the ``Baes`` initializer."""
        # The accessor is assigned before the base initializer is invoked;
        # this ordering is kept unchanged.
        self.col = MongoDao()
        super(导出到本地json数据, self).__init__()

    def run(self):
        """Dump the selected fields of every ``CLEAN_CONTENT`` document.

        The output file is opened in append mode, so running the export
        repeatedly on the same day adds lines to the same file instead of
        overwriting it. The file is opened once for the whole export, which
        means it is created even when the query yields no documents.
        The path is relative, so it is resolved against the current working
        directory of the process.
        """
        # Presumably find_item takes (collection, filter, projection) -
        # TODO confirm against MongoDao.
        projection = {
            "company_name": 1,
            "url": 1,
            "title": 1,
            "sub_categorys": 1,
            "sub_colour_categorys": 1,
            "order_param_model": 1,
            "sellerLoginId": 1,
            "offerUnit": 1,
            "images": 1,
            "propsList": 1,
            "detailUrl": 1,
            "unit_weight": 1,
        }
        res = self.col.find_item('CLEAN_CONTENT', {}, projection)

        # Resolve the dated file name once, so that an export running across
        # midnight is not split between two files.
        export_path = f"../docs/导出到本地json数据{time.strftime('%Y-%m-%d', time.localtime())}.json"

        # Open the file a single time rather than once per document.
        with open(export_path, "a+", encoding="utf-8") as f:
            for s in res:
                # Drop the database identifier before serializing; tolerate
                # documents where it is already absent.
                s.pop('_id', None)
                f.write(json.dumps(s) + '\n')
        print(f"{datetime.now()}】完成")
if __name__ == '__main__':
    # Script entry point: build the exporter and run a single export pass.
    exporter = 导出到本地json数据()
    exporter.run()