导出到解析后json数据

This commit is contained in:
aiguigu 2021-11-15 02:20:41 +08:00
parent 1ae653fec0
commit 48c2c0d92f
4 changed files with 36 additions and 7 deletions

View File

@ -78,14 +78,14 @@ class extractor(Baes):
a_590893001997 = data.get('590893001997')
if not a_590893001997:
unitWeight = data.get('605462009364').get('data').get('test').get('unitWeight')
# unitWeight = data.get('605462009364').get('data').get('test').get('unitWeight')
location = data.get('605462009364').get('data').get('location')
cost = data.get('605462009364').get('data').get('logistics')
else:
unitWeight = a_590893001997.get('data').get('test').get('unitWeight')
# unitWeight = a_590893001997.get('data').get('test').get('unitWeight')
location = a_590893001997.get('data').get('location')
cost = a_590893001997.get('data').get('logistics')
logistics = [{"from": location}, {"cost": cost}]
logistics = [{"from": location}, {"cost": cost.replace('快递', '').strip()}]
a_590893002003 = data.get('590893002003')
if not a_590893002003:
@ -109,7 +109,7 @@ class extractor(Baes):
"images": images,
"propsList": propsList,
"detailUrl": detailUrl,
"unit_weight": unitWeight,
"unit_weight": "",
"logistics": logistics
}
self.col.insert_item('CLEAN_CONTENT', item)

View File

@ -47,7 +47,7 @@ class MongoDao(object):
else:
print(f"{datetime.now()}】过滤")
def find_item(self, collection, query, projection):
def find_item(self, collection, *args, **kwargs):
collection = self.client[collection]
return collection.find(query, projection).batch_size(1)
return collection.find(*args, **kwargs).batch_size(1)

View File

@ -16,7 +16,7 @@ class 企业产品详情页面(Baes):
for s in res:
url = s.get('url').replace('detail', 'm')
sign = s.get('sign')
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a22313336323861633166303531646664306233326164313139386263343465313343505867386f7347454f7963717172677a49437643686f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223433333035343562623433343530616361636164636131373764396164613965434a754f7534774745507959795a577173616e3641526f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
headers = {
'Cookie': f"x5sec={x5sec}"
}

View File

@ -0,0 +1,29 @@
from scrapy.selector import Selector
from dao.mongo_dao import MongoDao
from spider.baes import Baes
from datetime import datetime
import time
import json
class 导出到解析后json数据(Baes):
    """Export the documents of the ``CLEAN_CONTENT`` collection as JSON lines.

    Every document returned by the DAO is written as one JSON object per
    line to a dated file under ``../docs/`` (the path is relative to the
    current working directory).
    """

    def __init__(self):
        # The DAO is assigned before the base initialiser runs; this order is
        # kept from the original. NOTE(review): ``Baes.__init__`` is not
        # visible here -- confirm whether it depends on ``self.col``.
        self.col = MongoDao()
        super().__init__()

    def run(self):
        """Append all ``CLEAN_CONTENT`` documents to today's export file.

        The ``_id`` and ``sign`` keys are dropped from each document before
        it is serialised with ``json.dumps``. A document lacking either key
        is exported anyway instead of aborting the run with ``KeyError``.
        """
        # Resolve the dated file name once so that a single run always
        # writes to a single file, even if it crosses midnight.
        out_path = f"../docs/导出到解析后json数据{time.strftime('%Y-%m-%d', time.localtime())}.json"
        res = self.col.find_item('CLEAN_CONTENT', {})
        # Open the file once for the whole export rather than once per
        # document. ``json.dumps`` escapes non-ASCII by default, so the
        # explicit encoding only makes the result platform-independent.
        with open(out_path, "a+", encoding="utf-8") as f:
            for s in res:
                s.pop('_id', None)
                s.pop('sign', None)
                f.write(json.dumps(s) + '\n')
        # Emitted once, after every document has been written.
        print(f"{datetime.now()}】完成")
if __name__ == '__main__':
    # Script entry point: build the exporter and run it once.
    exporter = 导出到解析后json数据()
    exporter.run()