Mirror of https://github.com/luzhisheng/js_reverse.git
Synced 2025-04-20 01:34:55 +08:00
导出到解析后json数据 (export parsed data to JSON)

parent: 1ae653fec0
commit: 48c2c0d92f
@@ -78,14 +78,14 @@ class extractor(Baes):
         a_590893001997 = data.get('590893001997')
         if not a_590893001997:
-            unitWeight = data.get('605462009364').get('data').get('test').get('unitWeight')
+            # unitWeight = data.get('605462009364').get('data').get('test').get('unitWeight')
             location = data.get('605462009364').get('data').get('location')
             cost = data.get('605462009364').get('data').get('logistics')
         else:
-            unitWeight = a_590893001997.get('data').get('test').get('unitWeight')
+            # unitWeight = a_590893001997.get('data').get('test').get('unitWeight')
             location = a_590893001997.get('data').get('location')
             cost = a_590893001997.get('data').get('logistics')
-        logistics = [{"from": location}, {"cost": cost}]
+        logistics = [{"from": location}, {"cost": cost.replace('快递', '').strip()}]

         a_590893002003 = data.get('590893002003')
         if not a_590893002003:
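Note: besides commenting out unitWeight, the behavioural change in this hunk is that the logistics cost string is now normalised before it is stored. A minimal sketch of that normalisation, with a hypothetical sample value standing in for the scraped '605462009364' logistics field:

    # Hypothetical input; the real string comes from the scraped payload.
    cost = " 快递 6.00 "
    clean = cost.replace('快递', '').strip()  # drop the "快递" (courier) label and surrounding whitespace
    print(clean)  # -> 6.00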
@@ -109,7 +109,7 @@ class extractor(Baes):
             "images": images,
             "propsList": propsList,
             "detailUrl": detailUrl,
-            "unit_weight": unitWeight,
+            "unit_weight": "",
             "logistics": logistics
         }
         self.col.insert_item('CLEAN_CONTENT', item)
@@ -47,7 +47,7 @@ class MongoDao(object):
         else:
             print(f"【{datetime.now()}】过滤")

-    def find_item(self, collection, query, projection):
+    def find_item(self, collection, *args, **kwargs):
         collection = self.client[collection]
-        return collection.find(query, projection).batch_size(1)
+        return collection.find(*args, **kwargs).batch_size(1)
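With find_item now forwarding *args and **kwargs, callers can pass anything pymongo's Collection.find accepts. A minimal sketch of the two call styles, assuming the 'CLEAN_CONTENT' collection name used elsewhere in this commit:

    dao = MongoDao()
    # old two-argument style still works: a filter plus a projection
    cursor = dao.find_item('CLEAN_CONTENT', {}, {'sign': 1})
    # keyword arguments are now forwarded too, e.g. capping the result set
    cursor = dao.find_item('CLEAN_CONTENT', {}, limit=10)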
@@ -16,7 +16,7 @@ class 企业产品详情页面(Baes):
         for s in res:
             url = s.get('url').replace('detail', 'm')
             sign = s.get('sign')
-            x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a22313336323861633166303531646664306233326164313139386263343465313343505867386f7347454f7963717172677a49437643686f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
+            x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223433333035343562623433343530616361636164636131373764396164613965434a754f7534774745507959795a577173616e3641526f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
             headers = {
                 'Cookie': f"x5sec={x5sec}"
             }
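Side note: both x5sec values are hex-encoded JSON blobs. A small standard-library snippet to inspect the new one (purely illustrative; it only decodes the string already shown above):

    x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223433333035343562623433343530616361636164636131373764396164613965434a754f7534774745507959795a577173616e3641526f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
    print(bytes.fromhex(x5sec).decode('utf-8'))
    # expected shape: {"wireless-server-render;2":"<opaque token>"}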
1688/spider/导出到解析后json数据.py (new file, 29 lines)
@@ -0,0 +1,29 @@
+from scrapy.selector import Selector
+from dao.mongo_dao import MongoDao
+from spider.baes import Baes
+from datetime import datetime
+import time
+import json
+
+
+class 导出到解析后json数据(Baes):
+
+    def __init__(self):
+        self.col = MongoDao()
+        super(导出到解析后json数据, self).__init__()
+
+    def run(self):
+        res = self.col.find_item('CLEAN_CONTENT', {})
+
+        for s in res:
+            s.pop('_id')
+            s.pop('sign')
+            with open(f"../docs/导出到解析后json数据{time.strftime('%Y-%m-%d', time.localtime())}.json", "a+") as f:
+                f.write(json.dumps(s) + '\n')
+
+        print(f"【{datetime.now()}】完成")
+
+
+if __name__ == '__main__':
+    f = 导出到解析后json数据()
+    f.run()
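Since the exporter appends one JSON object per line (JSON Lines), a consumer can read the file back line by line. A minimal sketch, assuming the same date-stamped path the script writes:

    import json
    import time

    path = f"../docs/导出到解析后json数据{time.strftime('%Y-%m-%d', time.localtime())}.json"
    with open(path) as f:
        items = [json.loads(line) for line in f if line.strip()]
    print(f"loaded {len(items)} items")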