mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-20 10:25:01 +08:00
导出到解析后json数据
This commit is contained in:
parent
1ae653fec0
commit
48c2c0d92f
@ -78,14 +78,14 @@ class extractor(Baes):
|
|||||||
|
|
||||||
a_590893001997 = data.get('590893001997')
|
a_590893001997 = data.get('590893001997')
|
||||||
if not a_590893001997:
|
if not a_590893001997:
|
||||||
unitWeight = data.get('605462009364').get('data').get('test').get('unitWeight')
|
# unitWeight = data.get('605462009364').get('data').get('test').get('unitWeight')
|
||||||
location = data.get('605462009364').get('data').get('location')
|
location = data.get('605462009364').get('data').get('location')
|
||||||
cost = data.get('605462009364').get('data').get('logistics')
|
cost = data.get('605462009364').get('data').get('logistics')
|
||||||
else:
|
else:
|
||||||
unitWeight = a_590893001997.get('data').get('test').get('unitWeight')
|
# unitWeight = a_590893001997.get('data').get('test').get('unitWeight')
|
||||||
location = a_590893001997.get('data').get('location')
|
location = a_590893001997.get('data').get('location')
|
||||||
cost = a_590893001997.get('data').get('logistics')
|
cost = a_590893001997.get('data').get('logistics')
|
||||||
logistics = [{"from": location}, {"cost": cost}]
|
logistics = [{"from": location}, {"cost": cost.replace('快递', '').strip()}]
|
||||||
|
|
||||||
a_590893002003 = data.get('590893002003')
|
a_590893002003 = data.get('590893002003')
|
||||||
if not a_590893002003:
|
if not a_590893002003:
|
||||||
@ -109,7 +109,7 @@ class extractor(Baes):
|
|||||||
"images": images,
|
"images": images,
|
||||||
"propsList": propsList,
|
"propsList": propsList,
|
||||||
"detailUrl": detailUrl,
|
"detailUrl": detailUrl,
|
||||||
"unit_weight": unitWeight,
|
"unit_weight": "",
|
||||||
"logistics": logistics
|
"logistics": logistics
|
||||||
}
|
}
|
||||||
self.col.insert_item('CLEAN_CONTENT', item)
|
self.col.insert_item('CLEAN_CONTENT', item)
|
||||||
|
@ -47,7 +47,7 @@ class MongoDao(object):
|
|||||||
else:
|
else:
|
||||||
print(f"【{datetime.now()}】过滤")
|
print(f"【{datetime.now()}】过滤")
|
||||||
|
|
||||||
def find_item(self, collection, *args, **kwargs):
    """Return a cursor over documents in the named collection.

    Args:
        collection: Name used to index ``self.client``.
        *args: Positional arguments passed unchanged to the collection's
            ``find`` call (for example a query, optionally followed by a
            projection).
        **kwargs: Keyword arguments passed unchanged to ``find``.

    Returns:
        The result of ``find(*args, **kwargs).batch_size(1)``.
    """
    # Keep the collection name and the looked-up collection object in
    # separate variables instead of rebinding the parameter.
    target = self.client[collection]
    return target.find(*args, **kwargs).batch_size(1)
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ class 企业产品详情页面(Baes):
|
|||||||
for s in res:
|
for s in res:
|
||||||
url = s.get('url').replace('detail', 'm')
|
url = s.get('url').replace('detail', 'm')
|
||||||
sign = s.get('sign')
|
sign = s.get('sign')
|
||||||
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a22313336323861633166303531646664306233326164313139386263343465313343505867386f7347454f7963717172677a49437643686f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
|
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223433333035343562623433343530616361636164636131373764396164613965434a754f7534774745507959795a577173616e3641526f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
|
||||||
headers = {
|
headers = {
|
||||||
'Cookie': f"x5sec={x5sec}"
|
'Cookie': f"x5sec={x5sec}"
|
||||||
}
|
}
|
||||||
|
29
1688/spider/导出到解析后json数据.py
Normal file
29
1688/spider/导出到解析后json数据.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
from scrapy.selector import Selector
|
||||||
|
from dao.mongo_dao import MongoDao
|
||||||
|
from spider.baes import Baes
|
||||||
|
from datetime import datetime
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class 导出到解析后json数据(Baes):
    """Export the documents of the ``CLEAN_CONTENT`` collection as JSON lines.

    Each document is written as one JSON object per line to a dated file
    under ``../docs/``.
    """

    def __init__(self):
        # The DAO is assigned before the base initialiser runs, keeping the
        # original statement order.
        self.col = MongoDao()
        super().__init__()

    def run(self):
        """Append every ``CLEAN_CONTENT`` document to today's export file.

        The ``_id`` and ``sign`` fields are removed from each document
        before it is serialised. The output file is opened in append mode,
        so repeated runs on the same day add to the same file.
        """
        # Resolve the dated path once per run so all records land in a
        # single file and the name is not recomputed for every record.
        path = f"../docs/导出到解析后json数据{time.strftime('%Y-%m-%d', time.localtime())}.json"
        res = self.col.find_item('CLEAN_CONTENT', {})

        # Open the file once instead of once per record.
        with open(path, "a+", encoding="utf-8") as f:
            for s in res:
                # Tolerate records that lack either field rather than
                # aborting the whole export with a KeyError.
                s.pop('_id', None)
                s.pop('sign', None)
                f.write(json.dumps(s) + '\n')

        print(f"【{datetime.now()}】完成")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: build the exporter and run it once.
    exporter = 导出到解析后json数据()
    exporter.run()
|
Loading…
x
Reference in New Issue
Block a user