mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-21 21:10:21 +08:00
导出到解析后json数据
This commit is contained in:
parent
45986e186e
commit
b8fc9d9517
@ -39,3 +39,10 @@ class MongoDao(object):
|
||||
else:
|
||||
print(f"【{datetime.now()}】入库{item.get('sign')}")
|
||||
return collection.insert_one(item)
|
||||
|
||||
def update_item(self, collection, sign):
    """Mark the document identified by *sign* as processed.

    Looks up ``collection`` (a collection *name*) on ``self.client`` and,
    if a document with the given ``sign`` exists, sets its ``stauts`` field
    to ``'1'``.  NOTE(review): ``stauts`` is the (misspelled) field name
    used consistently throughout this project's data — do not rename it
    here in isolation.

    Returns the pymongo ``UpdateResult`` on success, or ``None`` (after
    logging a skip message) when no matching document is found.
    """
    coll = self.client[collection]
    matched = coll.find_one({"sign": sign})
    if matched is None:
        # Nothing to update — log the skip and return None (as the
        # original implicit fall-through did).
        print(f"【{datetime.now()}】过滤")
        return None
    return coll.update_one({"sign": sign}, {"$set": {"stauts": '1'}})
|
||||
|
@ -1,4 +1,4 @@
|
||||
from dao.mongo_dao import MongoDao
|
||||
from dao.mongo_dao import MyMongodb, MongoDao
|
||||
from spider.baes import Baes
|
||||
from datetime import datetime
|
||||
import time
|
||||
@ -8,15 +8,16 @@ import requests
|
||||
class 企业产品详情页面(Baes):
|
||||
|
||||
def __init__(self):
    """Set up Mongo access for the detail-page spider.

    ``self.mongodb`` is a raw database handle (used for direct ``.find``
    queries); ``self.col`` is the DAO wrapper providing the
    insert/update helpers.  The base class is then initialised.
    """
    # Raw db handle — added alongside the DAO so run() can query directly.
    self.mongodb = MyMongodb().db
    # DAO helper used for insert_item / update_item.
    self.col = MongoDao()
    super(企业产品详情页面, self).__init__()
|
||||
|
||||
def run(self):
|
||||
res = self.col.find_item('RAW_URLS', {"stauts": "0"}, {"url": 1, "sign": 1})
|
||||
res = self.mongodb['RAW_URLS'].find({"stauts": "0"}, {"url": 1, "sign": 1})
|
||||
for s in res:
|
||||
url = s.get('url').replace('detail', 'm')
|
||||
sign = s.get('sign')
|
||||
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223433333035343562623433343530616361636164636131373764396164613965434a754f7534774745507959795a577173616e3641526f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
|
||||
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a226363663036373930386530363435333061646434616437316231373339646264434e7149744930474550476b36716541796f6655777745773563795068766a2f2f2f2f2f41513d3d227d"
|
||||
headers = {
|
||||
'Cookie': f"x5sec={x5sec}"
|
||||
}
|
||||
@ -37,7 +38,7 @@ class 企业产品详情页面(Baes):
|
||||
}
|
||||
self.col.insert_item('RAW_CONTENT', item)
|
||||
self.col.update_item('RAW_URLS', sign)
|
||||
time.sleep(10)
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -1,4 +1,4 @@
|
||||
from dao.mongo_dao import MongoDao
|
||||
from dao.mongo_dao import MongoDao, MyMongodb
|
||||
from scrapy.selector import Selector
|
||||
from spider.baes import Baes
|
||||
from datetime import datetime
|
||||
@ -9,11 +9,12 @@ import re
|
||||
class extractor(Baes):
|
||||
|
||||
def __init__(self):
    """Initialise Mongo handles used by the extractor.

    ``self.mongodb`` exposes the database directly for ``.find`` queries;
    ``self.col`` is the DAO wrapper.  Base-class setup runs last.
    """
    # Raw database handle for direct collection queries in run().
    self.mongodb = MyMongodb().db
    # DAO wrapper for the insert/update helper methods.
    self.col = MongoDao()
    super(extractor, self).__init__()
|
||||
|
||||
def run(self):
|
||||
res = self.col.find_item('RAW_CONTENT', {}, {"content": 1})
|
||||
res = self.mongodb['RAW_CONTENT'].find({}, {"content": 1})
|
||||
for s in res:
|
||||
content = s.get('content')
|
||||
sel = Selector(text=content, type='html')
|
||||
@ -33,7 +34,7 @@ class extractor(Baes):
|
||||
sub_categorys_dict = {
|
||||
'specId': value.get('specId'),
|
||||
'specAttrs': key.replace('>', '|'),
|
||||
'Price': globalData.get('tempModel').get('price'),
|
||||
'Price': value.get('price') if value.get('price') else globalData.get('tempModel').get('price'),
|
||||
'canBookCount': value.get('canBookCount')
|
||||
}
|
||||
sub_categorys_canBookCount.append(sub_categorys_dict)
|
||||
|
Loading…
x
Reference in New Issue
Block a user