mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-22 02:05:24 +08:00
导出到解析后json数据
This commit is contained in:
parent
45986e186e
commit
b8fc9d9517
@ -39,3 +39,10 @@ class MongoDao(object):
|
|||||||
else:
|
else:
|
||||||
print(f"【{datetime.now()}】入库{item.get('sign')}")
|
print(f"【{datetime.now()}】入库{item.get('sign')}")
|
||||||
return collection.insert_one(item)
|
return collection.insert_one(item)
|
||||||
|
|
||||||
|
def update_item(self, collection, sign):
    """Flag the document identified by ``sign`` as processed.

    Sets the ``stauts`` field to ``'1'`` on one document of
    ``self.client[collection]`` whose ``sign`` field equals ``sign``.
    The field name is intentionally left misspelled: the pending-URL
    query in this project filters on the same ``"stauts"`` key, so
    renaming it here would desynchronise the two.

    A single ``update_one`` call replaces the former ``find_one`` +
    ``update_one`` pair: one round trip instead of two, and no window
    between the existence check and the write.

    :param collection: name of the collection to look up on ``self.client``.
    :param sign: value of the ``sign`` field identifying the document.
    :return: the ``update_one`` result when a document matched,
        otherwise ``None`` (after printing a "filtered" message).
    """
    result = self.client[collection].update_one(
        {"sign": sign},
        {"$set": {"stauts": '1'}},
    )
    # NOTE(review): matched_count assumes an acknowledged write
    # (the driver default) - confirm no unacknowledged write concern is set.
    if result.matched_count:
        return result
    print(f"【{datetime.now()}】过滤")
    return None
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from dao.mongo_dao import MongoDao
|
from dao.mongo_dao import MyMongodb, MongoDao
|
||||||
from spider.baes import Baes
|
from spider.baes import Baes
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import time
|
import time
|
||||||
@ -8,15 +8,16 @@ import requests
|
|||||||
class 企业产品详情页面(Baes):
|
class 企业产品详情页面(Baes):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.mongodb = MyMongodb().db
|
||||||
self.col = MongoDao()
|
self.col = MongoDao()
|
||||||
super(企业产品详情页面, self).__init__()
|
super(企业产品详情页面, self).__init__()
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
res = self.col.find_item('RAW_URLS', {"stauts": "0"}, {"url": 1, "sign": 1})
|
res = self.mongodb['RAW_URLS'].find({"stauts": "0"}, {"url": 1, "sign": 1})
|
||||||
for s in res:
|
for s in res:
|
||||||
url = s.get('url').replace('detail', 'm')
|
url = s.get('url').replace('detail', 'm')
|
||||||
sign = s.get('sign')
|
sign = s.get('sign')
|
||||||
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a223433333035343562623433343530616361636164636131373764396164613965434a754f7534774745507959795a577173616e3641526f4c4e6a59344d6a49784e7a67304f7a45773563795068766a2f2f2f2f2f41513d3d227d"
|
x5sec = "7b22776972656c6573732d7365727665722d72656e6465723b32223a226363663036373930386530363435333061646434616437316231373339646264434e7149744930474550476b36716541796f6655777745773563795068766a2f2f2f2f2f41513d3d227d"
|
||||||
headers = {
|
headers = {
|
||||||
'Cookie': f"x5sec={x5sec}"
|
'Cookie': f"x5sec={x5sec}"
|
||||||
}
|
}
|
||||||
@ -37,7 +38,7 @@ class 企业产品详情页面(Baes):
|
|||||||
}
|
}
|
||||||
self.col.insert_item('RAW_CONTENT', item)
|
self.col.insert_item('RAW_CONTENT', item)
|
||||||
self.col.update_item('RAW_URLS', sign)
|
self.col.update_item('RAW_URLS', sign)
|
||||||
time.sleep(10)
|
time.sleep(3)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from dao.mongo_dao import MongoDao
|
from dao.mongo_dao import MongoDao, MyMongodb
|
||||||
from scrapy.selector import Selector
|
from scrapy.selector import Selector
|
||||||
from spider.baes import Baes
|
from spider.baes import Baes
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@ -9,11 +9,12 @@ import re
|
|||||||
class extractor(Baes):
|
class extractor(Baes):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
self.mongodb = MyMongodb().db
|
||||||
self.col = MongoDao()
|
self.col = MongoDao()
|
||||||
super(extractor, self).__init__()
|
super(extractor, self).__init__()
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
res = self.col.find_item('RAW_CONTENT', {}, {"content": 1})
|
res = self.mongodb['RAW_CONTENT'].find({}, {"content": 1})
|
||||||
for s in res:
|
for s in res:
|
||||||
content = s.get('content')
|
content = s.get('content')
|
||||||
sel = Selector(text=content, type='html')
|
sel = Selector(text=content, type='html')
|
||||||
@ -33,7 +34,7 @@ class extractor(Baes):
|
|||||||
sub_categorys_dict = {
|
sub_categorys_dict = {
|
||||||
'specId': value.get('specId'),
|
'specId': value.get('specId'),
|
||||||
'specAttrs': key.replace('>', '|'),
|
'specAttrs': key.replace('>', '|'),
|
||||||
'Price': globalData.get('tempModel').get('price'),
|
'Price': value.get('price') if value.get('price') else globalData.get('tempModel').get('price'),
|
||||||
'canBookCount': value.get('canBookCount')
|
'canBookCount': value.get('canBookCount')
|
||||||
}
|
}
|
||||||
sub_categorys_canBookCount.append(sub_categorys_dict)
|
sub_categorys_canBookCount.append(sub_categorys_dict)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user