mirror of
https://github.com/luzhisheng/js_reverse.git
synced 2025-04-20 03:59:57 +08:00
修改导出代码
This commit is contained in:
parent
bc1a2ed3fa
commit
d60a1f8efa
@ -40,7 +40,7 @@ class extractor(Baes):
|
||||
sub_categorys.append(sub_categorys_dict)
|
||||
|
||||
if globalData.get('skuModel').get('skuProps'):
|
||||
value = globalData.get('skuModel').get('skuProps')[0].get('value')
|
||||
value = globalData.get('skuModel').get('skuProps')
|
||||
sub_colour_categorys = value
|
||||
else:
|
||||
sub_colour_categorys = []
|
||||
|
@ -1,218 +0,0 @@
|
||||
from dao.mongo_dao import MongoDao
|
||||
from spider.baes import Baes
|
||||
import settings
|
||||
import pandas as pd
|
||||
from urllib.parse import urlparse
|
||||
import os
|
||||
|
||||
|
||||
class 导出到本地csv数据(Baes):
    """Export the cleaned records of one shop to a two-sheet Excel workbook.

    Despite the "csv" in the class name, the output is an ``.xlsx`` file
    written through ``pandas.ExcelWriter``: sheet ``1-商品详情`` holds one row
    per record, sheet ``2-规格详情`` one row per entry of ``sub_categorys``.
    """

    # ``propsList`` entries whose ``name`` is listed here are copied into the
    # column of the same name; all other entries are ignored.
    _PROP_COLUMNS = (
        "品牌", "货号", "包装", "材质", "尺寸", "颜色", "是否专利货源", "是否进口",
        "造型", "主要下游平台", "主要销售地区", "有可授权的自有品牌",
        "是否跨境出口专供货源",
    )

    # Column order of the product-detail sheet.
    _DETAIL_COLUMNS = (
        "店铺id", "店铺名称", "店铺地址", "30天銷量", "商品名称", "轮播图",
        "起订量1", "价格1", "起订量2", "价格2", "起订量3", "价格3", "单位",
    ) + _PROP_COLUMNS + ("单位重量", "详情页html")

    # Column order of the specification sheet.
    _SPEC_COLUMNS = ("店铺id", "规格名称", "可售数量", "图片id")

    def __init__(self):
        self.col = MongoDao()
        super(导出到本地csv数据, self).__init__()
        path_1 = "数据分析_1688_{}_v1.xlsx".format("".join(self.getYMDHMSstrList()[0:4]))
        pd_path = os.path.join(settings.excel_path, path_1)
        # NOTE(review): ``options=`` was deprecated in pandas 1.3 in favour of
        # ``engine_kwargs`` and removed in 2.0 -- confirm the pinned pandas
        # version before migrating this call.
        self.writer = pd.ExcelWriter(pd_path, options={'strings_to_urls': False})

    @staticmethod
    def _last_path_segment(url):
        """Return the final path component of *url*, or '' when *url* is empty."""
        if not url:
            return ''
        return urlparse(url).path.split("/")[-1]

    def export(self):
        """Query ``CLEAN_CONTENT`` for the hard-coded shop and write both sheets.

        Missing list/dict fields on a record are treated as empty instead of
        aborting the whole export.
        """
        res = self.col.find_item('CLEAN_CONTENT', {"company_name": "坂戈实力旗舰店"}, None)

        detail_rows = []
        spec_rows = []

        for s in res:
            # Carousel images are identified by the last segment of their URL path.
            carousel_id = [
                self._last_path_segment(image.get('fullPathImageURI'))
                for image in s.get('images') or []
            ]

            # Product attributes: default to '' and let later duplicates win,
            # as the original if-chain did.
            prop_values = dict.fromkeys(self._PROP_COLUMNS, '')
            for props in s.get('propsList') or []:
                name = props.get('name')
                if name in prop_values:
                    prop_values[name] = props.get('value')

            # Up to three price tiers; absent tiers stay as empty strings.
            tier_values = dict.fromkeys(
                ("起订量1", "价格1", "起订量2", "价格2", "起订量3", "价格3"), "")
            original_prices = (s.get('order_param_model') or {}).get('originalPrices') or []
            for tier_no, tier in enumerate(original_prices[:3], start=1):
                tier_values["起订量{}".format(tier_no)] = tier.get('beginAmount')
                tier_values["价格{}".format(tier_no)] = tier.get('price')

            item = {
                "店铺id": s.get('id'),
                "店铺名称": s.get('company_name'),
                "店铺地址": s.get('url'),
                "30天銷量": s.get('saledCount'),
                "商品名称": s.get('title'),
                "轮播图": carousel_id,
                "单位": s.get('offerUnit'),
                "单位重量": s.get('unit_weight'),
                "详情页html": s.get('detailUrl'),
            }
            item.update(tier_values)
            item.update(prop_values)
            detail_rows.append(item)

            # Specification details: attach the image of the last option whose
            # name occurs in the spec text and that actually has an image.
            colour_options = s.get('sub_colour_categorys') or []
            for sub_category in s.get('sub_categorys') or []:
                specAttrs = sub_category.get('specAttrs')
                spec_text = specAttrs or ''
                imageUrl_id = ''
                for option in colour_options:
                    option_name = option.get('name')
                    image_url = option.get('imageUrl')
                    # An empty name would match every spec ('' in s is True).
                    if option_name and image_url and option_name in spec_text:
                        imageUrl_id = self._last_path_segment(image_url)

                spec_rows.append({
                    "店铺id": s.get('id'),
                    "规格名称": specAttrs,
                    "可售数量": sub_category.get('canBookCount'),
                    "图片id": imageUrl_id,
                })

        # Build the frames directly; DataFrame.append no longer exists in pandas 2.x.
        df = pd.DataFrame(detail_rows, columns=list(self._DETAIL_COLUMNS))
        df.to_excel(sheet_name="1-商品详情", index=False, excel_writer=self.writer)

        df_cat = pd.DataFrame(spec_rows, columns=list(self._SPEC_COLUMNS))
        df_cat.to_excel(sheet_name="2-规格详情", index=False, excel_writer=self.writer)

        # close() writes the workbook to disk; ExcelWriter.save() was removed in pandas 2.0.
        self.writer.close()

    def run(self):
        """Entry point: perform the export."""
        self.export()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: build the exporter and run it once.
    exporter = 导出到本地csv数据()
    exporter.run()
|
31
1688/spider/导出到本地json数据.py
Normal file
31
1688/spider/导出到本地json数据.py
Normal file
@ -0,0 +1,31 @@
|
||||
from dao.mongo_dao import MongoDao
|
||||
from spider.baes import Baes
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import time
|
||||
import json
|
||||
|
||||
|
||||
class 导出到本地json数据(Baes):
    """Dump selected fields of every ``CLEAN_CONTENT`` record as JSON lines."""

    def __init__(self):
        self.col = MongoDao()
        super(导出到本地json数据, self).__init__()

    def run(self):
        """Append one JSON object per record to a dated file under ``../docs``.

        The path is relative to the current working directory and contains
        today's local date. The file is opened once in append mode, so it is
        created even when the query yields no records.
        """
        res = self.col.find_item('CLEAN_CONTENT', {}, {"company_name": 1, "url": 1, "title": 1, "sub_categorys": 1,
                                                       "sub_colour_categorys": 1, "order_param_model": 1,
                                                       "sellerLoginId": 1, "offerUnit": 1, "images": 1, "propsList": 1,
                                                       "detailUrl": 1, "unit_weight": 1})

        # Compute the file name once so a run that crosses midnight still
        # writes to a single file.
        out_path = f"../docs/导出到本地json数据{time.strftime('%Y-%m-%d', time.localtime())}.json"
        with open(out_path, "a+", encoding="utf-8") as f:
            for s in res:
                # Drop the ``_id`` field before serialising; tolerate its absence.
                s.pop('_id', None)
                f.write(json.dumps(s) + '\n')

        print(f"【{datetime.now()}】完成")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: build the exporter and run it once.
    exporter = 导出到本地json数据()
    exporter.run()
|
137
1688/spider/导出到本地xlsx数据.py
Normal file
137
1688/spider/导出到本地xlsx数据.py
Normal file
@ -0,0 +1,137 @@
|
||||
from dao.mongo_dao import MongoDao
|
||||
from spider.baes import Baes
|
||||
import settings
|
||||
import pandas as pd
|
||||
from urllib.parse import urlparse
|
||||
import os
|
||||
import json
|
||||
|
||||
|
||||
class 导出到本地xlsx数据(Baes):
    """Export the cleaned records of one company to a four-sheet workbook.

    Sheets: ``1-产品属性`` (attributes), ``2-产品图片`` (SKU images),
    ``3-价格区间`` (price ranges) and ``4-选项列`` (option values).
    """

    def __init__(self):
        self.col = MongoDao()
        super(导出到本地xlsx数据, self).__init__()

    @staticmethod
    def _last_path_segment(url):
        """Return the final path component of *url*, or '' when *url* is empty."""
        if not url:
            return ''
        return urlparse(url).path.split("/")[-1]

    def export(self, company_name, writer):
        """Write all four sheets for *company_name* into *writer* and close it.

        Args:
            company_name: value matched against the ``company_name`` field of
                the ``CLEAN_CONTENT`` collection.
            writer: a ``pandas.ExcelWriter``; it is closed (flushed to disk)
                before this method returns.
        """
        res = self.col.find_item('CLEAN_CONTENT', {"company_name": company_name}, None)

        dict_list = []
        dict_list_cat = []
        dict_list_price = []
        dict_list_row = []

        for s in res:
            product_id = s.get('id')

            # Product attributes.
            dict_list.append({
                "product_id": product_id,
                "product_attributes": s.get('propsList'),
            })

            # Option groups of THIS record. Resolving them per record fixes the
            # original behaviour where a record without option groups either
            # raised NameError or reused the previous product's images.
            option_groups = s.get('sub_colour_categorys') or []
            # Only the first option group is consulted for SKU images.
            image_options = (option_groups[0].get('value') or []) if option_groups else []

            # Product images: one row per SKU, carrying the image of the last
            # option whose name occurs in the SKU description.
            for sub_category in s.get('sub_categorys') or []:
                specAttrs = (sub_category.get('specAttrs') or '').replace('>', '')
                imageUrl_id = ''
                for option in image_options:
                    option_name = option.get('name')
                    image_url = option.get('imageUrl')
                    # An empty name would match every SKU ('' in s is True).
                    if option_name and image_url and option_name in specAttrs:
                        imageUrl_id = self._last_path_segment(image_url)

                dict_list_cat.append({
                    "product_id": product_id,
                    "sku_description": specAttrs,
                    "sku_image": imageUrl_id,
                })

            # Price ranges: JSON-quoted "beginAmount:price" items joined by '`'.
            originalPrices = (s.get('order_param_model') or {}).get('originalPrices') or []
            price_str = '`'.join(
                json.dumps(str(tier.get('beginAmount')) + ':' + str(tier.get('price')))
                for tier in originalPrices
            )
            dict_list_price.append({
                "product_id": product_id,
                "priceRanges": price_str,
            })

            # Option columns: one row per value of every option group.
            for group in option_groups:
                prop = group.get('prop')
                for value in group.get('value') or []:
                    dict_list_row.append({
                        "product_id": product_id,
                        "option_name": prop,
                        "option_value": value.get('name'),
                    })

        # Build the frames directly; DataFrame.append no longer exists in pandas 2.x.
        df = pd.DataFrame(dict_list, columns=["product_id", "product_attributes"])
        df.to_excel(sheet_name="1-产品属性", index=False, excel_writer=writer)

        df_cat = pd.DataFrame(dict_list_cat, columns=["product_id", "sku_description", "sku_image"])
        df_cat.to_excel(sheet_name="2-产品图片", index=False, excel_writer=writer)

        df_price = pd.DataFrame(dict_list_price, columns=["product_id", "priceRanges"])
        df_price.to_excel(sheet_name="3-价格区间", index=False, excel_writer=writer)

        df_row = pd.DataFrame(dict_list_row, columns=["product_id", "option_name", "option_value"])
        df_row.to_excel(sheet_name="4-选项列", index=False, excel_writer=writer)

        # close() writes the workbook to disk; ExcelWriter.save() was removed in pandas 2.0.
        writer.close()

    def run(self, company_name):
        """Create the dated workbook for *company_name* and export into it."""
        path_1 = f"{company_name}_1688_{''.join(self.getYMDHMSstrList()[0:4])}_v1.xlsx"
        pd_path = os.path.join(settings.excel_path, path_1)
        # NOTE(review): ``options=`` was deprecated in pandas 1.3 in favour of
        # ``engine_kwargs`` and removed in 2.0 -- confirm the pinned pandas
        # version before migrating this call.
        writer = pd.ExcelWriter(pd_path, options={'strings_to_urls': False})
        self.export(company_name, writer)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: export the hard-coded company to a workbook.
    company_name = '东莞市茶山品轩玩具厂'
    exporter = 导出到本地xlsx数据()
    exporter.run(company_name)
|
Loading…
x
Reference in New Issue
Block a user