From 0b0fca5fcf46e3696e49fa5ead4e0aec687d018f Mon Sep 17 00:00:00 2001 From: naibo Date: Wed, 20 Dec 2023 04:33:29 +0800 Subject: [PATCH] Remove Duplicate Data!!! --- ElectronJS/src/taskGrid/FlowChart.html | 2 +- ElectronJS/src/taskGrid/FlowChart_CN.html | 2 +- ElectronJS/src/taskGrid/executeTask.html | 2 +- ElectronJS/tasks/149.json | 2 +- ElectronJS/tasks/296.json | 2 +- ExecuteStage/.vscode/launch.json | 2 +- ExecuteStage/easyspider_executestage.py | 25 +++++++++++++++++++++-- ExecuteStage/utils.py | 23 +++++++++++++++++++++ 8 files changed, 52 insertions(+), 8 deletions(-) diff --git a/ElectronJS/src/taskGrid/FlowChart.html b/ElectronJS/src/taskGrid/FlowChart.html index b91f121..2e43c53 100644 --- a/ElectronJS/src/taskGrid/FlowChart.html +++ b/ElectronJS/src/taskGrid/FlowChart.html @@ -721,7 +721,7 @@ If the expression returns a value greater than 0 or evaluates to True, the opera diff --git a/ElectronJS/src/taskGrid/FlowChart_CN.html b/ElectronJS/src/taskGrid/FlowChart_CN.html index 218ea71..152427e 100644 --- a/ElectronJS/src/taskGrid/FlowChart_CN.html +++ b/ElectronJS/src/taskGrid/FlowChart_CN.html @@ -721,7 +721,7 @@ print(emotlib.emoji()) # 使用其中的函数。 diff --git a/ElectronJS/src/taskGrid/executeTask.html b/ElectronJS/src/taskGrid/executeTask.html index c50ab55..efc329b 100644 --- a/ElectronJS/src/taskGrid/executeTask.html +++ b/ElectronJS/src/taskGrid/executeTask.html @@ -264,7 +264,7 @@
- +

diff --git a/ElectronJS/tasks/149.json b/ElectronJS/tasks/149.json index 3083292..5d09492 100644 --- a/ElectronJS/tasks/149.json +++ b/ElectronJS/tasks/149.json @@ -1 +1 @@ -{"id":149,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/7/2023, 6:36:49 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"mysql","saveName":"京东","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"params":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"6h61epq3t9sljrq1vbg","iframe":false,"default":"","paraType":"text","beforeJS":"arguments[0].innerText = \"'\" + arguments[0].innerText + '\"'","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"6h61epq3t9sljrq1vbg","iframe":false,"default":"","paraType":"text","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file +{"id":149,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/7/2023, 6:36:49 AM","update_time":"12/20/2023, 4:03:13 AM","version":"0.6.0","saveThreshold":10,"quitWaitTime":60,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"mysql","saveName":"京东","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":1,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"params":[{"nodeType":1,"contentType":0,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"6h61epq3t9sljrq1vbg","iframe":false,"default":"","paraType":"text","beforeJS":"arguments[0].innerText = \"'\" + arguments[0].innerText + '\"'","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"6h61epq3t9sljrq1vbg","iframe":false,"default":"","paraType":"text","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]} \ No newline at end of file diff --git a/ElectronJS/tasks/296.json b/ElectronJS/tasks/296.json index ca64fa8..f5e4900 100644 --- a/ElectronJS/tasks/296.json +++ b/ElectronJS/tasks/296.json @@ -1 +1 @@ -{"id":296,"name":"wysdemo01","url":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","links":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","create_time":"2023/12/17 09:06:23","update_time":"12/19/2023, 10:01:28 PM","version":"0.6.0","saveThreshold":1,"quitWaitTime":40,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"csv","saveName":"TTT","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":0,"desc":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7"},{"id":1,"name":"loopTimes_1","nodeId":2,"nodeName":"循环点击下一页","desc":"循环循环点击下一页执行的次数(0代表无限循环)","type":"int","exampleValue":0,"value":0}],"outputParameters":[{"id":0,"name":"url","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newinfo.action?articleid=ff8080818c64130e018c6aabf897481d"},{"id":1,"name":"title","desc":"","type":"text","recordASField":1,"exampleValue":"网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”"},{"id":2,"name":"text","desc":"","type":"text","recordASField":1,"exampleValue":"\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”\n\t\t\t\t\n\t\t\t\t来源:松江区委组织部    发布时间:2023-12-15 \n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t近日,第二届全国人力资源服务业发展大会召开。人力资源是推动社会经济社会发展的第一资源,是战略性新兴产业,也是稳定和扩大就业、提高人力资源利用水平、构建现代化产业体系的重要力量。激荡人力资源“动能”,集聚强国建设“合力”,要凝心聚力、坚定信心,引才聚才、焕发活力,务实合作、优化环境,以人口高质量发展支撑中国式现代化。凝心聚力、坚定信心,促进高质量充分就业。就业是最基本的民生,促进高质量充分就业,是增进民生福祉、提高人民生活品质的根本举措。党的二十大报告提出“促进高质量充分就业”的目标要求,是党中央牢牢把握我国发展的阶段性特征对就业工作作出的重大战略部署。当前,我国人口发展面临新形势、新常态,人力资源丰富仍然是我国的突出优势,人力资源服务业发挥着重要作用。新时代新征程上,要凝心聚力、坚定信心,坚持以习近平新时代中国特色社会主义思想为指导,全面贯彻党的二十大精神,深入贯彻习近平总书记重要指示和党中央决策部署,锚定“稳就业”这一重大政治责任,强化就业优先、促进供需匹配,聚焦创新驱动、推进数字赋能,持续稳存量、扩增量、提质量,全力以赴确保就业局势总体稳定,不断夯实经济社会大局“稳固底座”。引才聚才、焕发活力,提高人力资源开发利用。完善政策体系、壮大行业规模、提高服务水平、优化产业环境……近年来,我国人力资源服务业蓬勃发展、亮点纷呈,实现健康发展,取得长足进步,人力资源质量和利用效率逐步提升,形成了“百花齐放”的良好生态。人才是引领发展的第一动力,是建设现代化经济体系的战略支撑。新时代新征程上,要筑巢引凤、引才聚才、以才吸才,不断焕发新活力、新动能。要着力培养知识型、技能型、创新型的复合型人才,持续打造现代化的人力资源体系,创造更充足的人力资本红利。要适应新经济形态变化,开发、培育、推广新职业,让人才在新时代站上更大舞台、发挥更大作用。要推动“有为政府”与“有效市场”同向发力、共同发力,强化供需匹配度、提高招聘质量水平、提升劳动参与率,激发人力资源服务业的最大效能。务实合作、优化环境,推动人口高质量发展。党的十八大以来,习近平总书记围绕人口发展发表了一系列重要论述,作出了一系列科学判断,强调“中国式现代化是人口规模巨大的现代化”。在今年召开的二十届中央财经委员会第一次会议上,习近平总书记提出了以人口高质量发展支撑中国式现代化的重要论断和工作要求。当前,我们迈上了全面建设社会主义现代化国家新征程,我国人口发展也进入了一个新的阶段。要深刻认识人口发展新形势,科学把握人口高质量发展的深刻内涵,以改革创新推动人口高质量发展,推动人民高品质生活、促进人的全面发展和全体人民共同富裕。要致力务实合作,持续优化人力资源服务业发展环境,深耕党和国家事业发展的重点领域、重点行业,一张蓝图绘到底、一茬接着一茬干,不断展现人力资源服务业新气象、开创发展新局面,为推动高质量发展贡献澎湃力量。(作者单位:松江区委统战部)\n\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t<返回>\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t上海市党建服务中心\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","links":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环点击下一页","sequence":[4,3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"next\")]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[2]/div[2]/div[1]/div[1]/div[1]/ul[1]/li[10]/a[1]","//a[contains(., '下一页')]","/html/body/div[last()-6]/div/div[last()-1]/div/div/ul/li[last()-1]/a"]}},{"id":4,"index":3,"parentId":2,"type":0,"option":2,"title":"点击下一页","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/div[2]/div[1]/div[1]/div[1]/ul[1]/li[10]/a[1]","//a[contains(., '下一页')]","/html/body/div[last()-6]/div/div[last()-1]/div/div/ul/li[last()-1]/a"]}},{"id":3,"index":4,"parentId":2,"type":1,"option":8,"title":"循环点击文章标题","sequence":[5,6],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[2]/div[1]/div[1]/ul[1]/li/h5[1]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":5,"index":5,"parentId":3,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":2,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":1,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":""}},{"id":6,"index":6,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":1.5,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"//a[contains(., '<返回>')]","waitElementTime":15,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":0,"contentType":5,"relative":false,"name":"url","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., '')]","/html/body"],"exampleValues":[{"num":0,"value":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newinfo.action?articleid=ff8080818c64130e018c6aabf897481d"}],"unique_index":"488aw4k05kolq8sgpeo","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":1},{"nodeType":0,"contentType":0,"relative":false,"name":"title","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[1]/div[1]/h1[1]","allXPaths":["/html/body/div[2]/div[1]/div[1]/div[1]/h1[1]","//h1[contains(., '网友夏琴:激荡人力资')]","/html/body/div[last()-6]/div/div[last()-1]/div/h1"],"exampleValues":[{"num":0,"value":"网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”"}],"unique_index":"i8o9o8sk6elq8shcvr","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"text","desc":"","extractType":0,"relativeXPath":"//DIV[@class='artbox']","allXPaths":["/html/body/div[2]/div[1]/div[1]/div[1]","//div[contains(., '')]","//DIV[@class='artbox']","/html/body/div[last()-6]/div/div[last()-1]/div"],"exampleValues":[{"num":0,"value":"\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”\n\t\t\t\t\n\t\t\t\t来源:松江区委组织部    发布时间:2023-12-15 \n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t近日,第二届全国人力资源服务业发展大会召开。人力资源是推动社会经济社会发展的第一资源,是战略性新兴产业,也是稳定和扩大就业、提高人力资源利用水平、构建现代化产业体系的重要力量。激荡人力资源“动能”,集聚强国建设“合力”,要凝心聚力、坚定信心,引才聚才、焕发活力,务实合作、优化环境,以人口高质量发展支撑中国式现代化。凝心聚力、坚定信心,促进高质量充分就业。就业是最基本的民生,促进高质量充分就业,是增进民生福祉、提高人民生活品质的根本举措。党的二十大报告提出“促进高质量充分就业”的目标要求,是党中央牢牢把握我国发展的阶段性特征对就业工作作出的重大战略部署。当前,我国人口发展面临新形势、新常态,人力资源丰富仍然是我国的突出优势,人力资源服务业发挥着重要作用。新时代新征程上,要凝心聚力、坚定信心,坚持以习近平新时代中国特色社会主义思想为指导,全面贯彻党的二十大精神,深入贯彻习近平总书记重要指示和党中央决策部署,锚定“稳就业”这一重大政治责任,强化就业优先、促进供需匹配,聚焦创新驱动、推进数字赋能,持续稳存量、扩增量、提质量,全力以赴确保就业局势总体稳定,不断夯实经济社会大局“稳固底座”。引才聚才、焕发活力,提高人力资源开发利用。完善政策体系、壮大行业规模、提高服务水平、优化产业环境……近年来,我国人力资源服务业蓬勃发展、亮点纷呈,实现健康发展,取得长足进步,人力资源质量和利用效率逐步提升,形成了“百花齐放”的良好生态。人才是引领发展的第一动力,是建设现代化经济体系的战略支撑。新时代新征程上,要筑巢引凤、引才聚才、以才吸才,不断焕发新活力、新动能。要着力培养知识型、技能型、创新型的复合型人才,持续打造现代化的人力资源体系,创造更充足的人力资本红利。要适应新经济形态变化,开发、培育、推广新职业,让人才在新时代站上更大舞台、发挥更大作用。要推动“有为政府”与“有效市场”同向发力、共同发力,强化供需匹配度、提高招聘质量水平、提升劳动参与率,激发人力资源服务业的最大效能。务实合作、优化环境,推动人口高质量发展。党的十八大以来,习近平总书记围绕人口发展发表了一系列重要论述,作出了一系列科学判断,强调“中国式现代化是人口规模巨大的现代化”。在今年召开的二十届中央财经委员会第一次会议上,习近平总书记提出了以人口高质量发展支撑中国式现代化的重要论断和工作要求。当前,我们迈上了全面建设社会主义现代化国家新征程,我国人口发展也进入了一个新的阶段。要深刻认识人口发展新形势,科学把握人口高质量发展的深刻内涵,以改革创新推动人口高质量发展,推动人民高品质生活、促进人的全面发展和全体人民共同富裕。要致力务实合作,持续优化人力资源服务业发展环境,深耕党和国家事业发展的重点领域、重点行业,一张蓝图绘到底、一茬接着一茬干,不断展现人力资源服务业新气象、开创发展新局面,为推动高质量发展贡献澎湃力量。(作者单位:松江区委统战部)\n\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t<返回>\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t上海市党建服务中心\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t"}],"unique_index":"0tq1bg6rty8lq8sg1i4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":1}]}}]} \ No newline at end of file +{"id":296,"name":"wysdemo01","url":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","links":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","create_time":"2023/12/17 09:06:23","update_time":"12/20/2023, 4:00:03 AM","version":"0.6.0","saveThreshold":1,"quitWaitTime":40,"environment":0,"maximizeWindow":0,"maxViewLength":15,"recordLog":1,"outputFormat":"mysql","saveName":"TTT","dataWriteMode":1,"inputExcel":"","startFromExit":0,"pauseKey":"p","containJudge":false,"browser":"chrome","removeDuplicate":1,"desc":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7"},{"id":1,"name":"loopTimes_1","nodeId":2,"nodeName":"循环点击下一页","desc":"Number of loop executions for loop 循环点击下一页, 0 means unlimited loops (until element not found)","type":"int","exampleValue":0,"value":0}],"outputParameters":[{"id":0,"name":"url","desc":"","type":"text","recordASField":1,"exampleValue":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newinfo.action?articleid=ff8080818c64130e018c6aabf897481d"},{"id":1,"name":"title","desc":"","type":"text","recordASField":1,"exampleValue":"网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”"},{"id":2,"name":"text","desc":"","type":"text","recordASField":1,"exampleValue":"\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”\n\t\t\t\t\n\t\t\t\t来源:松江区委组织部    发布时间:2023-12-15 \n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t近日,第二届全国人力资源服务业发展大会召开。人力资源是推动社会经济社会发展的第一资源,是战略性新兴产业,也是稳定和扩大就业、提高人力资源利用水平、构建现代化产业体系的重要力量。激荡人力资源“动能”,集聚强国建设“合力”,要凝心聚力、坚定信心,引才聚才、焕发活力,务实合作、优化环境,以人口高质量发展支撑中国式现代化。凝心聚力、坚定信心,促进高质量充分就业。就业是最基本的民生,促进高质量充分就业,是增进民生福祉、提高人民生活品质的根本举措。党的二十大报告提出“促进高质量充分就业”的目标要求,是党中央牢牢把握我国发展的阶段性特征对就业工作作出的重大战略部署。当前,我国人口发展面临新形势、新常态,人力资源丰富仍然是我国的突出优势,人力资源服务业发挥着重要作用。新时代新征程上,要凝心聚力、坚定信心,坚持以习近平新时代中国特色社会主义思想为指导,全面贯彻党的二十大精神,深入贯彻习近平总书记重要指示和党中央决策部署,锚定“稳就业”这一重大政治责任,强化就业优先、促进供需匹配,聚焦创新驱动、推进数字赋能,持续稳存量、扩增量、提质量,全力以赴确保就业局势总体稳定,不断夯实经济社会大局“稳固底座”。引才聚才、焕发活力,提高人力资源开发利用。完善政策体系、壮大行业规模、提高服务水平、优化产业环境……近年来,我国人力资源服务业蓬勃发展、亮点纷呈,实现健康发展,取得长足进步,人力资源质量和利用效率逐步提升,形成了“百花齐放”的良好生态。人才是引领发展的第一动力,是建设现代化经济体系的战略支撑。新时代新征程上,要筑巢引凤、引才聚才、以才吸才,不断焕发新活力、新动能。要着力培养知识型、技能型、创新型的复合型人才,持续打造现代化的人力资源体系,创造更充足的人力资本红利。要适应新经济形态变化,开发、培育、推广新职业,让人才在新时代站上更大舞台、发挥更大作用。要推动“有为政府”与“有效市场”同向发力、共同发力,强化供需匹配度、提高招聘质量水平、提升劳动参与率,激发人力资源服务业的最大效能。务实合作、优化环境,推动人口高质量发展。党的十八大以来,习近平总书记围绕人口发展发表了一系列重要论述,作出了一系列科学判断,强调“中国式现代化是人口规模巨大的现代化”。在今年召开的二十届中央财经委员会第一次会议上,习近平总书记提出了以人口高质量发展支撑中国式现代化的重要论断和工作要求。当前,我们迈上了全面建设社会主义现代化国家新征程,我国人口发展也进入了一个新的阶段。要深刻认识人口发展新形势,科学把握人口高质量发展的深刻内涵,以改革创新推动人口高质量发展,推动人民高品质生活、促进人的全面发展和全体人民共同富裕。要致力务实合作,持续优化人力资源服务业发展环境,深耕党和国家事业发展的重点领域、重点行业,一张蓝图绘到底、一茬接着一茬干,不断展现人力资源服务业新气象、开创发展新局面,为推动高质量发展贡献澎湃力量。(作者单位:松江区委统战部)\n\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t<返回>\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t上海市党建服务中心\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","links":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newestindex!news.action?catalogid=8aafb7055d1da85a015d2f538ea100e7","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环点击下一页","sequence":[4,3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[contains(@class, \"next\")]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":0,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[2]/div[2]/div[1]/div[1]/div[1]/ul[1]/li[10]/a[1]","//a[contains(., '下一页')]","/html/body/div[last()-6]/div/div[last()-1]/div/div/ul/li[last()-1]/a"]}},{"id":4,"index":3,"parentId":2,"type":0,"option":2,"title":"点击下一页","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":0,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":["/html/body/div[2]/div[2]/div[1]/div[1]/div[1]/ul[1]/li[10]/a[1]","//a[contains(., '下一页')]","/html/body/div[last()-6]/div/div[last()-1]/div/div/ul/li[last()-1]/a"]}},{"id":3,"index":4,"parentId":2,"type":1,"option":8,"title":"循环点击文章标题","sequence":[5,6],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[2]/div[2]/div[1]/div[1]/ul[1]/li/h5[1]/a[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","code":"","waitTime":0,"exitCount":0,"exitElement":"//body","historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":5,"index":5,"parentId":3,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":5,"tabIndex":-1,"useLoop":true,"xpath":"","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":2,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"newTab":1,"maxWaitTime":10,"params":[],"alertHandleType":0,"allXPaths":""}},{"id":6,"index":6,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":1.5,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"//a[contains(., '<返回>')]","waitElementTime":15,"waitElementIframeIndex":0,"clear":0,"newLine":1,"params":[{"nodeType":0,"contentType":5,"relative":false,"name":"url","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., '')]","/html/body"],"exampleValues":[{"num":0,"value":"https://www.shjcdj.cn/djWeb/djweb/web/djweb/newestindex/newinfo.action?articleid=ff8080818c64130e018c6aabf897481d"}],"unique_index":"488aw4k05kolq8sgpeo","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":1},{"nodeType":0,"contentType":0,"relative":false,"name":"title","desc":"","extractType":0,"relativeXPath":"/html/body/div[2]/div[1]/div[1]/div[1]/h1[1]","allXPaths":["/html/body/div[2]/div[1]/div[1]/div[1]/h1[1]","//h1[contains(., '网友夏琴:激荡人力资')]","/html/body/div[last()-6]/div/div[last()-1]/div/h1"],"exampleValues":[{"num":0,"value":"网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”"}],"unique_index":"i8o9o8sk6elq8shcvr","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"text","desc":"","extractType":0,"relativeXPath":"//DIV[@class='artbox']","allXPaths":["/html/body/div[2]/div[1]/div[1]/div[1]","//div[contains(., '')]","//DIV[@class='artbox']","/html/body/div[last()-6]/div/div[last()-1]/div"],"exampleValues":[{"num":0,"value":"\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t网友夏琴:激荡人力资源“动能” 集聚强国建设“合力”\n\t\t\t\t\n\t\t\t\t来源:松江区委组织部    发布时间:2023-12-15 \n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t近日,第二届全国人力资源服务业发展大会召开。人力资源是推动社会经济社会发展的第一资源,是战略性新兴产业,也是稳定和扩大就业、提高人力资源利用水平、构建现代化产业体系的重要力量。激荡人力资源“动能”,集聚强国建设“合力”,要凝心聚力、坚定信心,引才聚才、焕发活力,务实合作、优化环境,以人口高质量发展支撑中国式现代化。凝心聚力、坚定信心,促进高质量充分就业。就业是最基本的民生,促进高质量充分就业,是增进民生福祉、提高人民生活品质的根本举措。党的二十大报告提出“促进高质量充分就业”的目标要求,是党中央牢牢把握我国发展的阶段性特征对就业工作作出的重大战略部署。当前,我国人口发展面临新形势、新常态,人力资源丰富仍然是我国的突出优势,人力资源服务业发挥着重要作用。新时代新征程上,要凝心聚力、坚定信心,坚持以习近平新时代中国特色社会主义思想为指导,全面贯彻党的二十大精神,深入贯彻习近平总书记重要指示和党中央决策部署,锚定“稳就业”这一重大政治责任,强化就业优先、促进供需匹配,聚焦创新驱动、推进数字赋能,持续稳存量、扩增量、提质量,全力以赴确保就业局势总体稳定,不断夯实经济社会大局“稳固底座”。引才聚才、焕发活力,提高人力资源开发利用。完善政策体系、壮大行业规模、提高服务水平、优化产业环境……近年来,我国人力资源服务业蓬勃发展、亮点纷呈,实现健康发展,取得长足进步,人力资源质量和利用效率逐步提升,形成了“百花齐放”的良好生态。人才是引领发展的第一动力,是建设现代化经济体系的战略支撑。新时代新征程上,要筑巢引凤、引才聚才、以才吸才,不断焕发新活力、新动能。要着力培养知识型、技能型、创新型的复合型人才,持续打造现代化的人力资源体系,创造更充足的人力资本红利。要适应新经济形态变化,开发、培育、推广新职业,让人才在新时代站上更大舞台、发挥更大作用。要推动“有为政府”与“有效市场”同向发力、共同发力,强化供需匹配度、提高招聘质量水平、提升劳动参与率,激发人力资源服务业的最大效能。务实合作、优化环境,推动人口高质量发展。党的十八大以来,习近平总书记围绕人口发展发表了一系列重要论述,作出了一系列科学判断,强调“中国式现代化是人口规模巨大的现代化”。在今年召开的二十届中央财经委员会第一次会议上,习近平总书记提出了以人口高质量发展支撑中国式现代化的重要论断和工作要求。当前,我们迈上了全面建设社会主义现代化国家新征程,我国人口发展也进入了一个新的阶段。要深刻认识人口发展新形势,科学把握人口高质量发展的深刻内涵,以改革创新推动人口高质量发展,推动人民高品质生活、促进人的全面发展和全体人民共同富裕。要致力务实合作,持续优化人力资源服务业发展环境,深耕党和国家事业发展的重点领域、重点行业,一张蓝图绘到底、一茬接着一茬干,不断展现人力资源服务业新气象、开创发展新局面,为推动高质量发展贡献澎湃力量。(作者单位:松江区委统战部)\n\t\t\n\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t<返回>\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t上海市党建服务中心\n\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\n\t\t\t\t\n\t\t\t"}],"unique_index":"0tq1bg6rty8lq8sg1i4","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0,"splitLine":1}]}}]} \ No newline at end of file diff --git a/ExecuteStage/.vscode/launch.json b/ExecuteStage/.vscode/launch.json index 98884a9..d02a5a8 100644 --- a/ExecuteStage/.vscode/launch.json +++ b/ExecuteStage/.vscode/launch.json @@ -12,7 +12,7 @@ "justMyCode": false, // "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"] // "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"] - "args": ["--ids", "[30]", "--headless", "0", "--user_data", "0", "--keyboard", "0", + "args": ["--ids", "[40]", "--headless", "0", "--user_data", "0", "--keyboard", "0", "--read_type", "remote"] // "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name" } diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py index 487e13e..64b2151 100644 --- a/ExecuteStage/easyspider_executestage.py +++ b/ExecuteStage/easyspider_executestage.py @@ -47,10 +47,11 @@ import requests from ddddocr import DdddOcr from urllib.parse import urljoin from lxml import etree, html + import onnxruntime onnxruntime.set_default_logger_severity(3) # 隐藏onnxruntime的日志 -# import pandas as pd +import pandas as pd # import numpy # import pytesseract # import uuid @@ -481,6 +482,26 @@ class BrowserThread(Thread): if removeDuplicateData == 1: self.print_and_log("正在去除重复数据,请稍后……") self.print_and_log("Removing duplicate data, please wait...") + if self.outputFormat == "csv" or self.outputFormat == "txt" or self.outputFormat == "json" or self.outputFormat == "xlsx": + file_name = "Data/Task_" + \ + str(self.id) + "/" + self.saveName + \ + '.' + self.outputFormat + if self.outputFormat == "csv" or self.outputFormat == "txt": + df = pd.read_csv(file_name) + df.drop_duplicates(inplace=True) + df.to_csv(file_name, index=False) + elif self.outputFormat == "xlsx": + df = pd.read_excel(file_name) + df.drop_duplicates(inplace=True) + df.to_excel(file_name, index=False) + elif self.outputFormat == "json": + df = pd.read_json(file_name) + df.drop_duplicates(inplace=True) + df.to_json(file_name, orient="records", force_ascii=False) + elif self.outputFormat == "mysql": + self.mysql.remove_duplicate_data() + self.print_and_log("去重完成。") + self.print_and_log("Duplicate data removed.") def run(self): # 挨个执行程序 @@ -497,13 +518,13 @@ class BrowserThread(Thread): self.print_and_log("Done!") self.print_and_log("执行完成!") self.saveData(exit=True) + self.removeDuplicateData() if self.outputFormat == "mysql": self.mysql.close() try: quitWaitTime = self.service["quitWaitTime"] except: quitWaitTime = 60 - self.removeDuplicateData() self.print_and_log(f"任务执行完毕,将在{quitWaitTime}秒后自动退出浏览器并清理临时用户目录,等待时间可在保存任务对话框中设置。") self.print_and_log(f"The task is completed, the browser will exit automatically and the temporary user directory will be cleaned up after {quitWaitTime} seconds, the waiting time can be set in the save task dialog.") time.sleep(quitWaitTime) diff --git a/ExecuteStage/utils.py b/ExecuteStage/utils.py index ec9b000..01d2e10 100644 --- a/ExecuteStage/utils.py +++ b/ExecuteStage/utils.py @@ -556,6 +556,10 @@ class myMySQL: sql = "CREATE TABLE " + table_name + \ " (_id INT AUTO_INCREMENT PRIMARY KEY, " for item in parameters: + try: + recordASField = item["recordASField"] + except: + item["recordASField"] = True if item["recordASField"]: name = item['name'] if item['type'] == 'int': @@ -669,6 +673,25 @@ class myMySQL: # 关闭游标和连接 self.cursor.close() + def remove_duplicate_data(self): + self.cursor = self.conn.cursor() + # 删除重复数据 + fields = self.field_sql.replace("(", "").replace(")", "") + sql = f"CREATE TABLE {self.table_name}_temp AS " + \ + f"SELECT MIN(_id) AS _id, " + fields + \ + f" FROM {self.table_name} GROUP BY " + fields + ";" + self.cursor.execute(sql) + sql = f"DELETE FROM {self.table_name};" + self.cursor.execute(sql) + sql = f"INSERT INTO {self.table_name} SELECT * FROM {self.table_name}_temp;" + self.cursor.execute(sql) + sql = f"DROP TABLE {self.table_name}_temp;" + self.cursor.execute(sql) + # 提交到数据库执行 + self.conn.commit() + # 关闭游标和连接 + self.cursor.close() + def close(self): try: self.conn.close()