新增下载图片功能

2025-04-19 08:04:45 +08:00 · 2023-05-20 20:44:34 +08:00 · 2023-05-20 20:44:34 +08:00 · 42db55deb8
commit 42db55deb8
parent 9774592910
23 changed files with 579 additions and 153 deletions
--- a/ElectronJS/.gitignore
+++ b/ElectronJS/.gitignore
@ -10,3 +10,7 @@ user_data/
 Data/
 Chrome/
 execution_instances/*
+EasySpider_en.crx
+EasySpider_zh.crx
+.DS_Store
+npminstall-debug.log
--- a/ElectronJS/EasySpider_en.crx
+++ b/ElectronJS/EasySpider_en.crx
--- a/ElectronJS/EasySpider_zh.crx
+++ b/ElectronJS/EasySpider_zh.crx
--- a/ElectronJS/src/taskGrid/FlowChart_CN.html
+++ b/ElectronJS/src/taskGrid/FlowChart_CN.html
@ -235,6 +235,13 @@
                            <option :value = 3>表单值</option>
                            <option :value = 4>图片地址</option>
                        </select>
+                        <div v-if='paras.parameters[paraIndex]["nodeType"] == 4'>
+                            <label>提取图片地址后是否同时下载图片</label>
+                            <select v-model='paras.parameters[paraIndex]["downloadPic"]' class="form-control">
+                                <option :value = 0>否</option>
+                                <option :value = 1>是</option>
+                            </select>
+                        </div>
 <!--                        <label>提取方式</label>-->
 <!--                        <select v-model='paras.parameters[paraIndex]["extractType"]' class="form-control">-->
 <!--                            <option :value = 0>普通提取</option>-->
@ -390,8 +397,8 @@
                        <input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
                    </div>
                    <div v-else-if='TClass == 7'>
-                        <label>代码/脚本内容: </label>
-                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="输入针对该循环项的JS命令，该循环项用arguments[0]表示，返回值大于0或为真则执行此分支内操作，否则不执行。如：return arguments[0].innerText.indexOf('123') >=0 即判断当前循环项的文本是否包含123，注意要配合循环类型为元素相关（如不固定元素列表）使用。"></textarea>
+                        <label>代码/脚本内容（<a href="https://github.com/NaiboWang/EasySpider/wiki/Example-of-JavaScript-instruction-for-the-current-iteration-in-a-conditional-statement" target="_blank">点击此处</a>查看更多示例）: </label>
+                        <textarea onkeydown="inputDelete(event)" class="form-control" rows="3" v-model='nowNode["parameters"]["code"]' placeholder="输入针对该循环项的JS命令，该循环项用arguments[0]表示，返回值大于0或为真则执行此分支内操作，否则不执行。如：return arguments[0].innerText.length >=5 即判断当前循环项的文本长度是否大于5，注意要配合循环类型为元素相关（如不固定元素列表）使用。"></textarea>
                        <label>最长等待脚本执行时间（0代表无限等待）: </label>
                        <input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
                    </div>
--- a/ElectronJS/src/taskGrid/logic_CN.js
+++ b/ElectronJS/src/taskGrid/logic_CN.js
@ -44,6 +44,7 @@ function changeGetDataParameters(msg, i) {
    msg["parameters"][i]["JSWaitTime"] = 0; //JS等待时间
    msg["parameters"][i]["afterJS"] = ""; //执行后执行的js
    msg["parameters"][i]["afterJSWaitTime"] = 0; //执行后js等待时间
+    msg["parameters"][i]["downloadPic"] = 0; //是否下载图片
 }

 function handleAddElement(msg) {
--- a/ElectronJS/tasks/57.json
+++ b/ElectronJS/tasks/57.json
@ -0,0 +1,335 @@
+{
+    "id": 57,
+    "name": "图片下载",
+    "url": "https://www.jd.com",
+    "links": "https://www.jd.com",
+    "create_time": "5/20/2023, 8:18:15 PM",
+    "containJudge": false,
+    "desc": "https://www.jd.com",
+    "inputParameters": [
+        {
+            "id": 0,
+            "name": "urlList_0",
+            "nodeId": 1,
+            "nodeName": "打开网页",
+            "value": "https://www.jd.com",
+            "desc": "要采集的网址列表，多行以\\n分开",
+            "type": "string",
+            "exampleValue": "https://www.jd.com"
+        }
+    ],
+    "outputParameters": [
+        {
+            "id": 0,
+            "name": "参数3_图片地址",
+            "desc": "",
+            "type": "string",
+            "exampleValue": "//m.360buyimg.com/babel/jfs/t1/223646/1/18719/254758/6458a465F7a57af84/f44d7d983018d9ed.png"
+        }
+    ],
+    "graph": [
+        {
+            "index": 0,
+            "id": 0,
+            "parentId": 0,
+            "type": -1,
+            "option": 0,
+            "title": "root",
+            "sequence": [
+                1,
+                4
+            ],
+            "parameters": {
+                "history": 1,
+                "tabIndex": 0,
+                "useLoop": false,
+                "xpath": "",
+                "wait": 0
+            },
+            "isInLoop": false
+        },
+        {
+            "id": 1,
+            "index": 1,
+            "parentId": 0,
+            "type": 0,
+            "option": 1,
+            "title": "打开网页",
+            "sequence": [],
+            "isInLoop": false,
+            "position": 0,
+            "parameters": {
+                "useLoop": false,
+                "xpath": "",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "url": "https://www.jd.com",
+                "links": "https://www.jd.com",
+                "maxWaitTime": 10,
+                "scrollType": 0,
+                "scrollCount": 0
+            }
+        },
+        {
+            "id": -1,
+            "index": 2,
+            "parentId": 0,
+            "type": 1,
+            "option": 8,
+            "title": "循环",
+            "sequence": [
+                3
+            ],
+            "isInLoop": false,
+            "position": 1,
+            "parameters": {
+                "history": 4,
+                "tabIndex": -1,
+                "useLoop": false,
+                "xpath": "/html/body/div[4]/div[1]/div[4]/a",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "scrollType": 0,
+                "scrollCount": 0,
+                "loopType": 1,
+                "pathList": "",
+                "textList": "",
+                "code": "",
+                "waitTime": 0,
+                "exitCount": 0,
+                "historyWait": 2,
+                "allXPaths": [
+                    "/html/body/div[4]/div[1]/div[4]/a[1]",
+                    "//a[contains(., '平板電腦')]"
+                ]
+            }
+        },
+        {
+            "id": -1,
+            "index": 3,
+            "parentId": 2,
+            "type": 0,
+            "option": 3,
+            "title": "提取数据",
+            "sequence": [],
+            "isInLoop": true,
+            "position": 0,
+            "parameters": {
+                "history": 4,
+                "tabIndex": -1,
+                "useLoop": false,
+                "xpath": "",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "paras": [
+                    {
+                        "nodeType": 1,
+                        "contentType": 0,
+                        "relative": true,
+                        "name": "参数1_链接文本",
+                        "desc": "",
+                        "extractType": 0,
+                        "relativeXPath": "",
+                        "allXPaths": "",
+                        "exampleValues": [
+                            {
+                                "num": 0,
+                                "value": "平板電腦"
+                            },
+                            {
+                                "num": 1,
+                                "value": "爆款耳機"
+                            },
+                            {
+                                "num": 2,
+                                "value": "手機"
+                            },
+                            {
+                                "num": 3,
+                                "value": "數據線"
+                            },
+                            {
+                                "num": 4,
+                                "value": "年貨節"
+                            }
+                        ],
+                        "default": "",
+                        "beforeJS": "",
+                        "beforeJSWaitTime": 0,
+                        "JS": "",
+                        "JSWaitTime": 0,
+                        "afterJS": "",
+                        "afterJSWaitTime": 0,
+                        "downloadPic": 0
+                    },
+                    {
+                        "nodeType": 2,
+                        "contentType": 0,
+                        "relative": true,
+                        "name": "参数2_链接地址",
+                        "desc": "",
+                        "relativeXPath": "",
+                        "allXPaths": "",
+                        "exampleValues": [
+                            {
+                                "num": 0,
+                                "value": "https://search.jd.com/Search?keyword=%E5%B9%B3%E6%9D%BF%E7%94%B5%E8%84%91&enc=utf-8&wq=%E5%B9%B3%E6%9D%BF%E7%94%B5%E8%84%91&pvid=84c62205dccd43dfad1b6eb5fdf5077b"
+                            },
+                            {
+                                "num": 1,
+                                "value": "https://audio.jd.com/"
+                            },
+                            {
+                                "num": 2,
+                                "value": "https://search.jd.com/search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&wq=%E6%89%8B%E6%9C%BA&cid2=653&cid3=655&ev=exbrand_%E5%B0%8F%E7%B1%B3%EF%BC%88MI%EF%BC%89%5E&uc=0#J_searchWrap"
+                            },
+                            {
+                                "num": 3,
+                                "value": "https://mall.jd.com/index-1000007418.html"
+                            },
+                            {
+                                "num": 4,
+                                "value": "https://pro.jd.com/mall/active/22WyJjMqTCbvjj1YB3pSJssBonLR/index.html"
+                            }
+                        ],
+                        "default": "",
+                        "beforeJS": "",
+                        "beforeJSWaitTime": 0,
+                        "JS": "",
+                        "JSWaitTime": 0,
+                        "afterJS": "",
+                        "afterJSWaitTime": 0,
+                        "downloadPic": 0
+                    }
+                ],
+                "loopType": 1
+            }
+        },
+        {
+            "id": 2,
+            "index": 4,
+            "parentId": 0,
+            "type": 1,
+            "option": 8,
+            "title": "循环",
+            "sequence": [
+                5
+            ],
+            "isInLoop": false,
+            "position": 1,
+            "parameters": {
+                "history": 4,
+                "tabIndex": -1,
+                "useLoop": false,
+                "xpath": "/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div/div[1]/div[1]/a[1]/img[1]",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "scrollType": 0,
+                "scrollCount": 0,
+                "loopType": 1,
+                "pathList": "",
+                "textList": "",
+                "code": "",
+                "waitTime": 0,
+                "exitCount": 0,
+                "historyWait": 2,
+                "allXPaths": [
+                    "/html/body/div[5]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/a[1]/img[1]",
+                    "//img[contains(., '')]"
+                ]
+            }
+        },
+        {
+            "id": 3,
+            "index": 5,
+            "parentId": 2,
+            "type": 0,
+            "option": 3,
+            "title": "提取数据",
+            "sequence": [],
+            "isInLoop": true,
+            "position": 0,
+            "parameters": {
+                "history": 4,
+                "tabIndex": -1,
+                "useLoop": false,
+                "xpath": "",
+                "wait": 0,
+                "beforeJS": "",
+                "beforeJSWaitTime": 0,
+                "afterJS": "",
+                "afterJSWaitTime": 0,
+                "paras": [
+                    {
+                        "nodeType": 4,
+                        "contentType": 0,
+                        "relative": true,
+                        "name": "参数3_图片地址",
+                        "desc": "",
+                        "extractType": 0,
+                        "relativeXPath": "",
+                        "allXPaths": "",
+                        "exampleValues": [
+                            {
+                                "num": 0,
+                                "value": "//m.360buyimg.com/babel/jfs/t1/223646/1/18719/254758/6458a465F7a57af84/f44d7d983018d9ed.png"
+                            },
+                            {
+                                "num": 1,
+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/194401/20/32669/76553/64142a96F7733e6ad/cf2727848c86cf45.jpg!q70.dpg"
+                            },
+                            {
+                                "num": 2,
+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/222655/28/27238/153145/644b858eF2cd1200f/e37bd7da42a814b0.jpg!q70.dpg"
+                            },
+                            {
+                                "num": 3,
+                                "value": "//m.360buyimg.com/babel/s710x370_jfs/t1/197659/30/31344/62825/640fd751F694963ed/a6e1ac2e5c27f160.jpg!q70.dpg"
+                            },
+                            {
+                                "num": 4,
+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/194401/20/32669/76553/64142a96F7733e6ad/cf2727848c86cf45.jpg!q70.dpg"
+                            },
+                            {
+                                "num": 5,
+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/222655/28/27238/153145/644b858eF2cd1200f/e37bd7da42a814b0.jpg!q70.dpg"
+                            },
+                            {
+                                "num": 6,
+                                "value": "//m.360buyimg.com/babel/s710x370_jfs/t1/197659/30/31344/62825/640fd751F694963ed/a6e1ac2e5c27f160.jpg!q70.dpg"
+                            },
+                            {
+                                "num": 7,
+                                "value": "//m.360buyimg.com/babel/jfs/t1/223646/1/18719/254758/6458a465F7a57af84/f44d7d983018d9ed.png"
+                            },
+                            {
+                                "num": 8,
+                                "value": "//m.360buyimg.com/babel/s1420x740_jfs/t1/194401/20/32669/76553/64142a96F7733e6ad/cf2727848c86cf45.jpg!q70.dpg"
+                            }
+                        ],
+                        "default": "",
+                        "beforeJS": "",
+                        "beforeJSWaitTime": 0,
+                        "JS": "",
+                        "JSWaitTime": 0,
+                        "afterJS": "",
+                        "afterJSWaitTime": 0,
+                        "downloadPic": 1
+                    }
+                ]
+            }
+        }
+    ]
+}
--- a/ElectronJS/tasks/58.json
+++ b/ElectronJS/tasks/58.json
@ -0,0 +1 @@
+{"id":58,"name":"新web采集任务","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/20/2023, 8:35:56 PM","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表，多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_图片地址","desc":"","type":"string","exampleValue":"//m.360buyimg.com/babel/jfs/t1/81488/28/23346/102165/63b41485F7ecc4f22/be5cee8cf04d7e16.png"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[3]/div[1]/div[1]/div[2]/ul[1]/li/a[1]/img[1]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":0,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"allXPaths":["/html/body/div[5]/div[1]/div[3]/div[1]/div[1]/div[2]/ul[1]/li[1]/a[1]/img[1]","//img[contains(., '')]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数1_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"//m.360buyimg.com/babel/jfs/t1/81488/28/23346/102165/63b41485F7ecc4f22/be5cee8cf04d7e16.png"},{"num":1,"value":"//m.360buyimg.com/babel/jfs/t1/93200/23/34752/53589/63b4148cF5150739b/5d0dc855fe43ca85.png"},{"num":2,"value":"//m.360buyimg.com/babel/jfs/t1/54690/10/22629/29568/63b41496Fad92ac75/605f4fe1c473192c.png"},{"num":3,"value":"//m.360buyimg.com/babel/jfs/t1/53202/29/23281/117684/63b4149dF2beb8956/ac86841f42a75cd0.png"},{"num":4,"value":"//m.360buyimg.com/babel/jfs/t1/90039/40/25105/65438/63b414a5Fb09a6926/740fb4daeef82c57.png"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":1}],"loopType":1}}]}
--- a/ElectronJS/tasks/59.json
+++ b/ElectronJS/tasks/59.json
--- a/ExecuteStage/.gitignore
+++ b/ExecuteStage/.gitignore
@ -11,4 +11,5 @@ Data/
 tasks/
 Application/
 .history
-execution_instances/
+execution_instances/
+.DS_Store
--- a/ExecuteStage/.vscode/launch.json
+++ b/ExecuteStage/.vscode/launch.json
@ -12,7 +12,7 @@
            "console": "integratedTerminal",
            "justMyCode": true,
            // "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
-            "args": ["--id", "10", "--headless", "0"]
+            "args": ["--id", "15", "--headless", "0"]
        }
    ]
 }
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@ -30,7 +30,7 @@ from selenium.webdriver.common.by import By
 from commandline_config import Config
 import pytesseract
 from PIL import Image
-
+import uuid

 saveName, log, OUTPUT, browser, SAVED = None, "", "", None, False

@ -65,6 +65,38 @@ def Log(text, text2=""):
 # 屏幕滚动函数


+
+def download_image(url, save_directory):
+    """Download the image at `url` into the existing folder `save_directory`.
+
+    The file is named "<uuid4>_<last path segment of url>". Returns None; a failed
+    request or non-OK status is only printed, file-write errors still propagate.
+    """
+    # Send a browser-like User-Agent with the request.
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+    try:
+        # requests has no default timeout, so an unresponsive server would block this call forever.
+        response = requests.get(url, headers=headers, timeout=30)
+        succeeded = response.status_code == requests.codes.ok
+    except requests.exceptions.RequestException:  # empty/invalid URL, network error, timeout
+        succeeded = False
+    if succeeded:
+        # Drop any ?query / #fragment: not part of the name, and "?" is illegal on Windows.
+        file_name = url.split('?')[0].split('#')[0].split('/')[-1]
+        # The random prefix keeps equally named images from overwriting each other.
+        new_file_name = str(uuid.uuid4()) + '_' + file_name
+        save_path = os.path.join(save_directory, new_file_name)
+        with open(save_path, 'wb') as file:
+            file.write(response.content)
+        print("图片已成功下载到:", save_path)
+        print("The image has been successfully downloaded to:", save_path)
+    else:
+        print("下载图片失败，请检查此图片链接是否有效:", url)
+        print("Failed to download image, please check if this image link is valid:", url)
+
+
 def scrollDown(para, rt=""):
    try:
        if para["scrollType"] != 0 and para["scrollCount"] > 0:  # 控制屏幕向下滚动
@ -180,6 +212,7 @@ def executeNode(nodeId, loopValue="", clickPath="", index=0):
        inputInfo(node["parameters"], loopValue)
    elif node["option"] == 5:  # 自定义操作
        customOperation(node, loopValue)
+        saveData()
    elif node["option"] == 8:  # 循环
        recordLog("loop")
        loopExcute(node, loopValue, clickPath, index)  # 执行循环
@ -644,8 +677,8 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
                recordLog('Element %s not found, use default' % p["relativeXPath"])
                continue
            except TimeoutException:  # 超时的时候设置超时值
-                Log('time out after 10 seconds when getting data')
-                recordLog('time out after 10 seconds when getting data')
+                Log('time out after set seconds when getting data')
+                recordLog('time out after set seconds when getting data')
                browser.execute_script('window.stop()')
                if p["relative"]:  # 是否相对xpath
                    if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身，不需要二次查找
@ -660,104 +693,44 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
            element = browser.find_element(By.XPATH, "//body")
        try:
            execute_code(2, p["beforeJS"], p["beforeJSWaitTime"], element) # 执行前置js
-            if p["contentType"] == 2:
-                content = element.get_attribute('innerHTML')
-            elif p["contentType"] == 3:
-                content = element.get_attribute('outerHTML')
-            elif p["contentType"] == 4:
-                # 获取元素的背景图片地址
-                bg_url = element.value_of_css_property('background-image')
-                # 清除背景图片地址中的多余字符
-                bg_url = bg_url.replace('url("', '').replace('")', '')
-                content = bg_url
-            elif p["contentType"] == 5:
-                content = browser.current_url
-            elif p["contentType"] == 6:
-                content = browser.title
-            elif p["contentType"] == 7:
-                # 获取整个网页的高度和宽度
-                height = browser.execute_script("return document.body.scrollHeight");
-                width = browser.execute_script("return document.body.scrollWidth");
-                # 调整浏览器窗口的大小
-                browser.set_window_size(width, height)
-                element.screenshot("Data/" +saveName + "/"+ str(time.time()) + ".png")
-            elif p["contentType"] == 8:
-                try:
-                    screenshot = element.screenshot_as_png
-                    screenshot_stream = io.BytesIO(screenshot)
-                    # 使用Pillow库打开截图，并转换为灰度图像
-                    image = Image.open(screenshot_stream).convert('L')
-                    # 使用Tesseract OCR引擎识别图像中的文本
-                    text = pytesseract.image_to_string(image,  lang='chi_sim+eng')
-                    content = text
-                except Exception as e:
-                    content = "OCR Error"
-                    print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH: https://tesseract-ocr.github.io/tessdoc/Installation.html")
-                    print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量PATH中：https://blog.csdn.net/u010454030/article/details/80515501")
-            elif p["contentType"] == 9:
-                content = execute_code(2, p["JS"], p["JSWaitTime"], element)
-            elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
-                command = 'var arr = [];\
-                var content = arguments[0];\
-                for(var i = 0, len = content.childNodes.length; i < len; i++) {\
-                    if(content.childNodes[i].nodeType === 3){  \
-                        arr.push(content.childNodes[i].nodeValue);\
-                    }\
-                }\
-                var str = arr.join(" "); \
-                return str;'
-                content = browser.execute_script(command, element).replace(
-                    "\n", "").replace("\\s+", " ")
-                if p["nodeType"] == 2:
-                    if element.get_attribute("href") != None:
-                        content = element.get_attribute("href")
-                    else:
-                        content = ""
-                elif p["nodeType"] == 3:
-                    if element.get_attribute("value") != None:
-                        content = element.get_attribute("value")
-                    else:
-                        content = ""
-                elif p["nodeType"] == 4:  # 图片
-                    if element.get_attribute("src") != None:
-                        content = element.get_attribute("src")
-                    else:
-                        content = ""
-            elif p["contentType"] == 0:
-                content = element.text
-                if p["nodeType"] == 2:
-                    if element.get_attribute("href") != None:
-                        content = element.get_attribute("href")
-                    else:
-                        content = ""
-                elif p["nodeType"] == 3:
-                    if element.get_attribute("value") != None:
-                        content = element.get_attribute("value")
-                    else:
-                        content = ""
-                elif p["nodeType"] == 4:  # 图片
-                    if element.get_attribute("src") != None:
-                        content = element.get_attribute("src")
-                    else:
-                        content = ""
-        except StaleElementReferenceException:  # 发生找不到元素的异常后，等待几秒重新查找
-            recordLog('StaleElementReferenceException：'+p["relativeXPath"])
-            time.sleep(3)
-            try:
-                if p["relative"]:  # 是否相对xpath
-                    if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身，不需要二次查找
-                        element = loopElement
-                        recordLog('StaleElementReferenceException：loopElement')
-                    else:
-                        element = loopElement.find_element(By.XPATH,
-                                                           p["relativeXPath"][1:])
-                        recordLog(
-                            'StaleElementReferenceException：loopElement+relativeXPath')
+            # 先处理特殊节点类型
+            if p["nodeType"] == 2:
+                if element.get_attribute("href") != None:
+                    content = element.get_attribute("href")
                else:
-                    element = browser.find_element(
-                        By.XPATH, p["relativeXPath"])
-                    recordLog('StaleElementReferenceException：relativeXPath')
-                if p["contentType"] == 2:
+                    content = ""
+            elif p["nodeType"] == 3:
+                if element.get_attribute("value") != None:
+                    content = element.get_attribute("value")
+                else:
+                    content = ""
+            elif p["nodeType"] == 4:  # 图片
+                if element.get_attribute("src") != None:
+                    content = element.get_attribute("src")
+                else:
+                    content = ""
+                try:
+                    downloadPic = p["downloadPic"]
+                except:
+                    downloadPic = 0
+                if downloadPic == 1:
+                    download_image(content, "Data/" +saveName + "/")
+            else: # 普通节点
+                if p["contentType"] == 0:
+                    content = element.text
+                elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
+                    command = 'var arr = [];\
+                    var content = arguments[0];\
+                    for(var i = 0, len = content.childNodes.length; i < len; i++) {\
+                        if(content.childNodes[i].nodeType === 3){  \
+                            arr.push(content.childNodes[i].nodeValue);\
+                        }\
+                    }\
+                    var str = arr.join(" "); \
+                    return str;'
+                    content = browser.execute_script(command, element).replace(
+                        "\n", "").replace("\\s+", " ")
+                elif p["contentType"] == 2:
                    content = element.get_attribute('innerHTML')
                elif p["contentType"] == 3:
                    content = element.get_attribute('outerHTML')
@ -788,55 +761,101 @@ def getData(para, loopElement, isInLoop=True, parentPath="", index=0):
                        text = pytesseract.image_to_string(image,  lang='chi_sim+eng')
                        content = text
                    except Exception as e:
-                        content = "OCR失败"
-                        print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable path: https://tesseract-ocr.github.io/tessdoc/Installation.html")
-                        print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量path中：")
+                        content = "OCR Error"
+                        print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH: https://tesseract-ocr.github.io/tessdoc/Installation.html")
+                        print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量PATH中：https://blog.csdn.net/u010454030/article/details/80515501")
                elif p["contentType"] == 9:
                    content = execute_code(2, p["JS"], p["JSWaitTime"], element)
-                elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
-                    command = 'var arr = [];\
-                    var content = arguments[0];\
-                    for(var i = 0, len = content.childNodes.length; i < len; i++) {\
-                        if(content.childNodes[i].nodeType === 3){  \
-                            arr.push(content.childNodes[i].nodeValue);\
+        except StaleElementReferenceException:  # 发生找不到元素的异常后，等待几秒重新查找
+            recordLog('StaleElementReferenceException：'+p["relativeXPath"])
+            time.sleep(3)
+            try:
+                if p["relative"]:  # 是否相对xpath
+                    if p["relativeXPath"] == "":  # 相对xpath有时候就是元素本身，不需要二次查找
+                        element = loopElement
+                        recordLog('StaleElementReferenceException：loopElement')
+                    else:
+                        element = loopElement.find_element(By.XPATH,
+                                                           p["relativeXPath"][1:])
+                        recordLog(
+                            'StaleElementReferenceException：loopElement+relativeXPath')
+                else:
+                    element = browser.find_element(
+                        By.XPATH, p["relativeXPath"])
+                    recordLog('StaleElementReferenceException：relativeXPath')
+                # 先处理特殊节点类型
+                if p["nodeType"] == 2:
+                    if element.get_attribute("href") != None:
+                        content = element.get_attribute("href")
+                    else:
+                        content = ""
+                elif p["nodeType"] == 3:
+                    if element.get_attribute("value") != None:
+                        content = element.get_attribute("value")
+                    else:
+                        content = ""
+                elif p["nodeType"] == 4:  # 图片
+                    if element.get_attribute("src") != None:
+                        content = element.get_attribute("src")
+                    else:
+                        content = ""
+                    try:
+                        downloadPic = p["downloadPic"]
+                    except:
+                        downloadPic = 0
+                    if downloadPic == 1:
+                        download_image(content, "Data/" +saveName + "/")
+                else: # 普通节点
+                    if p["contentType"] == 0:
+                        content = element.text
+                    elif p["contentType"] == 1:  # 只采集当期元素下的文本，不包括子元素
+                        command = 'var arr = [];\
+                        var content = arguments[0];\
+                        for(var i = 0, len = content.childNodes.length; i < len; i++) {\
+                            if(content.childNodes[i].nodeType === 3){  \
+                                arr.push(content.childNodes[i].nodeValue);\
+                            }\
                        }\
-                    }\
-                    var str = arr.join(" "); \
-                    return str;'
-                    content = browser.execute_script(command, element).replace(
-                        "\n", "").replace("\\s+", " ")
-                    if p["nodeType"] == 2:
-                        if element.get_attribute("href") != None:
-                            content = element.get_attribute("href")
-                        else:
-                            content = ""
-                    elif p["nodeType"] == 3:
-                        if element.get_attribute("value") != None:
-                            content = element.get_attribute("value")
-                        else:
-                            content = ""
-                    elif p["nodeType"] == 4:  # 图片
-                        if element.get_attribute("src") != None:
-                            content = element.get_attribute("src")
-                        else:
-                            content = ""
-                elif p["contentType"] == 0:
-                    content = element.text
-                    if p["nodeType"] == 2:
-                        if element.get_attribute("href") != None:
-                            content = element.get_attribute("href")
-                        else:
-                            content = ""
-                    elif p["nodeType"] == 3:
-                        if element.get_attribute("value") != None:
-                            content = element.get_attribute("value")
-                        else:
-                            content = ""
-                    elif p["nodeType"] == 4:  # 图片
-                        if element.get_attribute("src") != None:
-                            content = element.get_attribute("src")
-                        else:
-                            content = ""
+                        var str = arr.join(" "); \
+                        return str;'
+                        content = browser.execute_script(command, element).replace(
+                            "\n", "").replace("\\s+", " ")
+                    elif p["contentType"] == 2:
+                        content = element.get_attribute('innerHTML')
+                    elif p["contentType"] == 3:
+                        content = element.get_attribute('outerHTML')
+                    elif p["contentType"] == 4:
+                        # 获取元素的背景图片地址
+                        bg_url = element.value_of_css_property('background-image')
+                        # 清除背景图片地址中的多余字符
+                        bg_url = bg_url.replace('url("', '').replace('")', '')
+                        content = bg_url
+                    elif p["contentType"] == 5:
+                        content = browser.current_url
+                    elif p["contentType"] == 6:
+                        content = browser.title
+                    elif p["contentType"] == 7:
+                        # 获取整个网页的高度和宽度
+                        height = browser.execute_script("return document.body.scrollHeight");
+                        width = browser.execute_script("return document.body.scrollWidth");
+                        # 调整浏览器窗口的大小
+                        browser.set_window_size(width, height)
+                        element.screenshot("Data/" +saveName + "/"+ str(time.time()) + ".png")
+                    elif p["contentType"] == 8:
+                        try:
+                            screenshot = element.screenshot_as_png
+                            screenshot_stream = io.BytesIO(screenshot)
+                            # 使用Pillow库打开截图，并转换为灰度图像
+                            image = Image.open(screenshot_stream).convert('L')
+                            # 使用Tesseract OCR引擎识别图像中的文本
+                            text = pytesseract.image_to_string(image,  lang='chi_sim+eng')
+                            content = text
+                        except Exception as e:
+                            content = "OCR Error"
+                            print("To use OCR, You need to install Tesseract-OCR and add it to the environment variable PATH: https://tesseract-ocr.github.io/tessdoc/Installation.html")
+                            print("要使用OCR识别功能，你需要安装Tesseract-OCR并将其添加到环境变量PATH中：https://blog.csdn.net/u010454030/article/details/80515501")
+                    elif p["contentType"] == 9:
+                        content = execute_code(2, p["JS"], p["JSWaitTime"], element)
            except StaleElementReferenceException:
                recordLog('StaleElementReferenceException：'+p["relativeXPath"])
                continue  # 再出现类似问题直接跳过
@ -859,7 +878,7 @@ def isnull(s):

 def saveData(exit=False):
    global saveName, log, OUTPUT, browser
-    if exit == True or len(OUTPUT) > 100: # 每100条保存一次
+    if exit == True or len(OUTPUT) >= 100: # 每100条保存一次
        with open("Data/"+saveName + '_log.txt', 'a', encoding='utf-8-sig') as file_obj:
            file_obj.write(log)
            file_obj.close()
@ -890,6 +909,7 @@ if __name__ == '__main__':
        "config_folder": "",
        "config_file_name": "config.json",
        "headless": False,
+        "version": "0.3.0",
    }
    c = Config(config)
    print(c)
--- a/Extension/manifest_v3/.gitignore
+++ b/Extension/manifest_v3/.gitignore
@ -4,3 +4,5 @@ dist
 .env
 EasySpider_en
 EasySpider_zh
+EasySpider_en.crx
+EasySpider_zh.crx
--- a/Extension/manifest_v3/EasySpider_en.crx
+++ b/Extension/manifest_v3/EasySpider_en.crx
--- a/Extension/manifest_v3/EasySpider_zh.crx
+++ b/Extension/manifest_v3/EasySpider_zh.crx
--- a/Extension/manifest_v3/src/manifest.json
+++ b/Extension/manifest_v3/src/manifest.json
@ -1,6 +1,6 @@
 {
  "name": "EasySpider",
-  "version": "0.2",
+  "version": "0.3.0",
  "description": "EasySpider's chrome extension",
  "author": "Naibo Wang",
  "manifest_version": 3,
--- a/Releases/EasySpider_windows_amd64/V0.3.0
+++ b/Releases/EasySpider_windows_amd64/V0.3.0
@ -0,0 +1,48 @@
+https://github.com/NaiboWang/EasySpider/releases/tag/v0.3.0
+
+### 强烈建议大家观看新特性讲解视频
+
+B站最新版特性视频已上传，新视频非常有用，推荐大家观看。
+
+[【重要】自定义条件判断之使用循环项内的JS命令返回值 - 第二弹](https://www.bilibili.com/video/BV1mu411x7Nn/)
+
+[如何执行自己写的JS代码和系统代码 （自定义操作）](https://www.bilibili.com/video/BV1qs4y1z7Hc/)
+
+[如何自定义循环和判断条件 - 第一弹](https://www.bilibili.com/video/BV1Ys4y1z777/)
+
+[如何对元素和网页截图及（无头模式）命令行执行指南](https://www.bilibili.com/video/BV1dV4y1z764/)
+
+[OCR识别元素内容功能](https://www.bilibili.com/video/BV1xz4y1b72D/)
+
+注意：v0.3.0版本`tasks`文件夹内的`.json`任务文件与v0.2.0版本不兼容，请在v0.3.0版本中重新设计任务。
+
+## 更新说明
+1. 高级操作：
+ - 可以在任务流程中**执行自定义脚本**，包括在浏览器中**执行Javascript指令**以及**操作系统级别的脚本调用**并可**得到命令返回值并记录**，大大扩展了可操作空间。
+
+![image](https://github.com/NaiboWang/EasySpider/assets/30287768/06e63a06-328d-4339-b40b-2d57c94cee66)
+
+ - 在每一个操作执行前和执行后，都可以指定执行一段针对当前定位元素的JavaScript指令。
+ 
+<img src="https://github.com/NaiboWang/EasySpider/assets/30287768/dde64388-5668-40ff-951e-fb8f60655c49" height=50% width=50%> 
+
+2. **判断条件和循环条件**中同样增加了**执行自定义脚本**，并根据自定义脚本的返回值是否为真来作为条件判断和循环的判断条件，同样极大地增加了任务的可操作性。
+![image](https://github.com/NaiboWang/EasySpider/assets/30287768/9dea0564-1a1c-487d-9fa4-427c5e284796)
+3. 可同时生成多种XPath供用户选择，并**预装了XPath Helper扩展**供大家调试XPath。
+4. 增加采集元素背景图片地址、当前页面标题、当前页面URL地址功能。
+5. 增加保存元素截图功能，如要截图某元素或整个网页页面，可以用此功能（配合无头模式效果更好）。
+6. 增加下载图片功能（正式版，Beta版没有）。
+7. 增加OCR识别元素功能（使用此功能需首先自行安装Tesseract库：[https://blog.csdn.net/u010454030/article/details/80515501](https://blog.csdn.net/u010454030/article/details/80515501)）
+8. 可直接提取对元素执行JavaScript代码后的返回值，实现如正则表达式匹配、获取元素背景颜色等功能。
+<img src="https://github.com/NaiboWang/EasySpider/assets/30287768/f6a9b5ce-63c5-4348-8967-053c21d67ef9" width=50% height=50%>
+
+9. 大幅增加使用提示和说明，使软件更易用（如增加了iframe标签的处理方式说明，各个选项的参数意义，以及循环项XPath的修改说明等等）。
+10. 执行命令时增加了如何用命令行执行任务的提示：[https://github.com/NaiboWang/EasySpider/wiki/Argument-Instruction](https://github.com/NaiboWang/EasySpider/wiki/Argument-Instruction)。
+![image](https://github.com/NaiboWang/EasySpider/assets/30287768/a9e774df-e345-4d51-b7c9-2c4dac0ec624)
+11. 增加无头模式，即无浏览器界面模式配置。
+12. 修复了使用用户配置浏览器模式下的中文路径不能正确识别的问题。
+13. 修复了条件分支没有无条件分支时会卡死的问题。
+14. 修复了保存任务后输入框会卡死的问题。
+15. 打开网页操作和点击元素操作新增设置页面最长加载等待时间。
+16. 增加版本更新提示。
+17. 更新chrome版本为113。
--- a/Releases/EasySpider_windows_amd64/config.json
+++ b/Releases/EasySpider_windows_amd64/config.json
@ -1 +1 @@
-{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\Releases\\EasySpider_windows_amd64\\user_data1"}
+{"webserver_address":"http://localhost","webserver_port":8074,"user_data_folder":"./user_data12","absolute_user_data_folder":"D:\\Documents\\Projects\\EasySpider\\Releases\\EasySpider_windows_amd64\\user_data1"}
--- a/Releases/EasySpider_windows_amd64/execution_instances/0.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/0.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/1.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/1.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/2.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/2.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/3.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/3.json
--- a/Releases/EasySpider_windows_amd64/execution_instances/4.json
+++ b/Releases/EasySpider_windows_amd64/execution_instances/4.json
--- a/Releases/EasySpider_windows_amd64/tasks/49.json
+++ b/Releases/EasySpider_windows_amd64/tasks/49.json
				`@ -0,0 +1 @@`
				{"id":58,"name":"新web采集任务","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/20/2023, 8:35:56 PM","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表，多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_图片地址","desc":"","type":"string","exampleValue":"//m.360buyimg.com/babel/jfs/t1/81488/28/23346/102165/63b41485F7ecc4f22/be5cee8cf04d7e16.png"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":0}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[3]/div[1]/div[1]/div[2]/ul[1]/li/a[1]/img[1]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":0,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"allXPaths":["/html/body/div[5]/div[1]/div[3]/div[1]/div[1]/div[2]/ul[1]/li[1]/a[1]/img[1]","//img[contains(., 
'')]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数1_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"//m.360buyimg.com/babel/jfs/t1/81488/28/23346/102165/63b41485F7ecc4f22/be5cee8cf04d7e16.png"},{"num":1,"value":"//m.360buyimg.com/babel/jfs/t1/93200/23/34752/53589/63b4148cF5150739b/5d0dc855fe43ca85.png"},{"num":2,"value":"//m.360buyimg.com/babel/jfs/t1/54690/10/22629/29568/63b41496Fad92ac75/605f4fe1c473192c.png"},{"num":3,"value":"//m.360buyimg.com/babel/jfs/t1/53202/29/23281/117684/63b4149dF2beb8956/ac86841f42a75cd0.png"},{"num":4,"value":"//m.360buyimg.com/babel/jfs/t1/90039/40/25105/65438/63b414a5Fb09a6926/740fb4daeef82c57.png"}],"default":"","beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":1}],"loopType":1}}]}