EasySpider/ExecuteStage/Program.ipynb
NaiboWang-Alienware f125db1f8e New version
2022-10-19 15:33:12 +08:00

911 lines
54 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 服务包装手动版工具执行阶段"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## 导入包"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"pycharm": {
"is_executing": false
}
},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"import json\n",
"import re\n",
"from urllib import parse\n",
"import base64\n",
"import hashlib\n",
"import time\n",
"import requests\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.webdriver.common.action_chains import ActionChains\n",
"from selenium import webdriver\n",
"from selenium.webdriver.support.ui import WebDriverWait\n",
"from selenium.webdriver.support import expected_conditions as EC\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.common.exceptions import NoSuchElementException\n",
"from selenium.common.exceptions import TimeoutException\n",
"from selenium.common.exceptions import StaleElementReferenceException\n",
"import random\n",
"import numpy\n",
"import csv"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"## 核心函数处理部分"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"pycharm": {
"is_executing": false
}
},
"outputs": [],
"source": [
"# 记录log\n",
"def recordLog(str=\"\"):\n",
" global log\n",
" log = log + str + \"\\n\"\n",
" \n",
"#控制台打印log函数\n",
"def Log(text,text2=\"\"):\n",
" switch = False\n",
" if switch:\n",
" print(text,text2)\n",
"\n",
"# 执行节点关键函数部分\n",
"def excuteNode(nodeId, loopValue=\"\", clickPath=\"\", index=0):\n",
" node = procedure[nodeId]\n",
" WebDriverWait(browser, 10).until\n",
" (EC.visibility_of_element_located((By.XPATH, node[\"parameters\"][\"xpath\"]))) # 等待元素出现才进行操作10秒内未出现则报错\n",
"\n",
" # 根据不同选项执行不同操作\n",
" if node[\"option\"] == 0 or node[\"option\"] == 10: # root操作,条件分支操作\n",
" for i in node[\"sequence\"]: # 从根节点开始向下读取\n",
" excuteNode(i, loopValue)\n",
" elif node[\"option\"] == 1: # 打开网页操作\n",
" recordLog(\"openPage\")\n",
" openPage(node[\"parameters\"], loopValue)\n",
" elif node[\"option\"] == 2: # 点击元素\n",
" recordLog(\"Click\")\n",
" clickElement(node[\"parameters\"], loopValue, clickPath, index)\n",
" elif node[\"option\"] == 3: # 提取数据\n",
" recordLog(\"getData\")\n",
" getData(node[\"parameters\"], loopValue, node[\"isInLoop\"])\n",
" elif node[\"option\"] == 4: # 输入文字\n",
" inputInfo(node[\"parameters\"], loopValue)\n",
" elif node[\"option\"] == 8: # 循环\n",
" recordLog(\"loop\")\n",
" loopExcute(node, loopValue) # 执行循环\n",
" elif node[\"option\"] == 9: # 条件分支\n",
" recordLog(\"judge\")\n",
" judgeExcute(node, loopValue)\n",
"\n",
" # 执行完之后进行等待\n",
" if node[\"option\"] != 0:\n",
" waitTime = 0.01 # 默认等待0.01秒\n",
" if node[\"parameters\"][\"wait\"] > 1:\n",
" waitTime = node[\"parameters\"][\"wait\"]\n",
" time.sleep(waitTime)\n",
" Log(\"Node执行完后等待\",waitTime)\n",
"\n",
"\n",
"# 对判断条件的处理\n",
"def judgeExcute(node, loopElement):\n",
" global bodyText # 引入bodyText\n",
" excuteBranchId = 0 # 要执行的BranchId\n",
" for i in node[\"sequence\"]:\n",
" cnode = procedure[i] # 获得条件分支\n",
" tType = int(cnode[\"parameters\"][\"class\"]) # 获得判断条件类型\n",
" if tType == 0: # 什么条件都没有\n",
" excuteBranchId = i\n",
" break\n",
" elif tType == 1: # 当前页面包含文本\n",
" try:\n",
" if bodyText.find(cnode[\"parameters\"][\"value\"]) >= 0:\n",
" excuteBranchId = i\n",
" break\n",
" except: # 找不到元素下一个条件\n",
" continue\n",
" elif tType == 2: # 当前页面包含元素\n",
" try:\n",
" if browser.find_element_by_xpath(cnode[\"parameters\"][\"value\"]):\n",
" excuteBranchId = i\n",
" break\n",
" except: # 找不到元素或者xpath写错了下一个条件\n",
" continue\n",
" elif tType == 3: # 当前循环元素包括文本\n",
" try:\n",
" if loopElement.text.find(cnode[\"parameters\"][\"value\"]) >= 0:\n",
" excuteBranchId = i\n",
" break\n",
" except: # 找不到元素或者xpath写错了下一个条件\n",
" continue\n",
" elif tType == 4: # 当前循环元素包括元素\n",
" try:\n",
" if loopElement.find_element_by_xpath(cnode[\"parameters\"][\"value\"][1:]):\n",
" excuteBranchId = i\n",
" break\n",
" except: # 找不到元素或者xpath写错了下一个条件\n",
" continue\n",
" excuteNode(excuteBranchId, loopElement)\n",
"\n",
"\n",
"# 对循环的处理\n",
"def loopExcute(node, loopValue):\n",
" time.sleep(0.1) # 第一次执行循环的时候强制等待1秒\n",
" Log(\"循环执行前等待0.1秒\")\n",
" global history\n",
" thisHandle = browser.current_window_handle # 记录本次循环内的标签页的ID\n",
" thisHistoryLength = browser.execute_script('return history.length') # 记录本次循环内的history的length\n",
"\n",
" if int(node[\"parameters\"][\"loopType\"]) == 0: # 单个元素循环\n",
" # 无跳转标签页操作\n",
" count = 0 # 执行次数\n",
" while True: # do while循环\n",
" try:\n",
" element = browser.find_element_by_xpath(node[\"parameters\"][\"xpath\"])\n",
" for i in node[\"sequence\"]: # 挨个执行操作\n",
" excuteNode(i, element, node[\"parameters\"][\"xpath\"])\n",
" Log(\"click: \", node[\"parameters\"][\"xpath\"])\n",
" recordLog(\"click:\" + node[\"parameters\"][\"xpath\"])\n",
" except NoSuchElementException:\n",
" break # 如果找不到元素,退出循环\n",
" except Exception as e:\n",
" raise\n",
" count = count + 1\n",
" Log(\"页数:\", count)\n",
" recordLog(\"页数:\" + str(count))\n",
" if node[\"parameters\"][\"exitCount\"] == count: # 如果达到设置的退出循环条件的话\n",
" break\n",
" elif int(node[\"parameters\"][\"loopType\"]) == 1: # 不固定元素列表\n",
" try:\n",
" elements = browser.find_elements_by_xpath(node[\"parameters\"][\"xpath\"])\n",
" for index in range(len(elements)):\n",
" for i in node[\"sequence\"]: # 挨个执行操作\n",
" excuteNode(i, elements[index], node[\"parameters\"][\"xpath\"], index)\n",
" if browser.current_window_handle != thisHandle: # 如果执行完一次循环之后标签页的位置发生了变化\n",
" while True: # 一直关闭窗口直到当前标签页\n",
" browser.close() # 关闭使用完的标签页\n",
" browser.switch_to.window(browser.window_handles[-1])\n",
" if browser.current_window_handle == thisHandle:\n",
" break\n",
" if history[\"index\"] != thisHistoryLength and history[\"handle\"] == browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断\n",
" difference = thisHistoryLength - history[\"index\"] # 计算历史记录变化差值\n",
" browser.execute_script('history.go(' + str(difference) + ')') # 回退历史记录\n",
" if node[\"parameters\"][\"historyWait\"] > 2: # 回退后要等待的时间\n",
" time.sleep(node[\"parameters\"][\"historyWait\"])\n",
" else:\n",
" time.sleep(2)\n",
" Log(\"切换历史记录等待2秒或者\",node[\"parameters\"][\"historyWait\"])\n",
" browser.execute_script('window.stop()')\n",
" except NoSuchElementException:\n",
" Log(\"pathNotFound: \", node[\"parameters\"][\"xpath\"])\n",
" recordLog(\"pathNotFound: \" + node[\"parameters\"][\"xpath\"])\n",
" pass # 循环中找不到元素就略过操作\n",
" except Exception as e:\n",
" raise\n",
" elif int(node[\"parameters\"][\"loopType\"]) == 2: # 固定元素列表\n",
" for path in node[\"parameters\"][\"pathList\"].split(\"\\n\"): # 千万不要忘了分割!!\n",
" try:\n",
" element = browser.find_element_by_xpath(path)\n",
" for i in node[\"sequence\"]: # 挨个执行操作\n",
" excuteNode(i, element, path,0)\n",
" if browser.current_window_handle != thisHandle: # 如果执行完一次循环之后标签页的位置发生了变化\n",
" while True: # 一直关闭窗口直到当前标签页\n",
" browser.close() # 关闭使用完的标签页\n",
" browser.switch_to.window(browser.window_handles[-1])\n",
" if browser.current_window_handle == thisHandle:\n",
" break\n",
" if history[\"index\"] != thisHistoryLength and history[\"handle\"] == browser.current_window_handle: # 如果执行完一次循环之后历史记录发生了变化,注意当前页面的判断\n",
" difference = thisHistoryLength - history[\"index\"] # 计算历史记录变化差值\n",
" browser.execute_script('history.go(' + str(difference) + ')') # 回退历史记录\n",
" if node[\"parameters\"][\"historyWait\"] > 2: # 回退后要等待的时间\n",
" time.sleep(node[\"parameters\"][\"historyWait\"])\n",
" else:\n",
" time.sleep(2)\n",
" Log(\"切换历史记录等待2秒或者\",node[\"parameters\"][\"historyWait\"])\n",
" browser.execute_script('window.stop()')\n",
" except NoSuchElementException:\n",
" Log(\"pathNotFound: \", path)\n",
" recordLog(\"pathNotFound: \" + path)\n",
" continue # 循环中找不到元素就略过操作\n",
" except Exception as e:\n",
" raise\n",
" elif int(node[\"parameters\"][\"loopType\"]) == 3: # 固定文本列表\n",
" textList = node[\"parameters\"][\"textList\"].split(\"\\n\")\n",
" for text in textList:\n",
" recordLog(\"input: \" + text)\n",
" for i in node[\"sequence\"]: # 挨个执行操作\n",
" excuteNode(i, text, \"\")\n",
" elif int(node[\"parameters\"][\"loopType\"]) == 4: # 固定网址列表\n",
" pass # 以后再做\n",
" history[\"index\"] = thisHistoryLength\n",
" history[\"handle\"] = browser.current_window_handle\n",
" \n",
"# 打开网页事件\n",
"def openPage(para, loopValue):\n",
" global links\n",
" global urlId\n",
" global history\n",
" browser.switch_to.window(browser.window_handles[0]) # 打开网页操作从第1个页面开始\n",
" history[\"handle\"] = browser.current_window_handle\n",
" if para[\"useLoop\"]:\n",
" url = loopValue\n",
" else:\n",
" url = links[urlId]\n",
" try:\n",
" browser.get(url)\n",
" except TimeoutException:\n",
" Log('time out after 10 seconds when loading page: ' + url)\n",
" recordLog('time out after 10 seconds when loading page: ' + url)\n",
" browser.execute_script('window.stop()')\n",
" try:\n",
" history[\"index\"] = browser.execute_script(\"return history.length\")\n",
" except TimeoutException:\n",
" browser.execute_script('window.stop()')\n",
" history[\"index\"] = browser.execute_script(\"return history.length\")\n",
" try:\n",
" if para[\"scrollType\"] != 0 and para[\"scrollCount\"] > 0: # 控制屏幕向下滚动\n",
" for i in range(para[\"scrollCount\"]):\n",
" time.sleep(1) # 下拉完等1秒\n",
" Log(\"下拉等待1秒\")\n",
" body = browser.find_element_by_css_selector(\"body\")\n",
" body.send_keys(Keys.END)\n",
" except TimeoutException:\n",
" Log('time out after 10 seconds when loading page: ' + url)\n",
" recordLog('time out after 10 seconds when loading page: ' + url)\n",
" browser.execute_script('window.stop()')\n",
" if containJudge:\n",
" global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText\n",
" try:\n",
" bodyText = browser.find_element_by_css_selector(\"body\").text\n",
" except TimeoutException:\n",
" Log('time out after 10 seconds when getting body text: ' + url)\n",
" recordLog('time out after 10 seconds when getting body text:: ' + url)\n",
" browser.execute_script('window.stop()')\n",
" time.sleep(1)\n",
" Log(\"获得bodytext等待1秒\")\n",
" # 再执行一遍\n",
" bodyText = browser.find_element_by_css_selector(\"body\").text\n",
" except Exception as e:\n",
" Log(e)\n",
" recordLog(str(e))\n",
"\n",
"\n",
"# 键盘输入事件\n",
"def inputInfo(para, loopValue):\n",
" time.sleep(1) # 输入之前等待1秒\n",
" Log(\"输入前等待1秒\")\n",
" try:\n",
" textbox = browser.find_element_by_xpath(para[\"xpath\"])\n",
" except:\n",
" Log(\"找不到输入框元素:\" + para[\"xpath\"] + \"请尝试执行前等待\")\n",
" recordLog(\"找不到输入框元素:\" + para[\"xpath\"] + \"请尝试执行前等待\")\n",
" exit()\n",
" textbox.send_keys(Keys.CONTROL, 'a')\n",
" textbox.send_keys(Keys.BACKSPACE)\n",
" if para[\"useLoop\"]:\n",
" textbox.send_keys(loopValue)\n",
" else:\n",
" textbox.send_keys(para[\"value\"])\n",
" global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText\n",
" bodyText = browser.find_element_by_css_selector(\"body\").text\n",
"\n",
"\n",
"# 点击元素事件\n",
"def clickElement(para, loopElement=None, clickPath=\"\", index=0):\n",
" global history\n",
" time.sleep(1) # 点击之前等待1秒\n",
" Log(\"点击之前等待1秒\")\n",
" if para[\"useLoop\"]: #使用循环的情况下传入的clickPath就是实际的xpath\n",
" path = clickPath\n",
" else:\n",
" path = clickPath + para[\"xpath\"] #不然使用元素定义的xpath\n",
" tempHandleNum = len(browser.window_handles) #记录之前的窗口位置\n",
" try:\n",
" script = 'var result = document.evaluate(`' + path + '`, document, null, XPathResult.ANY_TYPE, null);for(let i=0;i<arguments[0];i++){result.iterateNext();} result.iterateNext().click();'\n",
" browser.execute_script(script,str(index))# 用js的点击方法\n",
"\n",
" except TimeoutException:\n",
" Log('time out after 10 seconds when loading clicked page')\n",
" recordLog('time out after 10 seconds when loading clicked page')\n",
" browser.execute_script('window.stop()')\n",
" except Exception as e:\n",
" Log(e)\n",
" recordLog(str(e))\n",
" time.sleep(0.5) # 点击之后等半秒\n",
" Log(\"点击之后等待0.5秒\")\n",
" if tempHandleNum != len(browser.window_handles): # 如果有新标签页的行为发生\n",
" browser.switch_to.window(browser.window_handles[-1]) # 跳转到新的标签页\n",
" history[\"handle\"] = browser.current_window_handle\n",
" try:\n",
" history[\"index\"] = browser.execute_script(\"return history.length\")\n",
" except TimeoutException:\n",
" browser.execute_script('window.stop()')\n",
" history[\"index\"] = browser.execute_script(\"return history.length\")\n",
" else:\n",
" try:\n",
" history[\"index\"] = browser.execute_script(\"return history.length\")\n",
" except TimeoutException:\n",
" browser.execute_script('window.stop()')\n",
" history[\"index\"] = browser.execute_script(\"return history.length\")\n",
" # 如果打开了新窗口,切换到新窗口\n",
" try:\n",
" if para[\"scrollType\"] != 0 and para[\"scrollCount\"] > 0: # 控制屏幕向下滚动\n",
" for i in range(para[\"scrollCount\"]):\n",
" time.sleep(1) # 下拉完等1秒\n",
" Log(\"下拉完等待1秒\")\n",
" body = browser.find_element_by_css_selector(\"body\")\n",
" body.send_keys(Keys.END)\n",
" except TimeoutException:\n",
" Log('time out after 10 seconds when scrolling. ')\n",
" recordLog('time out after 10 seconds when scrolling')\n",
" browser.execute_script('window.stop()')\n",
" if para[\"scrollType\"] != 0 and para[\"scrollCount\"] > 0: # 控制屏幕向下滚动\n",
" for i in range(para[\"scrollCount\"]):\n",
" time.sleep(1) # 下拉完等1秒\n",
" Log(\"下拉完等待1秒\")\n",
" body = browser.find_element_by_css_selector(\"body\")\n",
" body.send_keys(Keys.END)\n",
" if containJudge: #有判断语句才执行以下操作\n",
" global bodyText # 每次执行点击输入元素和打开网页操作后需要更新bodyText\n",
" try:\n",
" bodyText = browser.find_element_by_css_selector(\"body\").text\n",
" except TimeoutException:\n",
" Log('time out after 10 seconds when getting body text')\n",
" recordLog('time out after 10 seconds when getting body text')\n",
" browser.execute_script('window.stop()')\n",
" time.sleep(1)\n",
" Log(\"bodytext等待1秒\")\n",
" # 再执行一遍\n",
" bodyText = browser.find_element_by_css_selector(\"body\").text\n",
" except Exception as e:\n",
" Log(e)\n",
" recordLog(str(e))\n",
"\n",
"\n",
"# 提取数据事件\n",
"def getData(para, loopElement, isInLoop=True):\n",
" if not isInLoop and para[\"wait\"] == 0:\n",
" time.sleep(1) # 如果提取数据字段不在循环内而且设置的等待时间为0默认等待1秒\n",
" Log(\"提取数据等待1秒\")\n",
" for p in para[\"paras\"]:\n",
" content = \"\"\n",
" try:\n",
" if p[\"relative\"]: # 是否相对xpath\n",
" if p[\"relativeXpath\"] == \"\": # 相对xpath有时候就是元素本身不需要二次查找\n",
" element = loopElement\n",
" else:\n",
" element = loopElement.find_element_by_xpath(p[\"relativeXpath\"][1:])\n",
" else:\n",
" element = browser.find_element_by_xpath(p[\"relativeXpath\"])\n",
" except NoSuchElementException: # 找不到元素的时候,使用默认值\n",
" outputParameters[p[\"name\"]] = p[\"default\"]\n",
" Log('Element not found,use default')\n",
" recordLog('Element not found,use default')\n",
" continue\n",
" except TimeoutException: #超时的时候设置超时值\n",
" Log('time out after 10 seconds when getting data')\n",
" recordLog('time out after 10 seconds when getting data')\n",
" browser.execute_script('window.stop()')\n",
" if p[\"relative\"]: # 是否相对xpath\n",
" if p[\"relativeXpath\"] == \"\": # 相对xpath有时候就是元素本身不需要二次查找\n",
" element = loopElement\n",
" else:\n",
" element = loopElement.find_element_by_xpath(p[\"relativeXpath\"][1:])\n",
" else:\n",
" element = browser.find_element_by_xpath(p[\"relativeXpath\"])\n",
" if p[\"contentType\"] == 2:\n",
" content = element.get_attribute('innerHTML')\n",
" elif p[\"contentType\"] == 3:\n",
" content = element.get_attribute('outerHTML')\n",
" elif p[\"contentType\"] == 1: # 只采集当期元素下的文本,不包括子元素\n",
" command = 'var arr = [];\\\n",
" var content = arguments[0];\\\n",
" for(var i = 0, len = content.childNodes.length; i < len; i++) {\\\n",
" if(content.childNodes[i].nodeType === 3){ \\\n",
" arr.push(content.childNodes[i].nodeValue);\\\n",
" }\\\n",
" }\\\n",
" var str = arr.join(\"\"); \\\n",
" return str;'\n",
" content = browser.execute_script(command, element).replace(\" \", \"\").replace(\"\\n\", \"\")\n",
" if p[\"nodeType\"] == 2:\n",
" if element.get_attribute(\"href\") != None:\n",
" content = element.get_attribute(\"href\")\n",
" else:\n",
" content = \"\"\n",
" elif p[\"nodeType\"] == 3:\n",
" if element.get_attribute(\"value\") != None:\n",
" content = element.get_attribute(\"value\")\n",
" else:\n",
" content = \"\"\n",
" elif p[\"nodeType\"] == 4: # 图片\n",
" if element.get_attribute(\"src\") != None:\n",
" content = element.get_attribute(\"src\")\n",
" else:\n",
" content = \"\"\n",
" elif p[\"contentType\"] == 0:\n",
" content = element.text\n",
" if p[\"nodeType\"] == 2:\n",
" if element.get_attribute(\"href\") != None:\n",
" content = element.get_attribute(\"href\")\n",
" else:\n",
" content = \"\"\n",
" elif p[\"nodeType\"] == 3:\n",
" if element.get_attribute(\"value\") != None:\n",
" content = element.get_attribute(\"value\")\n",
" else:\n",
" content = \"\"\n",
" elif p[\"nodeType\"] == 4: # 图片\n",
" if element.get_attribute(\"src\") != None:\n",
" content = element.get_attribute(\"src\")\n",
" else:\n",
" content = \"\"\n",
" outputParameters[p[\"name\"]] = content\n",
" global OUTPUT\n",
" line = []\n",
" for value in outputParameters.values():\n",
" line.append(value)\n",
" print(value[:15], \" \", end=\"\")\n",
" print(\"\")\n",
" OUTPUT.append(line)\n",
"\n",
"\n",
"# 判断字段是否为空\n",
"def isnull(s):\n",
" return len(s) != 0"
]
},
{
"cell_type": "markdown",
"source": [
"## 核心代码执行部分只需要修改id为taskid即可"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"if __name__ == '__main__':\n",
" browser = webdriver.Chrome();\n",
" browser.get('about:blank')\n",
" browser.set_page_load_timeout(10) # 加载页面最大超时时间\n",
"\n",
" id = 4 #taskId这里修改\n",
"\n",
" saveName = \"task_\" + str(id) + \"_\" + str(random.randint(0, 999999999)) # 保存文件的名字\n",
" content = requests.get(\"http://183.129.170.180:8041/backEnd/queryTask?id=\" + str(id))\n",
" service = json.loads(content.text) # 加载服务信息\n",
" procedure = service[\"graph\"] # 程序执行流程\n",
" links = list(filter(isnull, service[\"links\"].split(\"\\n\"))) # 要执行的link的列表\n",
" OUTPUT = [] # 采集的数据\n",
" OUTPUT.append([]) # 添加表头\n",
" containJudge = service[\"containJudge\"] #是否含有判断语句\n",
" bodyText = \"\" # 记录bodyText\n",
" tOut = service[\"outputParameters\"] # 生成输出参数对象\n",
" outputParameters = {}\n",
" log = \"\" # 记下现在总共开了多少个标签页\n",
" history = {\"index\":0,\"handle\":None} #记录页面现在所以在的历史记录的位置\n",
" for para in tOut:\n",
" outputParameters[para[\"name\"]] = \"\"\n",
" OUTPUT[0].append(para[\"name\"])\n",
" # 挨个执行程序\n",
" urlId = 0 # 全局记录变量\n",
" for i in range(len(links)):\n",
" excuteNode(0)\n",
" urlId = urlId + 1\n",
" print(\"执行完成!\")\n",
" recordLog(\"Done!\")\n",
" with open(saveName + '_log.txt', 'w',encoding='utf-8-sig') as file_obj:\n",
" file_obj.write(log)\n",
" file_obj.close()\n",
" with open(saveName + '.csv', 'w', encoding='utf-8-sig', newline=\"\") as f:\n",
" f_csv = csv.writer(f)\n",
" for line in OUTPUT:\n",
" f_csv.writerow(line)\n",
" f.close()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# node = procedure[9]\n",
"# excuteOnce(node)\n",
"OUTPUT"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<div class=\"blo \n",
"执行完成!\n"
]
}
],
"source": [
"if __name__ == '__main__':\n",
" browser = webdriver.Chrome();\n",
" browser.get('about:blank')\n",
" browser.set_page_load_timeout(10) # 加载页面最大超时时间\n",
"\n",
" id = 4 #taskId这里修改\n",
"\n",
" saveName = \"task_\" + str(id) + \"_\" + str(random.randint(0, 999999999)) # 保存文件的名字\n",
" content = requests.get(\"http://183.129.170.180:8041/backEnd/queryTask?id=\" + str(id))\n",
" service = json.loads(content.text) # 加载服务信息\n",
" procedure = service[\"graph\"] # 程序执行流程\n",
" links = list(filter(isnull, service[\"links\"].split(\"\\n\"))) # 要执行的link的列表\n",
" OUTPUT = [] # 采集的数据\n",
" OUTPUT.append([]) # 添加表头\n",
" containJudge = service[\"containJudge\"] #是否含有判断语句\n",
" bodyText = \"\" # 记录bodyText\n",
" tOut = service[\"outputParameters\"] # 生成输出参数对象\n",
" outputParameters = {}\n",
" log = \"\" # 记下现在总共开了多少个标签页\n",
" history = {\"index\":0,\"handle\":None} #记录页面现在所以在的历史记录的位置\n",
" for para in tOut:\n",
" outputParameters[para[\"name\"]] = \"\"\n",
" OUTPUT[0].append(para[\"name\"])\n",
" # 挨个执行程序\n",
" urlId = 0 # 全局记录变量\n",
" for i in range(len(links)):\n",
" excuteNode(0)\n",
" urlId = urlId + 1\n",
" print(\"执行完成!\")\n",
" recordLog(\"Done!\")\n",
" with open(saveName + '_log.txt', 'w',encoding='utf-8-sig') as file_obj:\n",
" file_obj.write(log)\n",
" file_obj.close()\n",
" with open(saveName + '.csv', 'w', encoding='utf-8-sig', newline=\"\") as f:\n",
" f_csv = csv.writer(f)\n",
" for line in OUTPUT:\n",
" f_csv.writerow(line)\n",
" f.close()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"pycharm": {
"is_executing": false,
"name": "#%% 测试单个函数\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"[['参数1_outerHTML'],\n",
" ['<div class=\"blog-content-box\">\\n <div class=\"article-header-box\">\\n <div class=\"article-header\">\\n <div class=\"article-title-box\">\\n <h1 class=\"title-article\">该扩展程序未列在 Chrome 网上应用店中,并可能是在您不知情的情况下添加的解决办法</h1>\\n </div>\\n <div class=\"article-info-box\">\\n <div class=\"article-bar-top\">\\n <!--文章类型-->\\n <span class=\"article-type type-1 float-left\">原创</span> <a class=\"follow-nickName\" href=\"https://me.csdn.net/gexiaochao\" target=\"_blank\" rel=\"noopener\">葛小勺</a>\\n <span class=\"time\">最后发布于2019-03-22 17:28:27 </span>\\n <span class=\"read-count\">阅读数 17488</span>\\n <a id=\"blog_detail_zk_collection\" data-report-click=\"{&quot;mod&quot;:&quot;popu_823&quot;}\">\\n <svg class=\"icon\">\\n <use xlink:href=\"#icon-csdnc-Collection-G\"></use>\\n </svg>\\n 收藏\\n </a>\\n </div>\\n <div class=\"up-time\">发布于2019-03-22 17:28:27</div>\\n <div class=\"slide-content-box\">\\n <div class=\"tags-box artic-tag-box\">\\n <span class=\"label\">分类专栏:</span>\\n <a class=\"tag-link\" target=\"_blank\" rel=\"noopener\" href=\"https://blog.csdn.net/gexiaochao/category_6923169.html\">\\n 计算机网络 </a>\\n </div>\\n <div class=\"article-copyright\">\\n <span class=\"creativecommons\">\\n <a rel=\"license\" href=\"http://creativecommons.org/licenses/by-sa/4.0/\"></a>\\n <span>\\n 版权声明:本文为博主原创文章,遵循<a href=\"http://creativecommons.org/licenses/by-sa/4.0/\" target=\"_blank\" rel=\"noopener\"> CC 4.0 BY-SA </a>版权协议,转载请附上原文出处链接和本声明。 </span>\\n <div class=\"article-source-link2222\">\\n 本文链接:<a href=\"https://blog.csdn.net/gexiaochao/article/details/88746278\">https://blog.csdn.net/gexiaochao/article/details/88746278</a>\\n </div>\\n </span> \\n </div>\\n </div>\\n <div class=\"operating\">\\n <a class=\"href-article-edit slide-toggle\">展开</a>\\n </div>\\n </div>\\n </div>\\n </div>\\n <article class=\"baidu_pl\">\\n <!--python安装手册开始-->\\n <!--python安装手册结束-->\\n <!--####专栏广告位图文切换开始-->\\n <!--####专栏广告位图文切换结束-->\\n <div id=\"article_content\" class=\"article_content 
clearfix\">\\n <link rel=\"stylesheet\" href=\"https://csdnimg.cn/release/phoenix/template/css/ck_htmledit_views-833878f763.css\">\\n <link rel=\"stylesheet\" href=\"https://csdnimg.cn/release/phoenix/template/css/ck_htmledit_views-833878f763.css\">\\n <div class=\"htmledit_views\" id=\"content_views\">\\n <p>如何解决该扩展程序未列在 Chrome 网上应用店中,并可能是在您不知情的情况下添加的</p>\\n\\n<p>在使用Google插件的时候出现了这个问题当时是直接下载的crx文件然后拖拽到浏览器中进行安装的过了不久这个插件并不能进行使用了。<br>\\n出现</p>\\n\\n<p>如何解决<br>\\n该扩展程序未列在 Chrome 网上应用店中,并可能是在您不知情的情况下添加的<br>\\n-------------------</p>\\n\\n<p>方法一</p>\\n\\n<p>1、首先把需要安装的第三方插件后缀.crx 改成 .rar然后解压得到一个文件夹<br>\\n2、再打开chrome://extensions/谷歌扩展应用管理,点击右上角的开发者模式,就可以看到“加载正在开发的扩展程序”这一选项。<br>\\n3、选择刚才步骤1中解压好的文件夹确定<br>\\n4、确认新增扩展程序点击添加成功添加应用程序。</p>\\n\\n<p>如出现如图情况</p>\\n\\n<p><img alt=\"\" class=\"has\" height=\"183\" src=\"https://img-blog.csdnimg.cn/20190322173309261.png\" width=\"642\"></p>\\n\\n<p>出现这种情况Chrome浏览器会提示无法加载以下来源的扩展程序 xxx路径Chrome插件文件的解压位置Cannot load extension with file or directory name _metadata. 
Filenames starting with \"_\" are reserved for use by the system.出现这种情况是因为这款Chrome插件与新版的Chrome浏览器有些不兼容这时候用户可以打开刚刚解压的Chrome插件文件夹并把其中_metadata文件夹的名字修改为metadata把前面的下划线去掉如图所示</p>\\n\\n<p><img alt=\"\" class=\"has\" height=\"166\" src=\"https://img-blog.csdnimg.cn/20190322173431209.png\" width=\"630\"></p>\\n\\n<p>更新文件夹名称成功以后点击该错误提示下方的“重试”按钮就可以成功地把Chrome插件加载谷歌浏览器中了如图所示</p>\\n\\n<p><img alt=\"\" class=\"has\" src=\"https://img-blog.csdnimg.cn/20190322173504497.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2dleGlhb2NoYW8=,size_16,color_FFFFFF,t_70\"></p>\\n\\n<p>基于这种模式安装的chrome插件会因为用户启用了开发者模式而遭到谷歌的警告用户可以选择忽略Chrome的警告<br>\\n---------------------&nbsp;</p>\\n\\n<p>方法二</p>\\n\\n<p>运行中输入“gpedit.msc” ,打开 本地策略组 ,导入chrome.adm再被禁用的插件ID复制下来依次找到Google Chrome→扩展程序→配置扩展程序白名单将刚才的复制的ID粘贴进去操作如图<br><img alt=\"\" class=\"has\" src=\"https://img-blog.csdnimg.cn/20190322172648864.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2dleGlhb2NoYW8=,size_16,color_FFFFFF,t_70\"></p>\\n\\n<p><img alt=\"\" class=\"has\" src=\"https://img-blog.csdnimg.cn/20190322172814111.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L2dleGlhb2NoYW8=,size_16,color_FFFFFF,t_70\"></p>\\n\\n<p>操作完后再回到chrome扩展列表页面可以看到被禁用的扩展右侧启用的选项已变成可勾选状态勾选启用该扩展即可</p>\\n </div>\\n <div class=\"more-toolbox\">\\n <div class=\"left-toolbox\">\\n <ul class=\"toolbox-list\">\\n \\n <li class=\"tool-item tool-active is-like \"><a href=\"javascript:;\"><svg class=\"icon\" aria-hidden=\"true\">\\n <use xlink:href=\"#csdnc-thumbsup\"></use>\\n </svg><span class=\"name\">点赞</span>\\n <span class=\"count\">4</span>\\n </a></li>\\n <li class=\"tool-item tool-active is-collection \"><a href=\"javascript:;\" data-report-click=\"{&quot;mod&quot;:&quot;popu_824&quot;}\"><svg class=\"icon\" aria-hidden=\"true\">\\n <use xlink:href=\"#icon-csdnc-Collection-G\"></use>\\n </svg><span 
class=\"name\">收藏</span></a></li>\\n <li class=\"tool-item tool-active is-share\"><a href=\"javascript:;\" data-report-click=\"{&quot;mod&quot;:&quot;1582594662_002&quot;}\"><svg class=\"icon\" aria-hidden=\"true\">\\n <use xlink:href=\"#icon-csdnc-fenxiang\"></use>\\n </svg>分享</a></li>\\n <!--打赏开始-->\\n <!--打赏结束-->\\n <li class=\"tool-item tool-more\">\\n <a>\\n <svg t=\"1575545411852\" class=\"icon\" viewBox=\"0 0 1024 1024\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\" p-id=\"5717\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" width=\"200\" height=\"200\"><defs><style type=\"text/css\"></style></defs><path d=\"M179.176 499.222m-113.245 0a113.245 113.245 0 1 0 226.49 0 113.245 113.245 0 1 0-226.49 0Z\" p-id=\"5718\"></path><path d=\"M509.684 499.222m-113.245 0a113.245 113.245 0 1 0 226.49 0 113.245 113.245 0 1 0-226.49 0Z\" p-id=\"5719\"></path><path d=\"M846.175 499.222m-113.245 0a113.245 113.245 0 1 0 226.49 0 113.245 113.245 0 1 0-226.49 0Z\" p-id=\"5720\"></path></svg>\\n </a>\\n <ul class=\"more-box\">\\n <li class=\"item\"><a class=\"article-report\">文章举报</a></li>\\n </ul>\\n </li>\\n </ul>\\n </div>\\n </div>\\n <div class=\"person-messagebox\">\\n <div class=\"left-message\"><a href=\"https://blog.csdn.net/gexiaochao\">\\n <img src=\"https://profile.csdnimg.cn/6/9/B/3_gexiaochao\" class=\"avatar_pic\" username=\"gexiaochao\">\\n <img src=\"https://g.csdnimg.cn/static/user-reg-year/1x/7.png\" class=\"user-years\">\\n </a></div>\\n <div class=\"middle-message\">\\n <div class=\"title\"><span class=\"tit\"><a href=\"https://blog.csdn.net/gexiaochao\" data-report-click=\"{&quot;mod&quot;:&quot;popu_379&quot;}\" target=\"_blank\">葛小勺</a></span>\\n </div>\\n <div class=\"text\"><span>发布了6 篇原创文章</span> · <span>获赞 8</span> · <span>访问量 11万+</span></div>\\n </div>\\n <div class=\"right-message\">\\n <a href=\"https://im.csdn.net/im/main.html?userName=gexiaochao\" target=\"_blank\" class=\"btn btn-sm btn-red-hollow bt-button personal-letter\">私信\\n </a>\\n 
<a class=\"btn btn-sm bt-button personal-watch\" data-report-click=\"{&quot;mod&quot;:&quot;popu_379&quot;}\">关注</a>\\n </div>\\n </div>\\n </div>\\n </article>\\n \\n</div>']]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# node = procedure[9]\n",
"# excuteOnce(node)\n",
"OUTPUT"
]
},
{
"cell_type": "code",
"execution_count": 192,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"参数1_链接文本 参数2_链接地址 参数3_图片地址 参数4_文本 参数5_文本 参数6_文本 \n",
"\n",
"通用新闻资讯接口\n",
"¥ 3.00 元/10 https://www.idataapi https://www.idataapi 通用新闻资讯接口 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(23909) \n",
"\n",
"新浪微博\n",
"¥ 1.00 元/100 次\n",
" https://www.idataapi https://www.idataapi 新浪微博 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(12605) \n",
"\n",
"通用酒店数据接口\n",
"免费\n",
"使用人数(971 https://www.idataapi https://www.idataapi 通用酒店数据接口 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(9718) \n",
"\n",
"微信公众号\n",
"¥ 1.00 元/100 次 https://www.idataapi https://www.idataapi 微信公众号 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(1137) \n",
"\n",
"天猫\n",
"¥ 3.00 元/100 次\n",
"使用 https://www.idataapi https://www.idataapi 天猫 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(601) \n",
"\n",
"今日头条\n",
"¥ 0.50 元/100 次\n",
" https://www.idataapi https://www.idataapi 今日头条 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥0. 使用人数(520) \n",
"\n",
"小红书\n",
"¥ 1.00 元/100 次\n",
"使 https://www.idataapi https://www.idataapi 小红书 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(470) \n",
"\n",
"京东商城\n",
"¥ 1.00 元/100 次\n",
" https://www.idataapi https://www.idataapi 京东商城 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(275) \n",
"\n",
"携程\n",
"¥ 1.00 元/100 次\n",
"使用 https://www.idataapi https://www.idataapi 携程 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(250) \n",
"\n",
"饿了么\n",
"¥ 3.00 元/100 次\n",
"使 https://www.idataapi https://www.idataapi 饿了么 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(178) \n",
"\n",
"360新闻\n",
"免费\n",
"使用人数(3380)\n",
" https://www.idataapi https://www.idataapi 360新闻 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(3380) \n",
"\n",
"中文分词\n",
"免费\n",
"使用人数(1754)\n",
"文 https://www.idataapi https://www.idataapi 中文分词 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(1754) \n",
"\n",
"微信公众号文章link版\n",
"¥ 1.00 https://www.idataapi https://www.idataapi 微信公众号文章link版 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(1109) \n",
"\n",
"微信公众号文章高级版\n",
"¥ 3.00 元/ https://www.idataapi https://www.idataapi 微信公众号文章高级版 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(1013) \n",
"\n",
"微信公众号文章专业版(关键词)\n",
"¥ 2. https://www.idataapi https://www.idataapi 微信公众号文章专业版(关键词) \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥2. 使用人数(946) \n",
"\n",
"餐饮类情感分析语料\n",
"¥ 0.01 元\n",
"使 https://www.idataapi https://www.idataapi 餐饮类情感分析语料 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥0. 使用人数(515) \n",
"\n",
"谷歌验证码识别训练集数据\n",
"¥ 0.01 https://www.idataapi https://www.idataapi 谷歌验证码识别训练集数据 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥0. 使用人数(301) \n",
"\n",
"微信20180320特定信息\n",
"¥ 500 https://www.idataapi https://www.idataapi 微信20180320特定信息 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥50 使用人数(4) \n",
"\n",
"微信公众号房地产3月份文章\n",
"¥ 800. https://www.idataapi https://www.idataapi 微信公众号房地产3月份文章 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥80 使用人数(3) \n",
"\n",
"专辑数据-2018\n",
"¥ 1000.00 https://www.idataapi https://www.idataapi 专辑数据-2018 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥10 使用人数(2) \n",
"\n",
"单曲数据-2018\n",
"¥ 2000.00 https://www.idataapi https://www.idataapi 单曲数据-2018 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥20 使用人数(2) \n",
"\n",
"天猫定制数据2018\n",
"¥ 3000.00 https://www.idataapi https://www.idataapi 天猫定制数据2018 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥30 使用人数(2) \n",
"\n",
"甜品店铺信息-2018.3.30\n",
"¥ 1 https://www.idataapi https://www.idataapi 甜品店铺信息-2018.3.30 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥15 使用人数(2) \n",
"\n",
"甜品店铺对应商品信息\n",
"¥ 1500.00 https://www.idataapi https://www.idataapi 甜品店铺对应商品信息 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥15 使用人数(2) \n",
"\n",
"电影,电视剧及图书短评语料\n",
"¥ 4000 https://www.idataapi https://www.idataapi 电影,电视剧及图书短评语料 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥40 使用人数(2) \n",
"\n",
"综艺数据-2018\n",
"¥ 1000.00 https://www.idataapi https://www.idataapi 综艺数据-2018 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥10 使用人数(2) \n",
"\n",
"创业数据库\n",
"¥ 190000.00 元\n",
" https://www.idataapi https://www.idataapi 创业数据库 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥19 使用人数(1) \n",
"\n",
"中国餐馆词库\n",
"¥ 20000.00 元\n",
" https://www.idataapi https://www.idataapi 中国餐馆词库 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥20 使用人数(0) \n",
"\n",
"通用新闻资讯接口\n",
"¥ 3.00 元/10 https://www.idataapi https://www.idataapi 通用新闻资讯接口 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(23909) \n",
"\n",
"新浪微博\n",
"¥ 1.00 元/100 次\n",
" https://www.idataapi https://www.idataapi 新浪微博 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(12605) \n",
"\n",
"通用酒店数据接口\n",
"免费\n",
"使用人数(971 https://www.idataapi https://www.idataapi 通用酒店数据接口 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(9718) \n",
"\n",
"微信公众号\n",
"¥ 1.00 元/100 次 https://www.idataapi https://www.idataapi 微信公众号 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(1137) \n",
"\n",
"天猫\n",
"¥ 3.00 元/100 次\n",
"使用 https://www.idataapi https://www.idataapi 天猫 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(600) \n",
"\n",
"今日头条\n",
"¥ 0.50 元/100 次\n",
" https://www.idataapi https://www.idataapi 今日头条 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥0. 使用人数(520) \n",
"\n",
"小红书\n",
"¥ 1.00 元/100 次\n",
"使 https://www.idataapi https://www.idataapi 小红书 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(470) \n",
"\n",
"京东商城\n",
"¥ 1.00 元/100 次\n",
" https://www.idataapi https://www.idataapi 京东商城 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(275) \n",
"\n",
"携程\n",
"¥ 1.00 元/100 次\n",
"使用 https://www.idataapi https://www.idataapi 携程 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(250) \n",
"\n",
"饿了么\n",
"¥ 3.00 元/100 次\n",
"使 https://www.idataapi https://www.idataapi 饿了么 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(178) \n",
"\n",
"360新闻\n",
"免费\n",
"使用人数(3380)\n",
" https://www.idataapi https://www.idataapi 360新闻 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(3380) \n",
"\n",
"中文分词\n",
"免费\n",
"使用人数(1754)\n",
"文 https://www.idataapi https://www.idataapi 中文分词 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(1754) \n",
"\n",
"微信公众号文章link版\n",
"¥ 1.00 https://www.idataapi https://www.idataapi 微信公众号文章link版 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(1109) \n",
"\n",
"微信公众号文章高级版\n",
"¥ 3.00 元/ https://www.idataapi https://www.idataapi 微信公众号文章高级版 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(1013) \n",
"\n",
"微信公众号文章专业版(关键词)\n",
"¥ 2. https://www.idataapi https://www.idataapi 微信公众号文章专业版(关键词) \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥2. 使用人数(946) \n",
"\n",
"通用新闻资讯接口\n",
"¥ 3.00 元/10 https://www.idataapi https://www.idataapi 通用新闻资讯接口 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(23909) \n",
"\n",
"新浪微博\n",
"¥ 1.00 元/100 次\n",
" https://www.idataapi https://www.idataapi 新浪微博 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(12605) \n",
"\n",
"通用酒店数据接口\n",
"免费\n",
"使用人数(971 https://www.idataapi https://www.idataapi 通用酒店数据接口 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(9718) \n",
"\n",
"微信公众号\n",
"¥ 1.00 元/100 次 https://www.idataapi https://www.idataapi 微信公众号 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(1137) \n",
"\n",
"天猫\n",
"¥ 3.00 元/100 次\n",
"使用 https://www.idataapi https://www.idataapi 天猫 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(600) \n",
"\n",
"今日头条\n",
"¥ 0.50 元/100 次\n",
" https://www.idataapi https://www.idataapi 今日头条 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥0. 使用人数(520) \n",
"\n",
"小红书\n",
"¥ 1.00 元/100 次\n",
"使 https://www.idataapi https://www.idataapi 小红书 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(470) \n",
"\n",
"京东商城\n",
"¥ 1.00 元/100 次\n",
" https://www.idataapi https://www.idataapi 京东商城 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(275) \n",
"\n",
"携程\n",
"¥ 1.00 元/100 次\n",
"使用 https://www.idataapi https://www.idataapi 携程 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(250) \n",
"\n",
"饿了么\n",
"¥ 3.00 元/100 次\n",
"使 https://www.idataapi https://www.idataapi 饿了么 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(178) \n",
"\n",
"360新闻\n",
"免费\n",
"使用人数(3380)\n",
" https://www.idataapi https://www.idataapi 360新闻 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(3380) \n",
"\n",
"中文分词\n",
"免费\n",
"使用人数(1754)\n",
"文 https://www.idataapi https://www.idataapi 中文分词 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t免费\t\t 使用人数(1754) \n",
"\n",
"微信公众号文章link版\n",
"¥ 1.00 https://www.idataapi https://www.idataapi 微信公众号文章link版 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥1. 使用人数(1109) \n",
"\n",
"微信公众号文章高级版\n",
"¥ 3.00 元/ https://www.idataapi https://www.idataapi 微信公众号文章高级版 \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥3. 使用人数(1013) \n",
"\n",
"微信公众号文章专业版(关键词)\n",
"¥ 2. https://www.idataapi https://www.idataapi 微信公众号文章专业版(关键词) \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t¥2. 使用人数(946) \n",
"\n"
]
}
],
"source": [
"# Preview OUTPUT: for every entry, print the first 20 characters of each of its items\n",
"# on one line, then leave an empty line before the next entry.\n",
"for row in OUTPUT:\n",
"    for field in row:\n",
"        # end is empty, so all items of one entry stay on the same line; the extra\n",
"        # one-space argument and the default separator put spaces between them.\n",
"        print(field[:20],\" \",end=\"\")\n",
"    # print adds its own newline after the one passed in: this ends the line\n",
"    # and leaves one empty line between entries.\n",
"    print(\"\\n\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"source": [],
"metadata": {
"collapsed": false
}
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}