# from lxml import etree
#
# # Parse the HTML
# html = """
# <div>
#     123
#     <span>
#         456
#     </span>
#     789
# </div>
# """ # html = etree.HTML(html) # element = html.xpath("*") # direct_text = "/html/body/" + html[0][0].tag + "/text()" # all_text = "/html/body/" + html[0][0].tag + "//text()" # # 使用XPath选择元素 # results = html.xpath(direct_text) # # print(results) # # 拼接所有文本内容并去掉两边的空白 # text = ' '.join(result.strip() for result in results if result.strip()) # # 输出结果 # print(text) # results = html.xpath(all_text) # # print(results) # # 拼接所有文本内容并去掉两边的空白 # text = ' '.join(result.strip() for result in results if result.strip()) # # 输出结果 # print(text) import re def lowercase_xpath_tags(xpath): return re.sub(r"([A-Z]+)(?=[\[\]//]|$)", lambda x: x.group(0).lower(), xpath) print(lowercase_xpath_tags('//DIV[@id="J_recommendGoods"]/DIV[2]/UL')) print("//strong//span[contains(@class,'page-item_M4MDr')]/..//following-sibling::a[1]") print("")