mirror of
https://github.com/NaiboWang/EasySpider.git
synced 2025-04-12 11:37:11 +08:00
37 lines
1.1 KiB
Python
37 lines
1.1 KiB
Python
from transformers import AutoProcessor, AutoModelForVision2Seq
|
|
from PIL import Image
|
|
import torch
|
|
|
|
# 加载 Llama 3.2 视觉模型和处理器
|
|
model_name = "meta-llama/Llama-3.2-11B-Vision" # 请根据实际模型路径替换
|
|
processor = AutoProcessor.from_pretrained(model_name)
|
|
model = AutoModelForVision2Seq.from_pretrained(model_name)
|
|
|
|
# 处理网页截图并提取结构
|
|
def predict_structure_from_image(image_path):
|
|
# 加载图像
|
|
image = Image.open(image_path).convert("RGB")
|
|
|
|
# 预处理图像
|
|
inputs = processor(images=image, return_tensors="pt")
|
|
|
|
# 生成描述(结构描述)
|
|
outputs = model.generate(
|
|
inputs["pixel_values"],
|
|
max_length=512,
|
|
num_beams=5,
|
|
early_stopping=True
|
|
)
|
|
description = processor.decode(outputs[0], skip_special_tokens=True)
|
|
return description
|
|
|
|
# 示例使用
|
|
if __name__ == "__main__":
|
|
# 提供网页截图的路径
|
|
image_path = "webpage_screenshot.png" # 请替换为实际的图像文件路径
|
|
|
|
# 预测结构
|
|
predicted_structure = predict_structure_from_image(image_path)
|
|
|
|
print("预测的结构:", predicted_structure)
|