优化代码
This commit is contained in:
parent
4d2635fba8
commit
ffd9e70346
48
main.py
48
main.py
@ -2,12 +2,8 @@ import os
|
|||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import io
|
import io
|
||||||
from reportlab.lib.pagesizes import letter
|
|
||||||
from reportlab.pdfgen import canvas
|
|
||||||
|
|
||||||
|
|
||||||
def get_html(url, max_retries=3):
|
def get_html(url, max_retries=3):
|
||||||
@ -52,10 +48,11 @@ def get_img_urls(html_content):
|
|||||||
return img_urls
|
return img_urls
|
||||||
|
|
||||||
|
|
||||||
def create_img_obj_list(img_url_list):
|
def create_img_obj_list(img_url_list, file_name):
|
||||||
img_obj_list = []
|
img_obj_list = []
|
||||||
for url in img_url_list:
|
for url in img_url_list:
|
||||||
obj = dict()
|
obj = dict()
|
||||||
|
obj["file_name"] = file_name
|
||||||
obj["url"] = url
|
obj["url"] = url
|
||||||
obj["data"] = None
|
obj["data"] = None
|
||||||
img_obj_list.append(obj)
|
img_obj_list.append(obj)
|
||||||
@ -67,35 +64,11 @@ def set_img_obj_data(img_obj):
|
|||||||
url = img_obj["url"]
|
url = img_obj["url"]
|
||||||
data = download_image(url)
|
data = download_image(url)
|
||||||
if data is None:
|
if data is None:
|
||||||
raise Exception("下载图片失败")
|
file_name = img_obj["file_name"]
|
||||||
|
raise Exception(f"{file_name}, 下载图片失败")
|
||||||
img_obj["data"] = data
|
img_obj["data"] = data
|
||||||
|
|
||||||
|
|
||||||
def save_images_to_directory(img_obj_list, directory_path):
|
|
||||||
try:
|
|
||||||
# 创建保存图片的目录(如果不存在)
|
|
||||||
os.makedirs(directory_path, exist_ok=True)
|
|
||||||
|
|
||||||
for idx, img_obj in enumerate(img_obj_list):
|
|
||||||
url = img_obj["url"]
|
|
||||||
data = img_obj["data"]
|
|
||||||
|
|
||||||
# 获取图片的扩展名(假设url以图片扩展名结尾)
|
|
||||||
extension = os.path.splitext(url)[1]
|
|
||||||
|
|
||||||
# 图片文件名,这里用序号作为文件名
|
|
||||||
file_name = f"image_{idx}{extension}"
|
|
||||||
file_path = os.path.join(directory_path, file_name)
|
|
||||||
|
|
||||||
# 将图片数据写入本地文件
|
|
||||||
with open(file_path, "wb") as file:
|
|
||||||
file.write(data)
|
|
||||||
|
|
||||||
print("图片保存成功!")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"图片保存失败:{e}")
|
|
||||||
|
|
||||||
|
|
||||||
def concatenate_images_vertically(img_obj_list, output_file):
|
def concatenate_images_vertically(img_obj_list, output_file):
|
||||||
try:
|
try:
|
||||||
# 计算拼接后的长图宽度和总高度
|
# 计算拼接后的长图宽度和总高度
|
||||||
@ -121,9 +94,9 @@ def concatenate_images_vertically(img_obj_list, output_file):
|
|||||||
# 保存拼接后的长图到本地
|
# 保存拼接后的长图到本地
|
||||||
long_image.save(output_file)
|
long_image.save(output_file)
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"拼接图片失败:{e}")
|
file_name = img_obj_list[0]["file_name"]
|
||||||
|
print(f"{file_name}, 拼接图片失败:{e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@ -135,24 +108,23 @@ def process_batch(lines):
|
|||||||
file_name, _, url = line.partition(" - ")
|
file_name, _, url = line.partition(" - ")
|
||||||
html_content = get_html(url)
|
html_content = get_html(url)
|
||||||
img_url_list = get_img_urls(html_content)
|
img_url_list = get_img_urls(html_content)
|
||||||
img_obj_list = create_img_obj_list(img_url_list)
|
img_obj_list = create_img_obj_list(img_url_list, file_name)
|
||||||
|
|
||||||
# 使用 ThreadPoolExecutor 创建线程池
|
# 使用 ThreadPoolExecutor 创建线程池
|
||||||
with ThreadPoolExecutor() as executor:
|
with ThreadPoolExecutor() as executor:
|
||||||
# 多线程处理图片下载和替换
|
# 多线程处理图片下载和替换
|
||||||
executor.map(set_img_obj_data, img_obj_list)
|
executor.map(set_img_obj_data, img_obj_list)
|
||||||
|
|
||||||
# save_images_to_directory(img_obj_list, directory_path="imgs")
|
|
||||||
concatenate_images_vertically(
|
concatenate_images_vertically(
|
||||||
img_obj_list, output_file=f"imgs/{file_name}.png"
|
img_obj_list, output_file=f"imgs/{file_name}.png"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
file_name = "input.txt"
|
task_file = "input.txt"
|
||||||
batch_size = 3 # 每个线程处理的行数
|
batch_size = 3 # 每个线程处理的行数
|
||||||
|
|
||||||
with open(file_name, "r", encoding="utf-8") as file:
|
with open(task_file, "r", encoding="utf-8") as file:
|
||||||
lines = file.readlines()
|
lines = file.readlines()
|
||||||
|
|
||||||
# 使用 ThreadPoolExecutor 创建线程池
|
# 使用 ThreadPoolExecutor 创建线程池
|
||||||
@ -161,3 +133,5 @@ if __name__ == "__main__":
|
|||||||
for i in range(0, len(lines), batch_size):
|
for i in range(0, len(lines), batch_size):
|
||||||
batch_lines = lines[i : i + batch_size]
|
batch_lines = lines[i : i + batch_size]
|
||||||
executor.submit(process_batch, batch_lines)
|
executor.submit(process_batch, batch_lines)
|
||||||
|
|
||||||
|
print("finish, 程序结束...")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user