优化代码

This commit is contained in:
vincent 2023-07-21 16:59:38 +08:00
parent 4d2635fba8
commit ffd9e70346

48
main.py
View File

@ -2,12 +2,8 @@ import os
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from PIL import Image from PIL import Image
import io import io
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
def get_html(url, max_retries=3): def get_html(url, max_retries=3):
@ -52,10 +48,11 @@ def get_img_urls(html_content):
return img_urls return img_urls
def create_img_obj_list(img_url_list): def create_img_obj_list(img_url_list, file_name):
img_obj_list = [] img_obj_list = []
for url in img_url_list: for url in img_url_list:
obj = dict() obj = dict()
obj["file_name"] = file_name
obj["url"] = url obj["url"] = url
obj["data"] = None obj["data"] = None
img_obj_list.append(obj) img_obj_list.append(obj)
@ -67,35 +64,11 @@ def set_img_obj_data(img_obj):
url = img_obj["url"] url = img_obj["url"]
data = download_image(url) data = download_image(url)
if data is None: if data is None:
raise Exception("下载图片失败") file_name = img_obj["file_name"]
raise Exception(f"{file_name}, 下载图片失败")
img_obj["data"] = data img_obj["data"] = data
def save_images_to_directory(img_obj_list, directory_path):
    """Write each image in ``img_obj_list`` to its own file in ``directory_path``.

    Args:
        img_obj_list: Sequence of dicts; each entry is read through its
            ``"url"`` key (used only to derive the file extension) and its
            ``"data"`` key (the bytes written to disk).
        directory_path: Target directory; created if it does not exist.

    Returns:
        None. Failures are reported on stdout instead of being raised, so a
        failed save never aborts the caller.
    """
    # Imported locally so this function stays self-contained and does not
    # depend on the module-level import block.
    from urllib.parse import urlsplit

    try:
        # Create the output directory if it does not exist yet.
        os.makedirs(directory_path, exist_ok=True)

        for idx, img_obj in enumerate(img_obj_list):
            url = img_obj["url"]
            data = img_obj["data"]

            # Take the extension from the URL *path* only. Splitting the raw
            # URL would keep any query string or fragment
            # ("pic.jpg?size=large" -> ".jpg?size=large") and yield a broken
            # file name.
            extension = os.path.splitext(urlsplit(url).path)[1]

            # Files are named after their position in the list.
            file_name = f"image_{idx}{extension}"
            file_path = os.path.join(directory_path, file_name)

            # Write the raw image bytes to the local file.
            with open(file_path, "wb") as file:
                file.write(data)

        print("图片保存成功!")
    except Exception as e:
        # Best-effort by design: report the failure and return normally.
        print(f"图片保存失败:{e}")
def concatenate_images_vertically(img_obj_list, output_file): def concatenate_images_vertically(img_obj_list, output_file):
try: try:
# 计算拼接后的长图宽度和总高度 # 计算拼接后的长图宽度和总高度
@ -120,10 +93,10 @@ def concatenate_images_vertically(img_obj_list, output_file):
# 保存拼接后的长图到本地 # 保存拼接后的长图到本地
long_image.save(output_file) long_image.save(output_file)
except Exception as e: except Exception as e:
print(f"拼接图片失败:{e}") file_name = img_obj_list[0]["file_name"]
print(f"{file_name}, 拼接图片失败:{e}")
return None return None
@ -135,24 +108,23 @@ def process_batch(lines):
file_name, _, url = line.partition(" - ") file_name, _, url = line.partition(" - ")
html_content = get_html(url) html_content = get_html(url)
img_url_list = get_img_urls(html_content) img_url_list = get_img_urls(html_content)
img_obj_list = create_img_obj_list(img_url_list) img_obj_list = create_img_obj_list(img_url_list, file_name)
# 使用 ThreadPoolExecutor 创建线程池 # 使用 ThreadPoolExecutor 创建线程池
with ThreadPoolExecutor() as executor: with ThreadPoolExecutor() as executor:
# 多线程处理图片下载和替换 # 多线程处理图片下载和替换
executor.map(set_img_obj_data, img_obj_list) executor.map(set_img_obj_data, img_obj_list)
# save_images_to_directory(img_obj_list, directory_path="imgs")
concatenate_images_vertically( concatenate_images_vertically(
img_obj_list, output_file=f"imgs/{file_name}.png" img_obj_list, output_file=f"imgs/{file_name}.png"
) )
if __name__ == "__main__": if __name__ == "__main__":
file_name = "input.txt" task_file = "input.txt"
batch_size = 3 # 每个线程处理的行数 batch_size = 3 # 每个线程处理的行数
with open(file_name, "r", encoding="utf-8") as file: with open(task_file, "r", encoding="utf-8") as file:
lines = file.readlines() lines = file.readlines()
# 使用 ThreadPoolExecutor 创建线程池 # 使用 ThreadPoolExecutor 创建线程池
@ -161,3 +133,5 @@ if __name__ == "__main__":
for i in range(0, len(lines), batch_size): for i in range(0, len(lines), batch_size):
batch_lines = lines[i : i + batch_size] batch_lines = lines[i : i + batch_size]
executor.submit(process_batch, batch_lines) executor.submit(process_batch, batch_lines)
print("finish, 程序结束...")