From ffd9e70346e660bded33be293e551102c1538446 Mon Sep 17 00:00:00 2001 From: vincent Date: Fri, 21 Jul 2023 16:59:38 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 48 +++++++++++------------------------------------- 1 file changed, 11 insertions(+), 37 deletions(-) diff --git a/main.py b/main.py index d6fbf12..b7623a9 100644 --- a/main.py +++ b/main.py @@ -2,12 +2,8 @@ import os import requests from bs4 import BeautifulSoup from concurrent.futures import ThreadPoolExecutor - - from PIL import Image import io -from reportlab.lib.pagesizes import letter -from reportlab.pdfgen import canvas def get_html(url, max_retries=3): @@ -52,10 +48,11 @@ def get_img_urls(html_content): return img_urls -def create_img_obj_list(img_url_list): +def create_img_obj_list(img_url_list, file_name): img_obj_list = [] for url in img_url_list: obj = dict() + obj["file_name"] = file_name obj["url"] = url obj["data"] = None img_obj_list.append(obj) @@ -67,35 +64,11 @@ def set_img_obj_data(img_obj): url = img_obj["url"] data = download_image(url) if data is None: - raise Exception("下载图片失败") + file_name = img_obj["file_name"] + raise Exception(f"{file_name}, 下载图片失败") img_obj["data"] = data -def save_images_to_directory(img_obj_list, directory_path): - try: - # 创建保存图片的目录(如果不存在) - os.makedirs(directory_path, exist_ok=True) - - for idx, img_obj in enumerate(img_obj_list): - url = img_obj["url"] - data = img_obj["data"] - - # 获取图片的扩展名(假设url以图片扩展名结尾) - extension = os.path.splitext(url)[1] - - # 图片文件名,这里用序号作为文件名 - file_name = f"image_{idx}{extension}" - file_path = os.path.join(directory_path, file_name) - - # 将图片数据写入本地文件 - with open(file_path, "wb") as file: - file.write(data) - - print("图片保存成功!") - except Exception as e: - print(f"图片保存失败:{e}") - - def concatenate_images_vertically(img_obj_list, output_file): try: # 计算拼接后的长图宽度和总高度 @@ -120,10 +93,10 @@ def concatenate_images_vertically(img_obj_list, output_file): # 保存拼接后的长图到本地 long_image.save(output_file) - except Exception as e: - print(f"拼接图片失败:{e}") + file_name = img_obj_list[0]["file_name"] + print(f"{file_name}, 拼接图片失败:{e}") return None @@ -135,24 +108,23 @@ def process_batch(lines): file_name, _, url = line.partition(" - ") html_content = get_html(url) img_url_list = get_img_urls(html_content) - img_obj_list = create_img_obj_list(img_url_list) + img_obj_list = create_img_obj_list(img_url_list, file_name) # 使用 ThreadPoolExecutor 创建线程池 with ThreadPoolExecutor() as executor: # 多线程处理图片下载和替换 executor.map(set_img_obj_data, img_obj_list) - # save_images_to_directory(img_obj_list, directory_path="imgs") concatenate_images_vertically( img_obj_list, output_file=f"imgs/{file_name}.png" ) if __name__ == "__main__": - file_name = "input.txt" + task_file = "input.txt" batch_size = 3 # 每个线程处理的行数 - with open(file_name, "r", encoding="utf-8") as file: + with open(task_file, "r", encoding="utf-8") as file: lines = file.readlines() # 使用 ThreadPoolExecutor 创建线程池 @@ -161,3 +133,5 @@ if __name__ == "__main__": for i in range(0, len(lines), batch_size): batch_lines = lines[i : i + batch_size] executor.submit(process_batch, batch_lines) + + print("finish, 程序结束...")