From 9cab9ac77782c738042feada54a7b3ea0c9a60ff Mon Sep 17 00:00:00 2001 From: vincent Date: Fri, 21 Jul 2023 13:47:59 +0800 Subject: [PATCH] =?UTF-8?q?=E9=87=8C=E7=A8=8B=E7=A2=91v2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 15409a6..aee6c9f 100644 --- a/main.py +++ b/main.py @@ -3,10 +3,12 @@ import requests from bs4 import BeautifulSoup from concurrent.futures import ThreadPoolExecutor + from PIL import Image import io from reportlab.lib.pagesizes import letter from reportlab.pdfgen import canvas +from utils import ArrayUtil def get_html(url, max_retries=3): @@ -24,7 +26,7 @@ def get_html(url, max_retries=3): raise Exception("获取网页html失败") -def download_image(img_url, max_retries=3): +def download_image(img_url, max_retries=5): for retry in range(max_retries): try: with requests.get(img_url, stream=True) as response: @@ -59,13 +61,10 @@ def create_img_obj_list(img_url_list): obj["data"] = None img_obj_list.append(obj) - # TODO remember to delete - if len(img_obj_list) > 2: - break return img_obj_list -def fill_img_obj(img_obj): +def set_img_obj_data(img_obj): url = img_obj["url"] data = download_image(url) if data is None: @@ -104,7 +103,7 @@ def generate_pdf_from_images(img_obj_list, output_file): for img_obj in img_obj_list: # 从图片对象的 data 字段中创建图像对象 - img_data = img_obj['data'] + img_data = img_obj["data"] img = Image.open(io.BytesIO(img_data)) # 将图像大小调整为 PDF 页面大小 @@ -125,6 +124,38 @@ def generate_pdf_from_images(img_obj_list, output_file): except Exception as e: print(f"PDF 生成失败:{e}") + +def concatenate_images_vertically(img_obj_list, output_file): + try: + # 计算拼接后的长图宽度和总高度 + max_width = max( + Image.open(io.BytesIO(img_obj["data"])).width for img_obj in img_obj_list + ) + total_height = sum( + Image.open(io.BytesIO(img_obj["data"])).height for img_obj in img_obj_list + ) + + # 创建一张新的长图 + long_image = Image.new("RGB", (max_width, total_height), color=(255, 255, 255)) + + # 依次将图片在垂直方向上拼接起来 + y_offset = 0 + for img_obj in img_obj_list: + img = Image.open(io.BytesIO(img_obj["data"])) + img_width, img_height = img.size + x_offset = (max_width - img_width) // 2 # 居中拼接 + long_image.paste(img, (x_offset, y_offset)) + y_offset += img_height + + # 保存拼接后的长图到本地 + # long_image.save(output_file) + + + except Exception as e: + print(f"拼接图片失败:{e}") + return None + + def process_batch(lines): for line in lines: line = line.strip() # 去掉每行开头和结尾的空白字符 @@ -138,10 +169,12 @@ def process_batch(lines): # 使用 ThreadPoolExecutor 创建线程池 with ThreadPoolExecutor() as executor: # 多线程处理图片下载和替换 - executor.map(fill_img_obj, img_obj_list) + executor.map(set_img_obj_data, img_obj_list) # save_images_to_directory(img_obj_list, directory_path="imgs") - generate_pdf_from_images(img_obj_list, output_file=f"imgs/{file_name}.pdf") + concatenate_images_vertically( + img_obj_list, output_file=f"imgs/{file_name}.pdf" + ) if __name__ == "__main__":