From cfb8c9e0786361e7ed289a72c14b540f4a0080d6 Mon Sep 17 00:00:00 2001
From: vincent
Date: Sun, 20 Aug 2023 12:19:08 +0800
Subject: [PATCH] Migrate web_img_manager code into web_img.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py            |  4 +--
 requester.py       | 18 -----------
 web_img.py         | 77 ++++++++++++++++++++++++++++++++++++++++++++--
 web_img_manager.py | 63 -------------------------------------
 web_parser.py      |  1 -
 5 files changed, 76 insertions(+), 87 deletions(-)
 delete mode 100644 web_img_manager.py

diff --git a/main.py b/main.py
index 213cff9..d7d66dd 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,7 @@
 from concurrent.futures import ThreadPoolExecutor
 from requester import Requester
 from web_parser import Parser
-from web_img_manager import ImgManager
+from web_img import ImgManager
 
 
 def pre_batch_task(lines: list[str]):
@@ -17,7 +17,7 @@ def pre_batch_task(lines: list[str]):
     img_url_list = Parser(html_content).get_img_url_list()
 
     img_manager = ImgManager(img_url_list, task_name)
-    img_manager.batch_fetch_images()
+    img_manager.batch_fill_image_data()
     img_manager.save_long_image()
     print(f"{task_name}, done!!")
 
diff --git a/requester.py b/requester.py
index 27cf08b..3906f89 100644
--- a/requester.py
+++ b/requester.py
@@ -1,10 +1,5 @@
-import os
 import requests
-from bs4 import BeautifulSoup
 from concurrent.futures import ThreadPoolExecutor
-from PIL import Image
-import io
-from web_img import WebImg
 
 
 class Requester:
@@ -63,16 +58,3 @@ class Requester:
         else:
             print("Failed to download image after multiple retries, skipping.")
             return None
-
-    def fetch_images_to_img_obj(self, web_img: WebImg):
-        url = web_img.url
-        data = self.fetch_image(url)
-        if data is None:
-            task_name = web_img.task_name
-            print(f"{task_name}, failed to download image")
-            raise Exception(f"{task_name}, failed to download image")
-        web_img.data = data
-
-    def batch_fetch_images_to_img_obj_list(self, web_img_list: list[WebImg]):
-        with ThreadPoolExecutor() as executor:
-            executor.map(self.fetch_images_to_img_obj, web_img_list)
diff --git a/web_img.py b/web_img.py
index 3550c7a..9da1bcc 100644
--- a/web_img.py
+++ b/web_img.py
@@ -1,5 +1,76 @@
+from PIL import Image
+import io
+from requester import Requester
+from concurrent.futures import ThreadPoolExecutor
+
+
 class WebImg:
-    def __init__(self, file_name, url):
-        self.task_name = file_name
+    def __init__(self, task_name: str, url: str):
+        self.task_name = task_name
         self.url = url
-        self.data = None
+        self.data = bytearray()
+
+    def fill_img_data(self):
+        requester = Requester()
+        url = self.url
+        data = requester.fetch_image(url)
+        if data is None:
+            print(f"{self.task_name}, failed to download image")
+            raise Exception(f"{self.task_name}, failed to download image")
+        self.data = data
+
+
+class ImgManager:
+    def __init__(self, img_url_list: list[str], task_name: str):
+        self.img_url_list = img_url_list
+        self.task_name = task_name
+        self.img_list = self.__create_web_img_list()
+
+    def __create_web_img_list(self):
+        img_list = []
+        for url in self.img_url_list:
+            img = WebImg(self.task_name, url)
+            img_list.append(img)
+        return img_list
+
+    def batch_fill_image_data(self):
+        with ThreadPoolExecutor() as executor:
+            executor.map(lambda web_img: web_img.fill_img_data(), self.img_list)
+
+    def concatenate_images_vertically(self):
+        """
+        Vertically concatenate the images into one long image.
+        """
+        try:
+            # Compute the width and total height of the concatenated long image
+            max_width = max(
+                Image.open(io.BytesIO(web_img.data)).width for web_img in self.img_list
+            )
+            total_height = sum(
+                Image.open(io.BytesIO(web_img.data)).height for web_img in self.img_list
+            )
+
+            # Create a new blank long image
+            long_image = Image.new(
+                "RGB", (max_width, total_height), color=(255, 255, 255)
+            )
+
+            # Paste the images one after another in the vertical direction
+            y_offset = 0
+            for web_img in self.img_list:
+                img = Image.open(io.BytesIO(web_img.data))
+                img_width, img_height = img.size
+                x_offset = (max_width - img_width) // 2  # center horizontally
+                long_image.paste(img, (x_offset, y_offset))
+                y_offset += img_height
+
+            return long_image
+
+        except Exception as e:
+            task_name = self.img_list[0].task_name
+            print(f"{task_name}, failed to concatenate images: {e}")
+            return None
+
+    def save_long_image(self):
+        long_image = self.concatenate_images_vertically()  # vertically concatenate into one long image
+        long_image.save(f"output/{self.img_list[0].task_name}.png")  # save the long image locally
diff --git a/web_img_manager.py b/web_img_manager.py
deleted file mode 100644
index a6bccc0..0000000
--- a/web_img_manager.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from requester import Requester
-from web_img import WebImg
-
-from PIL import Image
-import io
-from web_img import WebImg
-from requester import Requester
-
-
-class ImgManager:
-    def __init__(self, img_url_list: list[str], task_name):
-        self.img_url_list = img_url_list
-        self.task_name = task_name
-        self.img_list = self.__create_web_img_list()
-
-    def __create_web_img_list(self):
-        img_list = []
-        for url in self.img_url_list:
-            img = WebImg(self.task_name, url)
-            img_list.append(img)
-        return img_list
-
-    def batch_fetch_images(self):
-        requester = Requester()
-        requester.batch_fetch_images_to_img_obj_list(self.img_list)
-
-    def concatenate_images_vertically(self):
-        """
-        Vertically concatenate the images into one long image.
-        """
-        try:
-            # Compute the width and total height of the concatenated long image
-            max_width = max(
-                Image.open(io.BytesIO(web_img.data)).width for web_img in self.img_list
-            )
-            total_height = sum(
-                Image.open(io.BytesIO(web_img.data)).height for web_img in self.img_list
-            )
-
-            # Create a new blank long image
-            long_image = Image.new(
-                "RGB", (max_width, total_height), color=(255, 255, 255)
-            )
-
-            # Paste the images one after another in the vertical direction
-            y_offset = 0
-            for web_img in self.img_list:
-                img = Image.open(io.BytesIO(web_img.data))
-                img_width, img_height = img.size
-                x_offset = (max_width - img_width) // 2  # center horizontally
-                long_image.paste(img, (x_offset, y_offset))
-                y_offset += img_height
-
-            return long_image
-
-        except Exception as e:
-            task_name = self.img_list[0].task_name
-            print(f"{task_name}, failed to concatenate images: {e}")
-            return None
-
-    def save_long_image(self):
-        long_image = self.concatenate_images_vertically()  # vertically concatenate into one long image
-        long_image.save(f"output/{self.img_list[0].task_name}.png")  # save the long image locally
diff --git a/web_parser.py b/web_parser.py
index 7d5c484..83b0eed 100644
--- a/web_parser.py
+++ b/web_parser.py
@@ -1,5 +1,4 @@
 from bs4 import BeautifulSoup
-from web_img import WebImg
 
 
 class Parser:
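
For reference, a minimal usage sketch of the relocated ImgManager after this patch, mirroring the calls visible in main.py's pre_batch_task hunk. It is only an illustration under stated assumptions: the URLs and task name are hypothetical placeholders, and save_long_image() writes output/<task_name>.png, so an output/ directory is assumed to already exist.

# Usage sketch of the migrated classes; URLs and task name are hypothetical placeholders.
from web_img import ImgManager

if __name__ == "__main__":
    img_url_list = [
        "https://example.com/img/001.jpg",  # hypothetical image URLs
        "https://example.com/img/002.jpg",
    ]
    task_name = "demo_task"  # hypothetical task name

    manager = ImgManager(img_url_list, task_name)
    manager.batch_fill_image_data()  # each WebImg downloads its own bytes via Requester
    manager.save_long_image()        # writes output/<task_name>.png; assumes output/ exists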