diff --git a/__pycache__/requester.cpython-310.pyc b/__pycache__/requester.cpython-310.pyc
index 86d062f..5b90c81 100644
Binary files a/__pycache__/requester.cpython-310.pyc and b/__pycache__/requester.cpython-310.pyc differ
diff --git a/__pycache__/web_img.cpython-310.pyc b/__pycache__/web_img.cpython-310.pyc
index dd09ba5..7d88d0e 100644
Binary files a/__pycache__/web_img.cpython-310.pyc and b/__pycache__/web_img.cpython-310.pyc differ
diff --git a/__pycache__/web_img_manager.cpython-310.pyc b/__pycache__/web_img_manager.cpython-310.pyc
new file mode 100644
index 0000000..514bd33
Binary files /dev/null and b/__pycache__/web_img_manager.cpython-310.pyc differ
diff --git a/__pycache__/web_parser.cpython-310.pyc b/__pycache__/web_parser.cpython-310.pyc
index e33cb9c..2ca776b 100644
Binary files a/__pycache__/web_parser.cpython-310.pyc and b/__pycache__/web_parser.cpython-310.pyc differ
diff --git a/img_manager.py b/img_manager.py
new file mode 100644
index 0000000..090b2b3
--- /dev/null
+++ b/img_manager.py
@@ -0,0 +1,64 @@
+from requester import Requester
+from web_img import WebImg
+
+from PIL import Image
+import io
+
+
+class ImgManager:
+    def __init__(self, img_url_list: list[str], task_name: str):
+        self.img_url_list = img_url_list
+        self.task_name = task_name
+        self.img_list = self.__create_web_img_list()
+
+    def __create_web_img_list(self):
+        """Wrap each image URL in a WebImg object tagged with the task name."""
+        img_list = []
+        for url in self.img_url_list:
+            img = WebImg(self.task_name, url)
+            img_list.append(img)
+        return img_list
+
+    def batch_fetch_images(self):
+        requester = Requester()
+        requester.batch_fetch_images_to_img_obj_list(self.img_list)
+
+    def concatenate_images_vertically(self):
+        """
+        Vertically concatenate the downloaded images into one long image.
+        """
+        try:
+            # Compute the width and total height of the concatenated long image
+            max_width = max(
+                Image.open(io.BytesIO(web_img.data)).width for web_img in self.img_list
+            )
+            total_height = sum(
+                Image.open(io.BytesIO(web_img.data)).height for web_img in self.img_list
+            )
+
+            # Create a new blank long image
+            long_image = Image.new(
+                "RGB", (max_width, total_height), color=(255, 255, 255)
+            )
+
+            # Paste the images one by one, stacking them vertically
+            y_offset = 0
+            for web_img in self.img_list:
+                img = Image.open(io.BytesIO(web_img.data))
+                img_width, img_height = img.size
+                x_offset = (max_width - img_width) // 2  # center horizontally
+                long_image.paste(img, (x_offset, y_offset))
+                y_offset += img_height
+
+            return long_image
+
+        except Exception as e:
+            task_name = self.img_list[0].task_name
+            print(f"{task_name}, failed to concatenate images: {e}")
+            return None
+
+    def save_long_image(self):
+        long_image = self.concatenate_images_vertically()  # vertically concatenate into one long image
+        if long_image is None:  # nothing to save if concatenation failed
+            return
+        long_image.save(f"output/{self.img_list[0].task_name}.png")  # save the long image locally
diff --git a/main.py b/main.py
index 132c6f3..213cff9 100644
--- a/main.py
+++ b/main.py
@@ -1,118 +1,7 @@
 from concurrent.futures import ThreadPoolExecutor
-from PIL import Image
-import io
-from web_img import WebImg
 from requester import Requester
-from web_parser import WebParser
-
-
-# def fetch_image(img_url: str, max_retries=5):
-#     """
-#     Download the image content from the given image URL.
-#
-#     Args:
-#         img_url (str): URL of the image.
-#         max_retries (int, optional): Maximum number of retries when the download fails. Defaults to 5.
-#
-#     Returns:
-#         bytes or None: The binary image data if the download succeeds, otherwise None.
-#
-#     Notes:
-#         This function downloads the image file by sending an HTTP request, using the `requests` library to fetch the response for the URL.
-#         If the download succeeds, it returns the binary content of the image (bytes).
-#         If the download fails, it retries up to `max_retries` times until it succeeds or the retry limit is reached.
-#         Between retries, it prints an error message to indicate retry progress.
-#         If the image still cannot be downloaded after all retries, it prints a failure message and returns None.
-#
-#     Example:
-#     ```
-#     image_url = "https://example.com/image.jpg"
-#     image_data = download_image(image_url)
-#     if image_data:
-#         # process the image data...
-#     else:
-#         print("Unable to download the image; download failed.")
-#     ```
-#     """
-#     for retry in range(max_retries):
-#         try:
-#             with requests.get(img_url, stream=True) as response:
-#                 response.raise_for_status()
-#                 return response.content
-#         except Exception as e:
-#             if retry < max_retries - 1:
-#                 print(
-#                     f"Failed to download image, retrying ({retry+1}/{max_retries})..."
-#                 )
-#             else:
-#                 print("Failed to download image after multiple retries, skipping.")
-#                 return None
-
-
-def create_web_img_list(img_url_list, task_name):
-    img_obj_list = []
-    for url in img_url_list:
-        img = WebImg(task_name, url)
-        img_obj_list.append(img)
-
-    return img_obj_list
-
-
-# def fetch_images_to_img_obj(web_img: WebImg):
-#     url = web_img.url
-#     data = fetch_image(url)
-#     if data is None:
-#         task_name = web_img.task_name
-#         print(f"{task_name}, failed to download the image")
-#         raise Exception(f"{task_name}, failed to download the image")
-#     web_img.data = data
-
-
-# def batch_fetch_images_to_img_obj_list(web_img_list: list[WebImg]):
-#     """
-#     Use ThreadPoolExecutor to create a thread pool and call the set_img_obj_data function on each image object in img_obj_list.
-#
-#     Args:
-#         img_obj_list (list): List of image objects; each one holds the image's data and related information.
-#
-#     Returns:
-#         None
-#     """
-#     with ThreadPoolExecutor() as executor:
-#         executor.map(fetch_images_to_img_obj, web_img_list)
-
-
-def concatenate_images_vertically(web_img_list: list[WebImg]):
-    """
-    Vertically concatenate images into one long image.
-    """
-    try:
-        # Compute the width and total height of the concatenated long image
-        max_width = max(
-            Image.open(io.BytesIO(web_img.data)).width for web_img in web_img_list
-        )
-        total_height = sum(
-            Image.open(io.BytesIO(web_img.data)).height for web_img in web_img_list
-        )
-
-        # Create a new blank long image
-        long_image = Image.new("RGB", (max_width, total_height), color=(255, 255, 255))
-
-        # Paste the images one by one, stacking them vertically
-        y_offset = 0
-        for web_img in web_img_list:
-            img = Image.open(io.BytesIO(web_img.data))
-            img_width, img_height = img.size
-            x_offset = (max_width - img_width) // 2  # center horizontally
-            long_image.paste(img, (x_offset, y_offset))
-            y_offset += img_height
-
-        return long_image
-
-    except Exception as e:
-        task_name = web_img_list[0].task_name
-        print(f"{task_name}, failed to concatenate images: {e}")
-        return None
+from web_parser import Parser
+from web_img_manager import ImgManager
 
 
 def pre_batch_task(lines: list[str]):
@@ -122,15 +11,14 @@ def pre_batch_task(lines: list[str]):
     for line in lines:
         line = line.strip()  # strip leading and trailing whitespace from the line
         if line:
-            requester = Requester()
             task_name, _, url = line.partition(" - ")  # parse out the HTML task name and the URL
            print(f"{task_name}, starting download")
-            html_content = requester.fetch_html(url, task_name)
-            img_url_list = WebParser(html_content).parse_img_urls()
-            web_img_list = create_web_img_list(img_url_list, task_name)
-            requester.batch_fetch_images_to_img_obj_list(web_img_list)
-            long_image = concatenate_images_vertically(web_img_list)  # vertically concatenate into one long image
-            long_image.save(f"output/{task_name}.png")  # save the long image locally
+            html_content = Requester().fetch_html(url, task_name)
+            img_url_list = Parser(html_content).get_img_url_list()
+
+            img_manager = ImgManager(img_url_list, task_name)
+            img_manager.batch_fetch_images()
+            img_manager.save_long_image()
             print(f"{task_name}, done!!")
diff --git a/web_img_manager.py b/web_img_manager.py
new file mode 100644
index 0000000..a6bccc0
--- /dev/null
+++ b/web_img_manager.py
@@ -0,0 +1,64 @@
+from requester import Requester
+from web_img import WebImg
+
+from PIL import Image
+import io
+
+
+class ImgManager:
+    def __init__(self, img_url_list: list[str], task_name: str):
+        self.img_url_list = img_url_list
+        self.task_name = task_name
+        self.img_list = self.__create_web_img_list()
+
+    def __create_web_img_list(self):
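+        """Wrap each image URL in a WebImg object tagged with the task name."""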
+        img_list = []
+        for url in self.img_url_list:
+            img = WebImg(self.task_name, url)
+            img_list.append(img)
+        return img_list
+
+    def batch_fetch_images(self):
+        requester = Requester()
+        requester.batch_fetch_images_to_img_obj_list(self.img_list)
+
+    def concatenate_images_vertically(self):
+        """
+        Vertically concatenate the downloaded images into one long image.
+        """
+        try:
+            # Compute the width and total height of the concatenated long image
+            max_width = max(
+                Image.open(io.BytesIO(web_img.data)).width for web_img in self.img_list
+            )
+            total_height = sum(
+                Image.open(io.BytesIO(web_img.data)).height for web_img in self.img_list
+            )
+
+            # Create a new blank long image
+            long_image = Image.new(
+                "RGB", (max_width, total_height), color=(255, 255, 255)
+            )
+
+            # Paste the images one by one, stacking them vertically
+            y_offset = 0
+            for web_img in self.img_list:
+                img = Image.open(io.BytesIO(web_img.data))
+                img_width, img_height = img.size
+                x_offset = (max_width - img_width) // 2  # center horizontally
+                long_image.paste(img, (x_offset, y_offset))
+                y_offset += img_height
+
+            return long_image
+
+        except Exception as e:
+            task_name = self.img_list[0].task_name
+            print(f"{task_name}, failed to concatenate images: {e}")
+            return None
+
+    def save_long_image(self):
+        long_image = self.concatenate_images_vertically()  # vertically concatenate into one long image
+        if long_image is None:  # nothing to save if concatenation failed
+            return
+        long_image.save(f"output/{self.img_list[0].task_name}.png")  # save the long image locally
diff --git a/web_parser.py b/web_parser.py
index 39d23ea..7d5c484 100644
--- a/web_parser.py
+++ b/web_parser.py
@@ -2,11 +2,11 @@ from bs4 import BeautifulSoup
 from web_img import WebImg
 
 
-class WebParser:
+class Parser:
     def __init__(self, html_content: str):
         self.html_content = html_content
 
-    def parse_img_urls(self):
+    def get_img_url_list(self):
         soup = BeautifulSoup(self.html_content, "html.parser")
         img_tags = soup.find("div", class_="reading-content").find_all("img")
         img_urls = []
@@ -14,5 +14,3 @@ class WebParser:
             img_url = img_tag.attrs["data-src"]
             img_urls.append(img_url)
         return img_urls
-
-