update

2023-08-20 01:05:56 +08:00 · 2023-08-20 01:05:56 +08:00 · 510b4bf0d4
commit 510b4bf0d4
parent dacae684d7
8 changed files with 135 additions and 124 deletions
--- a/pycache/requester.cpython-310.pyc
+++ b/pycache/requester.cpython-310.pyc
--- a/pycache/web_img.cpython-310.pyc
+++ b/pycache/web_img.cpython-310.pyc
--- a/pycache/web_img_manager.cpython-310.pyc
+++ b/pycache/web_img_manager.cpython-310.pyc
--- a/pycache/web_parser.cpython-310.pyc
+++ b/pycache/web_parser.cpython-310.pyc
--- a/img_manager.py
+++ b/img_manager.py
@@ -0,0 +1,62 @@
+from requester import Requester
+from web_img import WebImg
+
+from PIL import Image
+import io
+from web_img import WebImg
+from requester import Requester
+
+
+class ImgManager:
+    """Manage one task's web images: build, fetch, stitch and save as a PNG."""
+
+    def __init__(self, img_url_list: list[str], task_name: str = "untitled"):
+        self.img_url_list = img_url_list
+        # Must be set before building img_list: every WebImg is tagged with it.
+        self.task_name = task_name
+        self.img_list = self.__create_web_img_list()
+
+    def __create_web_img_list(self):
+        """Wrap every URL in a WebImg tagged with this task's name."""
+        return [WebImg(self.task_name, url) for url in self.img_url_list]
+
+    def batch_fetch_images(self):
+        """Pass the WebImg list to Requester's batch image fetch."""
+        Requester().batch_fetch_images_to_img_obj_list(self.img_list)
+
+    def concatenate_images_vertically(self):
+        """Stack the images top to bottom, centred, on a white canvas.
+
+        Returns the stitched PIL image, or None (after printing the error)
+        when the list is empty or an image cannot be read.
+        """
+        try:
+            # One header-only pass for the sizes; pixels load only on paste.
+            sizes = [Image.open(io.BytesIO(w.data)).size for w in self.img_list]
+            max_width = max(width for width, _ in sizes)
+            total_height = sum(height for _, height in sizes)
+
+            long_image = Image.new(
+                "RGB", (max_width, total_height), color=(255, 255, 255)
+            )
+
+            y_offset = 0
+            for web_img, (width, height) in zip(self.img_list, sizes):
+                with Image.open(io.BytesIO(web_img.data)) as img:
+                    x_offset = (max_width - width) // 2  # centre horizontally
+                    long_image.paste(img, (x_offset, y_offset))
+                y_offset += height
+
+            return long_image
+
+        except Exception as e:
+            # Report via self.task_name; img_list[0] would fail on an empty list.
+            print(f"{self.task_name}, 拼接图片失败：{e}")
+            return None
+
+    def save_long_image(self):
+        """Stitch the images and save them as output/<task_name>.png."""
+        long_image = self.concatenate_images_vertically()
+        if long_image is None:
+            return  # stitching failed and was already reported; skip saving
+        long_image.save(f"output/{self.task_name}.png")
--- a/main.py
+++ b/main.py
@@ -1,118 +1,7 @@
 from concurrent.futures import ThreadPoolExecutor
-from PIL import Image
-import io
-from web_img import WebImg
 from requester import Requester
-from web_parser import WebParser
-
-
-# def fetch_image(img_url: str, max_retries=5):
-#     """
-#     通过给定的图片URL下载图片内容。
-
-#     参数：
-#         img_url (str): 图片的URL地址。
-#         max_retries (int, 可选): 下载失败时的最大重试次数。默认为5次。
-
-#     返回值：
-#         bytes or None: 成功下载图片的二进制数据，若下载失败则返回None。
-
-#     注解：
-#         这个函数通过发送HTTP请求下载图片文件。它使用`requests`库来获取URL返回的响应。
-#         如果下载成功，函数将返回图片的二进制内容（bytes格式）。
-#         如果下载失败，函数将尝试最多`max_retries`次重试，直到成功或达到重试次数上限。
-#         在每次重试之间，函数会打印错误消息来指示重试进度。
-#         如果重试次数用尽后仍然无法下载图片，函数将输出失败消息并返回None。
-
-#         例子：
-#         ```
-#         image_url = "https://example.com/image.jpg"
-#         image_data = download_image(image_url)
-#         if image_data:
-#             # 处理图片数据...
-#         else:
-#             print("无法下载图片，下载失败。")
-#         ```
-#     """
-#     for retry in range(max_retries):
-#         try:
-#             with requests.get(img_url, stream=True) as response:
-#                 response.raise_for_status()
-#                 return response.content
-#         except Exception as e:
-#             if retry < max_retries - 1:
-#                 print(
-#                     f"Failed to download image, retrying ({retry+1}/{max_retries})..."
-#                 )
-#             else:
-#                 print("Failed to download image after multiple retries, skipping.")
-#                 return None
-
-
-def create_web_img_list(img_url_list, task_name):
-    img_obj_list = []
-    for url in img_url_list:
-        img = WebImg(task_name, url)
-        img_obj_list.append(img)
-
-    return img_obj_list
-
-
-# def fetch_images_to_img_obj(web_img: WebImg):
-#     url = web_img.url
-#     data = fetch_image(url)
-#     if data is None:
-#         task_name = web_img.task_name
-#         print(f"{task_name}, 下载图片失败")
-#         raise Exception(f"{task_name}, 下载图片失败")
-#     web_img.data = data
-
-
-# def batch_fetch_images_to_img_obj_list(web_img_list: list[WebImg]):
-#     """
-#     使用 ThreadPoolExecutor 创建线程池，对 img_obj_list 中的每个图片对象调用 set_img_obj_data 函数。
-
-#     Args:
-#         img_obj_list (list): 图片对象列表，每个对象包含图片的数据等信息。
-
-#     Returns:
-#         None
-#     """
-#     with ThreadPoolExecutor() as executor:
-#         executor.map(fetch_images_to_img_obj, web_img_list)
-
-
-def concatenate_images_vertically(web_img_list: list[WebImg]):
-    """
-    垂直拼接长图片
-    """
-    try:
-        # 计算拼接后的长图宽度和总高度
-        max_width = max(
-            Image.open(io.BytesIO(web_img.data)).width for web_img in web_img_list
-        )
-        total_height = sum(
-            Image.open(io.BytesIO(web_img.data)).height for web_img in web_img_list
-        )
-
-        # 创建一张新的长图
-        long_image = Image.new("RGB", (max_width, total_height), color=(255, 255, 255))
-
-        # 依次将图片在垂直方向上拼接起来
-        y_offset = 0
-        for web_img in web_img_list:
-            img = Image.open(io.BytesIO(web_img.data))
-            img_width, img_height = img.size
-            x_offset = (max_width - img_width) // 2  # 居中拼接
-            long_image.paste(img, (x_offset, y_offset))
-            y_offset += img_height
-
-        return long_image
-
-    except Exception as e:
-        task_name = web_img_list[0].task_name
-        print(f"{task_name}, 拼接图片失败：{e}")
-        return None
+from web_parser import Parser
+from web_img_manager import ImgManager


 def pre_batch_task(lines: list[str]):
@@ -122,15 +11,14 @@ def pre_batch_task(lines: list[str]):
    for line in lines:
        line = line.strip()  # 去掉每行开头和结尾的空白字符
        if line:
-            requester = Requester()
            task_name, _, url = line.partition(" - ")  # 解析出 HTML 文件名和 URL 地址
            print(f"{task_name}, 开始下载")
-            html_content = requester.fetch_html(url, task_name)
-            img_url_list = WebParser(html_content).parse_img_urls()
-            web_img_list = create_web_img_list(img_url_list, task_name)
-            requester.batch_fetch_images_to_img_obj_list(web_img_list)
-            long_image = concatenate_images_vertically(web_img_list)  # 垂直拼接长图片
-            long_image.save(f"output/{task_name}.png")  # 保存长图到本地
+            html_content = Requester().fetch_html(url, task_name)
+            img_url_list = Parser(html_content).get_img_url_list()
+
+            img_manager = ImgManager(img_url_list, task_name)
+            img_manager.batch_fetch_images()
+            img_manager.save_long_image()
            print(f"{task_name}, 完成!!")


--- a/web_img_manager.py
+++ b/web_img_manager.py
@@ -0,0 +1,63 @@
+from requester import Requester
+from web_img import WebImg
+
+from PIL import Image
+import io
+from web_img import WebImg
+from requester import Requester
+
+
+class ImgManager:
+    """Manage one task's web images: build, fetch, stitch and save as a PNG."""
+
+    def __init__(self, img_url_list: list[str], task_name: str):
+        self.img_url_list = img_url_list
+        # Must be set before building img_list: every WebImg is tagged with it.
+        self.task_name = task_name
+        self.img_list = self.__create_web_img_list()
+
+    def __create_web_img_list(self):
+        """Wrap every URL in a WebImg tagged with this task's name."""
+        return [WebImg(self.task_name, url) for url in self.img_url_list]
+
+    def batch_fetch_images(self):
+        """Pass the WebImg list to Requester's batch image fetch."""
+        Requester().batch_fetch_images_to_img_obj_list(self.img_list)
+
+    def concatenate_images_vertically(self):
+        """Stack the images top to bottom, centred, on a white canvas.
+
+        Returns the stitched PIL image, or None (after printing the error)
+        when the list is empty or an image cannot be read.
+        """
+        try:
+            # One header-only pass for the sizes; pixels load only on paste.
+            sizes = [Image.open(io.BytesIO(w.data)).size for w in self.img_list]
+            max_width = max(width for width, _ in sizes)
+            total_height = sum(height for _, height in sizes)
+
+            # White canvas: as wide as the widest image, as tall as all of them.
+            long_image = Image.new(
+                "RGB", (max_width, total_height), color=(255, 255, 255)
+            )
+
+            y_offset = 0
+            for web_img, (width, height) in zip(self.img_list, sizes):
+                with Image.open(io.BytesIO(web_img.data)) as img:
+                    x_offset = (max_width - width) // 2  # centre horizontally
+                    long_image.paste(img, (x_offset, y_offset))
+                y_offset += height
+
+            return long_image
+
+        except Exception as e:
+            # Report via self.task_name; img_list[0] would fail on an empty list.
+            print(f"{self.task_name}, 拼接图片失败：{e}")
+            return None
+
+    def save_long_image(self):
+        """Stitch the images and save them as output/<task_name>.png."""
+        long_image = self.concatenate_images_vertically()
+        if long_image is None:
+            return  # stitching failed and was already reported; skip saving
+        long_image.save(f"output/{self.task_name}.png")
--- a/web_parser.py
+++ b/web_parser.py
@@ -2,11 +2,11 @@ from bs4 import BeautifulSoup
 from web_img import WebImg


-class WebParser:
+class Parser:
    def __init__(self, html_content: str):
        self.html_content = html_content

-    def parse_img_urls(self):
+    def get_img_url_list(self):
        soup = BeautifulSoup(self.html_content, "html.parser")
        img_tags = soup.find("div", class_="reading-content").find_all("img")
        img_urls = []
@@ -14,5 +14,3 @@ class WebParser:
            img_url = img_tag.attrs["data-src"]
            img_urls.append(img_url)
        return img_urls
-
-