"""Batch job: for each task line, fetch a web page, download its images and
stitch them vertically into one long PNG saved under ``output/``.

Each non-empty line of the task file has the form ``<task name> - <url>``.
"""

from concurrent.futures import ThreadPoolExecutor
import io
import os

from PIL import Image

from requester import Requester
from web_img import WebImg
from web_parser import WebParser

# Directory the stitched images are written to; created on demand.
OUTPUT_DIR = "output"

# NOTE: image downloading is delegated to
# Requester.batch_fetch_images_to_img_obj_list (see _process_task_line).


def create_web_img_list(img_url_list, task_name):
    """Wrap every image URL in a ``WebImg`` bound to ``task_name``.

    Args:
        img_url_list: Iterable of image URLs.
        task_name: Name of the task the images belong to.

    Returns:
        list: One ``WebImg(task_name, url)`` per URL, in input order.
    """
    return [WebImg(task_name, url) for url in img_url_list]


def concatenate_images_vertically(web_img_list: list[WebImg]):
    """Stack the images top to bottom into one long RGB image.

    The canvas is as wide as the widest image and as tall as the sum of all
    heights; narrower images are centred horizontally on a white background.

    Args:
        web_img_list: ``WebImg`` objects whose ``data`` attribute holds the
            encoded image bytes.

    Returns:
        The stitched ``PIL.Image.Image``, or ``None`` if the list is empty or
        any image cannot be processed (the failure is printed).
    """
    try:
        if not web_img_list:
            raise ValueError("no images to concatenate")

        # Read every image's size in a single pass instead of re-opening each
        # image once for the width and once more for the height.
        sizes = []
        for web_img in web_img_list:
            with Image.open(io.BytesIO(web_img.data)) as img:
                sizes.append(img.size)

        max_width = max(width for width, _ in sizes)
        total_height = sum(height for _, height in sizes)

        # White canvas large enough to hold every image.
        long_image = Image.new("RGB", (max_width, total_height), color=(255, 255, 255))

        # Paste one image at a time so only one source image is held open
        # while the canvas is being filled.
        y_offset = 0
        for web_img, (img_width, img_height) in zip(web_img_list, sizes):
            with Image.open(io.BytesIO(web_img.data)) as img:
                x_offset = (max_width - img_width) // 2  # centre horizontally
                long_image.paste(img, (x_offset, y_offset))
            y_offset += img_height

        return long_image
    except Exception as e:
        # Best-effort by design: report and return None. The list may be
        # empty, so the task name must not be read unconditionally.
        task_name = web_img_list[0].task_name if web_img_list else "<unknown task>"
        print(f"{task_name}, 拼接图片失败:{e}")
        return None


def _process_task_line(line: str) -> None:
    """Run the task described by one stripped, non-empty task line."""
    # "<task name> - <url>": the name is also used as the output file name.
    task_name, separator, url = line.partition(" - ")
    if not separator or not url:
        # Malformed line (no " - " separator or no URL): skip it.
        print(f"{line}, 任务格式错误, 已跳过")
        return

    requester = Requester()
    print(f"{task_name}, 开始下载")
    html_content = requester.fetch_html(url, task_name)
    img_url_list = WebParser(html_content).parse_img_urls()
    web_img_list = create_web_img_list(img_url_list, task_name)
    # Expected to populate each WebImg.data, which the stitching step reads.
    requester.batch_fetch_images_to_img_obj_list(web_img_list)

    long_image = concatenate_images_vertically(web_img_list)
    if long_image is None:
        # The failure was already reported by concatenate_images_vertically.
        return

    # exist_ok makes this safe when several worker threads reach it at once.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    long_image.save(f"{OUTPUT_DIR}/{task_name}.png")
    print(f"{task_name}, 完成!!")


def pre_batch_task(lines: list[str]):
    """Process one batch of task lines; this is the unit of work per thread.

    Blank lines are skipped. A failure on one line is reported and does not
    prevent the remaining lines of the batch from being processed.

    Args:
        lines: Raw task lines, each expected to be ``<task name> - <url>``.
    """
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            _process_task_line(line)
        except Exception as e:
            # Worker-thread boundary: report the failure here, otherwise it
            # would only be stored on the future and abort the whole batch.
            task_name = line.partition(" - ")[0]
            print(f"{task_name}, 任务失败:{e}")


def read_lines_from_file(task_file):
    """Read every line of the task file.

    Args:
        task_file: Path of the UTF-8 encoded task file.

    Returns:
        list: All lines of the file, line endings included.
    """
    with open(task_file, "r", encoding="utf-8") as file:
        return file.readlines()


def process_lines_in_batches(lines, batch_size):
    """Split the lines into batches and process the batches on a thread pool.

    Args:
        lines (list): All task lines.
        batch_size (int): Number of lines handed to each submitted task.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(pre_batch_task, lines[start : start + batch_size])
            for start in range(0, len(lines), batch_size)
        ]

    # Leaving the with-block waits for every task; surface any exception a
    # worker raised instead of letting it disappear with the future.
    for future in futures:
        error = future.exception()
        if error is not None:
            print(f"批次任务异常:{error}")


def main():
    """Read the task file and process all of its lines in batches."""
    task_file = "input.txt"
    batch_size = 3  # number of lines handled by each submitted task
    lines = read_lines_from_file(task_file)
    process_lines_in_batches(lines, batch_size)
    print("finish, 程序结束...")


if __name__ == "__main__":
    main()