import sys
from concurrent.futures import ThreadPoolExecutor

from web import Requester
from web import HtmlParser
from web_img import ImgManager


def pre_batch_task(lines: list[str]) -> None:
    """Run every task described by one batch of task-file lines.

    Each line is stripped; blank lines and lines starting with ``#`` are
    skipped. Every remaining line is split at the first ``" - "`` into a task
    name (left part) and a URL (right part) and handed to ``run_task``.

    Tasks run sequentially. There is no error handling here, so an exception
    raised by ``run_task`` propagates and the remaining lines of this batch
    are not processed.

    Args:
        lines: Raw lines of the task file belonging to this batch.
    """
    for line in lines:
        line = line.strip()  # drop leading/trailing whitespace (incl. newline)
        if not line or line.startswith("#"):
            continue
        # Split into "<task name> - <url>"; when the separator is absent the
        # whole line becomes the task name and the URL is an empty string.
        task_name, _, url = line.partition(" - ")
        run_task(task_name, url)


def read_lines(task_file) -> list[str]:
    """Read the task file and return all of its lines.

    Args:
        task_file: Path of the task file; it is passed to ``open()`` and read
            as UTF-8 text.

    Returns:
        Every line of the file, line endings included.
    """
    with open(task_file, "r", encoding="utf-8") as file:
        return file.readlines()


def process_lines_in_batches(lines: list[str], batch_size: int) -> None:
    """Process the lines in fixed-size batches on a thread pool.

    The lines are cut into consecutive slices of ``batch_size`` lines, and
    each slice is submitted to a ``ThreadPoolExecutor`` as one
    ``pre_batch_task`` call. The function returns once every batch has
    finished.

    A batch that raises does not stop the other batches; its failure is
    reported on stderr instead of being silently discarded with the future.

    Args:
        lines: All lines to process.
        batch_size: Number of lines handled by one submitted batch; must be
            at least 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make range() empty and silently skip all work.
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    submitted = []  # (index of the batch's first line, future)
    with ThreadPoolExecutor() as executor:
        for start in range(0, len(lines), batch_size):
            batch_lines = lines[start : start + batch_size]
            future = executor.submit(pre_batch_task, batch_lines)
            submitted.append((start, future))

    # Leaving the ``with`` block waits for all futures, so exception() below
    # returns immediately.
    for start, future in submitted:
        error = future.exception()
        if error is not None:
            print(
                f"batch starting at line {start + 1} failed: {error!r}",
                file=sys.stderr,
            )


def run_task(task_name: str, url: str) -> None:
    """Execute a single download task.

    Fetches the HTML via ``Requester().fetch_html(url, task_name)``, extracts
    the image URL list with ``HtmlParser``, then lets ``ImgManager`` fill the
    image data and save the long image. A start and a completion message are
    printed around the work.

    The helper classes come from the project's ``web`` / ``web_img`` modules;
    their exact behavior is not visible from this file.

    Args:
        task_name: Name of the task; passed to the fetcher and the image
            manager and used in the progress messages.
        url: Address passed to the fetcher.
    """
    print(f"{task_name}, 开始下载")
    html_content = Requester().fetch_html(url, task_name)
    img_url_list = HtmlParser(html_content).get_img_url_list()
    img_manager = ImgManager(img_url_list, task_name)
    img_manager.batch_fill_image_data()
    img_manager.save_long_image()
    print(f"{task_name}, 完成!!")


if __name__ == "__main__":
    task_file = "task.txt"
    per_thread_line_size = 3  # number of lines handled by each submitted batch
    lines = read_lines(task_file)
    process_lines_in_batches(lines, per_thread_line_size)
    print("finish, 程序结束...")