NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed onto one line. Indentation and the position of
blank context lines (in particular the blank line before the final print in
the last hunk) are a best guess -- verify against main.py before applying.
The "+" lines also carry review edits (English comments, a guard for task
lines without a URL, a fuller run_task docstring), so the post-image hash on
the index line is stale.

diff --git a/main.py b/main.py
index d7d66dd..18812ea 100644
--- a/main.py
+++ b/main.py
@@ -10,19 +10,19 @@ def pre_batch_task(lines: list[str]):
     """
     for line in lines:
         line = line.strip()  # 去掉每行开头和结尾的空白字符
-        if line:
-            task_name, _, url = line.partition(" - ")  # 解析出 HTML 文件名和 URL 地址
-            print(f"{task_name}, 开始下载")
-            html_content = Requester().fetch_html(url, task_name)
-            img_url_list = Parser(html_content).get_img_url_list()
-
-            img_manager = ImgManager(img_url_list, task_name)
-            img_manager.batch_fill_image_data()
-            img_manager.save_long_image()
-            print(f"{task_name}, 完成!!")
+        # Skip blank lines and "#" comment lines.
+        if not line or line.startswith("#"):
+            continue
+        # Split "<task name> - <url>" into the HTML file name and the URL.
+        task_name, _, url = line.partition(" - ")
+        if not url:
+            # No " - " separator or nothing after it: there is no URL to fetch.
+            print(f"{line}, 格式错误, 已跳过")
+            continue
+        run_task(task_name, url)
 
 
-def read_lines_from_file(task_file):
+def read_lines(task_file):
     """
     从文件中读取所有行并返回一个包含行的列表。
 
@@ -37,7 +37,7 @@ def read_lines_from_file(task_file):
     return lines
 
 
-def process_lines_in_batches(lines, batch_size):
+def process_lines_in_batches(lines: list[str], batch_size: int):
     """
     将行数据按照指定的批次大小,利用线程池并行处理。
 
@@ -54,10 +54,34 @@ def process_lines_in_batches(lines, batch_size):
             executor.submit(pre_batch_task, batch_lines)
 
 
+def run_task(task_name: str, url: str) -> None:
+    """
+    Run one download task and print its progress.
+
+    Fetches the HTML for ``url``, extracts the image URL list from it, then
+    has ``ImgManager`` fill in the image data and save the long image. These
+    step descriptions follow the helper names; the helpers themselves are
+    defined outside this patch.
+
+    Args:
+        task_name: Task label; printed in the progress messages and passed
+            to ``fetch_html`` and ``ImgManager``.
+        url: Page address passed to ``fetch_html``.
+    """
+    print(f"{task_name}, 开始下载")
+    html_content = Requester().fetch_html(url, task_name)
+    img_url_list = Parser(html_content).get_img_url_list()
+
+    img_manager = ImgManager(img_url_list, task_name)
+    img_manager.batch_fill_image_data()
+    img_manager.save_long_image()
+    print(f"{task_name}, 完成!!")
+
+
 if __name__ == "__main__":
     task_file = "input.txt"
     batch_size = 3  # 每个线程处理的行数
-    lines = read_lines_from_file(task_file)
+    lines = read_lines(task_file)
     process_lines_in_batches(lines, batch_size)
 
     print("finish, 程序结束...")