comic_book_downloader/main.py
2024-02-29 14:58:41 +08:00

72 lines
2.0 KiB
Python

from concurrent.futures import ThreadPoolExecutor
from web import Requester
from web import HtmlParser
from web_img import ImgManager
def pre_batch_task(lines: list[str]):
    """Process one batch of task lines on a worker thread.

    Blank lines and lines starting with "#" are skipped; every other
    line is split on the first " - " into a task name and a URL, which
    are then handed to run_task.
    """
    for raw in lines:
        entry = raw.strip()  # drop leading/trailing whitespace
        # Only non-empty, non-comment lines describe real tasks.
        if entry and not entry.startswith("#"):
            task_name, _, url = entry.partition(" - ")
            run_task(task_name, url)
def read_lines(task_file):
    """Read every line from a file and return them as a list.

    Parameters:
        task_file: Path of the task file to read (UTF-8 encoded).

    Returns:
        list[str]: All lines of the file, newline characters included.
    """
    with open(task_file, "r", encoding="utf-8") as fh:
        # Iterating a text file yields its lines, newlines kept —
        # equivalent to readlines().
        return list(fh)
def process_lines_in_batches(lines: list[str], batch_size: int):
    """Split the lines into batches and process them on a thread pool.

    Each batch of at most *batch_size* lines is submitted to
    pre_batch_task on a worker thread. The futures are collected and
    checked afterwards so that any exception raised inside a worker is
    propagated to the caller instead of being silently discarded (the
    original submit-and-forget pattern lost all worker errors).

    Parameters:
        lines: All task lines to process.
        batch_size: Maximum number of lines per batch / thread task.

    Raises:
        ValueError: If batch_size is not a positive integer.
        Exception: Whatever pre_batch_task raised in a worker thread.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(pre_batch_task, lines[i : i + batch_size])
            for i in range(0, len(lines), batch_size)
        ]
        # Re-raise the first worker exception, if any; without this,
        # errors inside pre_batch_task would vanish silently.
        for future in futures:
            future.result()
def run_task(task_name: str, url: str):
    """Execute a single download task.

    Fetches the HTML at *url*, extracts the list of image URLs from it,
    downloads the image data, and saves everything as one long image.
    """
    print(f"{task_name}, 开始下载")
    parser = HtmlParser(Requester().fetch_html(url, task_name))
    manager = ImgManager(parser.get_img_url_list(), task_name)
    manager.batch_fill_image_data()
    manager.save_long_image()
    print(f"{task_name}, 完成!!")
if __name__ == "__main__":
    # Entry point: read the task list and fan the work out to threads.
    task_file = "task.txt"
    per_thread_line_size = 3  # number of lines handled per thread
    lines = read_lines(task_file)
    process_lines_in_batches(lines, per_thread_line_size)
    print("finish, 程序结束...")