import requests
from bs4 import BeautifulSoup


class HtmlParser:
    def __init__(self, html_content: str):
        self.html_content = html_content

    def get_img_url_list(self):
        # Parse the chapter page and collect the lazy-loaded image URLs from the
        # <img data-src> attributes inside the "reading-content" container.
        soup = BeautifulSoup(self.html_content, "html.parser")
        img_tags = soup.find("div", class_="reading-content").find_all("img")
        return [img_tag.attrs["data-src"] for img_tag in img_tags]


class Requester:
    def fetch_html(self, url: str, task_name: str, max_retries=3):
        # Mount an HTTPAdapter so failed connection attempts are retried at the
        # transport level up to max_retries times.
        session = requests.Session()
        adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        try:
            response = session.get(url)
            response.raise_for_status()
            return response.text
        except Exception as e:
            print(f"Error occurred while fetching HTML from {url}: {e}")
            raise Exception(f"{task_name}, failed to fetch the HTML page")

    def fetch_image(self, img_url: str, max_retries=5):
        """
        Download image content from the given image URL.

        Args:
            img_url (str): URL of the image.
            max_retries (int, optional): Maximum number of retries on failure.
                Defaults to 5.

        Returns:
            bytes or None: The image's binary data on success, or None if the
            download fails.

        Notes:
            This function downloads an image file by sending an HTTP request,
            using the `requests` library to fetch the response for the URL.
            On success, it returns the image's binary content (bytes).
            On failure, it retries up to `max_retries` times until it succeeds
            or the retry limit is reached, printing an error message between
            attempts to indicate retry progress.
            If the image still cannot be downloaded after all retries, it
            prints a failure message and returns None.

        Example:
            ```
            image_url = "https://example.com/image.jpg"
            image_data = requester.fetch_image(image_url)
            if image_data:
                # process the image data...
            else:
                print("Failed to download the image.")
            ```
        """
        for retry in range(max_retries):
            try:
                with requests.get(img_url, stream=True) as response:
                    response.raise_for_status()
                    return response.content
            except Exception as e:
                if retry < max_retries - 1:
                    print(
                        f"Failed to download image ({e}), "
                        f"retrying ({retry + 1}/{max_retries})..."
                    )
                else:
                    print("Failed to download image after multiple retries, skipping.")
        return None
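

# A minimal usage sketch, not part of the classes above: it assumes a hypothetical
# chapter URL whose page contains a <div class="reading-content"> with <img data-src>
# tags, which is what HtmlParser expects. The URL and task name are placeholders.
if __name__ == "__main__":
    requester = Requester()
    html = requester.fetch_html(
        "https://example.com/manga/chapter-1", task_name="chapter-1"
    )
    parser = HtmlParser(html)
    for img_url in parser.get_img_url_list():
        image_data = requester.fetch_image(img_url)
        if image_data is not None:
            # Save each image, using the last path segment of its URL as the filename.
            filename = img_url.rstrip("/").split("/")[-1] or "image.jpg"
            with open(filename, "wb") as f:
                f.write(image_data)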