comic_book_downloader/web_parser.py
2023-08-20 14:36:26 +08:00

16 lines
472 B
Python

from bs4 import BeautifulSoup
class HtmlParser:
    """Extract image URLs from an HTML document.

    The parser looks for ``<img>`` tags nested inside the first
    ``<div class="reading-content">`` element of the document.
    """

    def __init__(self, html_content: str):
        """Store the raw HTML that will be parsed on demand.

        Args:
            html_content: HTML markup of the page to inspect.
        """
        self.html_content = html_content

    def get_img_url_list(self) -> list:
        """Return the image URLs found in the ``reading-content`` container.

        For every ``<img>`` inside the first ``<div class="reading-content">``
        the ``data-src`` attribute is preferred; when it is missing or blank
        the ``src`` attribute is used instead. Surrounding whitespace is
        stripped from each URL, and images with no usable URL are skipped.

        Returns:
            A list of URL strings in document order. The list is empty when
            the container is absent or holds no usable images.
        """
        soup = BeautifulSoup(self.html_content, "html.parser")
        container = soup.find("div", class_="reading-content")
        if container is None:
            # No reading area in this document: report "no images" rather
            # than failing with an AttributeError on None.
            return []

        img_urls = []
        for img_tag in container.find_all("img"):
            # Prefer data-src, fall back to src for tags that lack it.
            for attr_name in ("data-src", "src"):
                img_url = (img_tag.get(attr_name) or "").strip()
                if img_url:
                    img_urls.append(img_url)
                    break
        return img_urls