comic_book_downloader/web_parser.py
2023-08-19 23:39:18 +08:00

19 lines
498 B
Python

from bs4 import BeautifulSoup
from web_img import WebImg
class WebParser:
    """Extract image URLs from an HTML page.

    The parser looks for ``<img>`` tags nested inside the
    ``<div class="reading-content">`` element of the supplied HTML.
    """

    def __init__(self, html_content: str):
        """Store the raw HTML that will be parsed.

        Args:
            html_content: HTML source of the page as a string.
        """
        self.html_content = html_content

    def parse_img_urls(self) -> list:
        """Return the image URLs found inside the ``reading-content`` div.

        For each ``<img>`` tag the ``data-src`` attribute is preferred; when
        it is absent or empty the ``src`` attribute is used instead. Tags
        with neither attribute are skipped. Surrounding whitespace is
        stripped from every URL.

        Returns:
            A list of URL strings in document order. The list is empty when
            the page has no ``reading-content`` div or the div holds no
            usable images.
        """
        soup = BeautifulSoup(self.html_content, "html.parser")

        # ``find`` returns None when the container is missing; guard so the
        # caller gets an empty result rather than an AttributeError.
        container = soup.find("div", class_="reading-content")
        if container is None:
            return []

        img_urls = []
        for img_tag in container.find_all("img"):
            # Not every <img> carries ``data-src``; fall back to ``src``
            # instead of raising KeyError on the first one that lacks it.
            raw_url = img_tag.get("data-src") or img_tag.get("src")
            if not raw_url:
                continue
            img_url = raw_url.strip()
            if img_url:
                img_urls.append(img_url)
        return img_urls