16 lines
472 B
Python
16 lines
472 B
Python
from bs4 import BeautifulSoup
|
|
|
|
|
|
class HtmlParser:
    """Extract image URLs from the ``reading-content`` section of an HTML page."""

    def __init__(self, html_content: str) -> None:
        """Store the raw HTML markup; parsing is deferred until it is needed.

        Args:
            html_content: The HTML document as a string.
        """
        self.html_content = html_content

    def get_img_url_list(self) -> list:
        """Return the ``data-src`` value of each image in the reading container.

        The document is parsed with the ``html.parser`` backend, the first
        ``<div class="reading-content">`` is located, and every ``<img>``
        nested inside it is inspected in document order.

        Returns:
            A list with the ``data-src`` attribute value of each ``<img>``
            that carries one. The list is empty when the document has no
            ``reading-content`` div or the div holds no such images.
        """
        soup = BeautifulSoup(self.html_content, "html.parser")

        # ``find`` returns None when nothing matches; bail out early instead
        # of failing with an AttributeError on the chained ``find_all``.
        container = soup.find("div", class_="reading-content")
        if container is None:
            return []

        # Images without a ``data-src`` attribute are skipped so that a single
        # stray <img> does not abort the whole extraction with a KeyError.
        return [
            img_tag["data-src"]
            for img_tag in container.find_all("img")
            if img_tag.has_attr("data-src")
        ]
|