From bec35a50385e79dd6565badb11682d63a3c23c5c Mon Sep 17 00:00:00 2001 From: vincent Date: Sun, 20 Aug 2023 14:39:19 +0800 Subject: [PATCH] =?UTF-8?q?=E8=BF=81=E7=A7=BB=E7=B1=BB=E5=88=B0=E5=88=AB?= =?UTF-8?q?=E7=9A=84=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 4 ++-- requester.py => web.py | 15 +++++++++++++++ web_img.py | 2 +- web_parser.py | 15 --------------- 4 files changed, 18 insertions(+), 18 deletions(-) rename requester.py => web.py (85%) delete mode 100644 web_parser.py diff --git a/main.py b/main.py index faffbb5..184207d 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,6 @@ from concurrent.futures import ThreadPoolExecutor -from requester import Requester -from web_parser import HtmlParser +from web import Requester +from web import HtmlParser from web_img import ImgManager diff --git a/requester.py b/web.py similarity index 85% rename from requester.py rename to web.py index 439b6ca..b5bbdfc 100644 --- a/requester.py +++ b/web.py @@ -1,4 +1,19 @@ import requests +from bs4 import BeautifulSoup + + +class HtmlParser: + def __init__(self, html_content: str): + self.html_content = html_content + + def get_img_url_list(self): + soup = BeautifulSoup(self.html_content, "html.parser") + img_tags = soup.find("div", class_="reading-content").find_all("img") + img_urls = [] + for img_tag in img_tags: + img_url = img_tag.attrs["data-src"] + img_urls.append(img_url) + return img_urls class Requester: diff --git a/web_img.py b/web_img.py index 2981383..d9a0895 100644 --- a/web_img.py +++ b/web_img.py @@ -1,6 +1,6 @@ from PIL import Image import io -from requester import Requester +from web import Requester from concurrent.futures import ThreadPoolExecutor diff --git a/web_parser.py b/web_parser.py deleted file mode 100644 index 3169b9b..0000000 --- a/web_parser.py +++ /dev/null @@ -1,15 +0,0 @@ -from bs4 import BeautifulSoup - - -class HtmlParser: - def __init__(self, html_content: str): - self.html_content = html_content - - def get_img_url_list(self): - soup = BeautifulSoup(self.html_content, "html.parser") - img_tags = soup.find("div", class_="reading-content").find_all("img") - img_urls = [] - for img_tag in img_tags: - img_url = img_tag.attrs["data-src"] - img_urls.append(img_url) - return img_urls