update
This commit is contained in:
parent
dacae684d7
commit
510b4bf0d4
Binary file not shown.
Binary file not shown.
BIN
__pycache__/web_img_manager.cpython-310.pyc
Normal file
BIN
__pycache__/web_img_manager.cpython-310.pyc
Normal file
Binary file not shown.
Binary file not shown.
62
img_manager.py
Normal file
62
img_manager.py
Normal file
@ -0,0 +1,62 @@
|
||||
from requester import Requester
|
||||
from web_img import WebImg
|
||||
|
||||
from PIL import Image
|
||||
import io
|
||||
from web_img import WebImg
|
||||
from requester import Requester
|
||||
|
||||
|
||||
class ImgManager:
    """Manage the images of one task: build, download, stitch and save them.

    Each URL is wrapped in a ``WebImg`` object. After the images have been
    fetched, they can be stacked top-to-bottom into a single tall image and
    written to ``output/<task_name>.png``.
    """

    def __init__(self, img_url_list: list[str], task_name: str = "untitled"):
        """Create the manager and one ``WebImg`` per URL.

        Args:
            img_url_list: URLs of the images, in the order they should be
                stacked.
            task_name: Label passed to every ``WebImg``, used in log messages
                and as the output file name. Optional so that existing
                single-argument calls keep working.
        """
        self.img_url_list = img_url_list
        # Must be set before the WebImg list is built, because the builder
        # reads it; the attribute was previously never assigned at all.
        self.task_name = task_name
        self.img_list = self.__create_web_img_list()

    def __create_web_img_list(self):
        """Return a ``WebImg`` for every URL in ``self.img_url_list``."""
        return [WebImg(self.task_name, url) for url in self.img_url_list]

    def batch_fetch_images(self):
        """Hand the ``WebImg`` list to a fresh ``Requester`` for downloading.

        NOTE(review): presumably this fills in ``data`` on each ``WebImg``;
        confirm against ``Requester``.
        """
        requester = Requester()
        requester.batch_fetch_images_to_img_obj_list(self.img_list)

    def concatenate_images_vertically(self):
        """Stack all images vertically into one long image.

        Narrower images are centred horizontally on a white canvas whose
        width is that of the widest image.

        Returns:
            The stitched image, or ``None`` if stitching failed (including
            when there are no images). Failures are reported on stdout.
        """
        try:
            # Decode every image once and reuse it for sizing and pasting.
            images = [
                Image.open(io.BytesIO(web_img.data)) for web_img in self.img_list
            ]
            if not images:
                raise ValueError("no images to concatenate")

            max_width = max(img.width for img in images)
            total_height = sum(img.height for img in images)

            long_image = Image.new(
                "RGB", (max_width, total_height), color=(255, 255, 255)
            )

            y_offset = 0
            for img in images:
                x_offset = (max_width - img.width) // 2  # centre horizontally
                long_image.paste(img, (x_offset, y_offset))
                y_offset += img.height

            return long_image

        except Exception as e:
            # Use the manager's own task name: indexing img_list[0] here would
            # raise again when the list is empty.
            print(f"{self.task_name}, 拼接图片失败:{e}")
            return None

    def save_long_image(self):
        """Stitch the images and save the result as ``output/<task_name>.png``.

        Returns:
            ``True`` if the image was written, ``False`` if stitching failed
            and nothing was saved.
        """
        long_image = self.concatenate_images_vertically()
        if long_image is None:
            # The stitching step has already reported the failure.
            return False
        long_image.save(f"output/{self.task_name}.png")
        return True
|
||||
128
main.py
128
main.py
@ -1,118 +1,7 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from PIL import Image
|
||||
import io
|
||||
from web_img import WebImg
|
||||
from requester import Requester
|
||||
from web_parser import WebParser
|
||||
|
||||
|
||||
# def fetch_image(img_url: str, max_retries=5):
|
||||
# """
|
||||
# 通过给定的图片URL下载图片内容。
|
||||
|
||||
# 参数:
|
||||
# img_url (str): 图片的URL地址。
|
||||
# max_retries (int, 可选): 下载失败时的最大重试次数。默认为5次。
|
||||
|
||||
# 返回值:
|
||||
# bytes or None: 成功下载图片的二进制数据,若下载失败则返回None。
|
||||
|
||||
# 注解:
|
||||
# 这个函数通过发送HTTP请求下载图片文件。它使用`requests`库来获取URL返回的响应。
|
||||
# 如果下载成功,函数将返回图片的二进制内容(bytes格式)。
|
||||
# 如果下载失败,函数将尝试最多`max_retries`次重试,直到成功或达到重试次数上限。
|
||||
# 在每次重试之间,函数会打印错误消息来指示重试进度。
|
||||
# 如果重试次数用尽后仍然无法下载图片,函数将输出失败消息并返回None。
|
||||
|
||||
# 例子:
|
||||
# ```
|
||||
# image_url = "https://example.com/image.jpg"
|
||||
# image_data = download_image(image_url)
|
||||
# if image_data:
|
||||
# # 处理图片数据...
|
||||
# else:
|
||||
# print("无法下载图片,下载失败。")
|
||||
# ```
|
||||
# """
|
||||
# for retry in range(max_retries):
|
||||
# try:
|
||||
# with requests.get(img_url, stream=True) as response:
|
||||
# response.raise_for_status()
|
||||
# return response.content
|
||||
# except Exception as e:
|
||||
# if retry < max_retries - 1:
|
||||
# print(
|
||||
# f"Failed to download image, retrying ({retry+1}/{max_retries})..."
|
||||
# )
|
||||
# else:
|
||||
# print("Failed to download image after multiple retries, skipping.")
|
||||
# return None
|
||||
|
||||
|
||||
def create_web_img_list(img_url_list, task_name):
    """Wrap every URL in a ``WebImg`` tagged with ``task_name``.

    The returned list keeps the order of ``img_url_list``.
    """
    return [WebImg(task_name, img_url) for img_url in img_url_list]
|
||||
|
||||
|
||||
# def fetch_images_to_img_obj(web_img: WebImg):
|
||||
# url = web_img.url
|
||||
# data = fetch_image(url)
|
||||
# if data is None:
|
||||
# task_name = web_img.task_name
|
||||
# print(f"{task_name}, 下载图片失败")
|
||||
# raise Exception(f"{task_name}, 下载图片失败")
|
||||
# web_img.data = data
|
||||
|
||||
|
||||
# def batch_fetch_images_to_img_obj_list(web_img_list: list[WebImg]):
|
||||
# """
|
||||
# 使用 ThreadPoolExecutor 创建线程池,对 img_obj_list 中的每个图片对象调用 set_img_obj_data 函数。
|
||||
|
||||
# Args:
|
||||
# img_obj_list (list): 图片对象列表,每个对象包含图片的数据等信息。
|
||||
|
||||
# Returns:
|
||||
# None
|
||||
# """
|
||||
# with ThreadPoolExecutor() as executor:
|
||||
# executor.map(fetch_images_to_img_obj, web_img_list)
|
||||
|
||||
|
||||
def concatenate_images_vertically(web_img_list: "list[WebImg]"):
    """Stack the given images vertically into one long image.

    Each element's ``data`` attribute is read as encoded image bytes.
    Narrower images are centred horizontally on a white canvas whose width
    is that of the widest image.

    Args:
        web_img_list: Images to stack, top to bottom.

    Returns:
        The stitched image, or ``None`` if stitching failed (including when
        the list is empty). Failures are reported on stdout.
    """
    try:
        # Decode every image once and reuse it for sizing and pasting.
        images = [Image.open(io.BytesIO(web_img.data)) for web_img in web_img_list]
        if not images:
            raise ValueError("no images to concatenate")

        max_width = max(img.width for img in images)
        total_height = sum(img.height for img in images)

        long_image = Image.new("RGB", (max_width, total_height), color=(255, 255, 255))

        y_offset = 0
        for img in images:
            x_offset = (max_width - img.width) // 2  # centre horizontally
            long_image.paste(img, (x_offset, y_offset))
            y_offset += img.height

        return long_image

    except Exception as e:
        # Guard the lookup: with an empty list, indexing [0] would raise a
        # second exception from inside the handler.
        task_name = web_img_list[0].task_name if web_img_list else "<unknown>"
        print(f"{task_name}, 拼接图片失败:{e}")
        return None
|
||||
from web_parser import Parser
|
||||
from web_img_manager import ImgManager
|
||||
|
||||
|
||||
def pre_batch_task(lines: list[str]):
|
||||
@ -122,15 +11,14 @@ def pre_batch_task(lines: list[str]):
|
||||
for line in lines:
|
||||
line = line.strip() # 去掉每行开头和结尾的空白字符
|
||||
if line:
|
||||
requester = Requester()
|
||||
task_name, _, url = line.partition(" - ") # 解析出 HTML 文件名和 URL 地址
|
||||
print(f"{task_name}, 开始下载")
|
||||
html_content = requester.fetch_html(url, task_name)
|
||||
img_url_list = WebParser(html_content).parse_img_urls()
|
||||
web_img_list = create_web_img_list(img_url_list, task_name)
|
||||
requester.batch_fetch_images_to_img_obj_list(web_img_list)
|
||||
long_image = concatenate_images_vertically(web_img_list) # 垂直拼接长图片
|
||||
long_image.save(f"output/{task_name}.png") # 保存长图到本地
|
||||
html_content = Requester().fetch_html(url, task_name)
|
||||
img_url_list = Parser(html_content).get_img_url_list()
|
||||
|
||||
img_manager = ImgManager(img_url_list, task_name)
|
||||
img_manager.batch_fetch_images()
|
||||
img_manager.save_long_image()
|
||||
print(f"{task_name}, 完成!!")
|
||||
|
||||
|
||||
|
||||
63
web_img_manager.py
Normal file
63
web_img_manager.py
Normal file
@ -0,0 +1,63 @@
|
||||
from requester import Requester
|
||||
from web_img import WebImg
|
||||
|
||||
from PIL import Image
|
||||
import io
|
||||
from web_img import WebImg
|
||||
from requester import Requester
|
||||
|
||||
|
||||
class ImgManager:
    """Manage the images of one task: build, download, stitch and save them.

    Each URL is wrapped in a ``WebImg`` object. After the images have been
    fetched, they can be stacked top-to-bottom into a single tall image and
    written to ``output/<task_name>.png``.
    """

    def __init__(self, img_url_list: list[str], task_name):
        """Create the manager and one ``WebImg`` per URL.

        Args:
            img_url_list: URLs of the images, in the order they should be
                stacked.
            task_name: Label passed to every ``WebImg``, used in log messages
                and as the output file name.
        """
        self.img_url_list = img_url_list
        self.task_name = task_name
        self.img_list = self.__create_web_img_list()

    def __create_web_img_list(self):
        """Return a ``WebImg`` for every URL in ``self.img_url_list``."""
        return [WebImg(self.task_name, url) for url in self.img_url_list]

    def batch_fetch_images(self):
        """Hand the ``WebImg`` list to a fresh ``Requester`` for downloading.

        NOTE(review): presumably this fills in ``data`` on each ``WebImg``;
        confirm against ``Requester``.
        """
        requester = Requester()
        requester.batch_fetch_images_to_img_obj_list(self.img_list)

    def concatenate_images_vertically(self):
        """Stack all images vertically into one long image.

        Narrower images are centred horizontally on a white canvas whose
        width is that of the widest image.

        Returns:
            The stitched image, or ``None`` if stitching failed (including
            when there are no images). Failures are reported on stdout.
        """
        try:
            # Decode every image once and reuse it for sizing and pasting.
            images = [
                Image.open(io.BytesIO(web_img.data)) for web_img in self.img_list
            ]
            if not images:
                raise ValueError("no images to concatenate")

            max_width = max(img.width for img in images)
            total_height = sum(img.height for img in images)

            long_image = Image.new(
                "RGB", (max_width, total_height), color=(255, 255, 255)
            )

            y_offset = 0
            for img in images:
                x_offset = (max_width - img.width) // 2  # centre horizontally
                long_image.paste(img, (x_offset, y_offset))
                y_offset += img.height

            return long_image

        except Exception as e:
            # Use the manager's own task name: indexing img_list[0] here would
            # raise again when the list is empty.
            print(f"{self.task_name}, 拼接图片失败:{e}")
            return None

    def save_long_image(self):
        """Stitch the images and save the result as ``output/<task_name>.png``.

        Returns:
            ``True`` if the image was written, ``False`` if stitching failed
            and nothing was saved.
        """
        long_image = self.concatenate_images_vertically()
        if long_image is None:
            # The stitching step has already reported the failure.
            return False
        long_image.save(f"output/{self.task_name}.png")
        return True
|
||||
@ -2,11 +2,11 @@ from bs4 import BeautifulSoup
|
||||
from web_img import WebImg
|
||||
|
||||
|
||||
class WebParser:
|
||||
class Parser:
|
||||
def __init__(self, html_content: str):
|
||||
self.html_content = html_content
|
||||
|
||||
def parse_img_urls(self):
|
||||
def get_img_url_list(self):
|
||||
soup = BeautifulSoup(self.html_content, "html.parser")
|
||||
img_tags = soup.find("div", class_="reading-content").find_all("img")
|
||||
img_urls = []
|
||||
@ -14,5 +14,3 @@ class WebParser:
|
||||
img_url = img_tag.attrs["data-src"]
|
||||
img_urls.append(img_url)
|
||||
return img_urls
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user