update,补充注释
This commit is contained in:
parent
1653bfec9a
commit
f2bc3a221a
65
main.py
65
main.py
@ -22,6 +22,33 @@ def get_html(url, file_name, max_retries=3):
|
|||||||
|
|
||||||
|
|
||||||
def download_image(img_url, max_retries=5):
|
def download_image(img_url, max_retries=5):
|
||||||
|
"""
|
||||||
|
通过给定的图片URL下载图片内容。
|
||||||
|
|
||||||
|
参数:
|
||||||
|
img_url (str): 图片的URL地址。
|
||||||
|
max_retries (int, 可选): 下载失败时的最大重试次数。默认为5次。
|
||||||
|
|
||||||
|
返回值:
|
||||||
|
bytes or None: 成功下载图片的二进制数据,若下载失败则返回None。
|
||||||
|
|
||||||
|
注解:
|
||||||
|
这个函数通过发送HTTP请求下载图片文件。它使用`requests`库来获取URL返回的响应。
|
||||||
|
如果下载成功,函数将返回图片的二进制内容(bytes格式)。
|
||||||
|
如果下载失败,函数将尝试最多`max_retries`次重试,直到成功或达到重试次数上限。
|
||||||
|
在每次重试之间,函数会打印错误消息来指示重试进度。
|
||||||
|
如果重试次数用尽后仍然无法下载图片,函数将输出失败消息并返回None。
|
||||||
|
|
||||||
|
例子:
|
||||||
|
```
|
||||||
|
image_url = "https://example.com/image.jpg"
|
||||||
|
image_data = download_image(image_url)
|
||||||
|
if image_data:
|
||||||
|
# 处理图片数据...
|
||||||
|
else:
|
||||||
|
print("无法下载图片,下载失败。")
|
||||||
|
```
|
||||||
|
"""
|
||||||
for retry in range(max_retries):
|
for retry in range(max_retries):
|
||||||
try:
|
try:
|
||||||
with requests.get(img_url, stream=True) as response:
|
with requests.get(img_url, stream=True) as response:
|
||||||
@ -65,6 +92,7 @@ def set_img_obj_data(img_obj):
|
|||||||
data = download_image(url)
|
data = download_image(url)
|
||||||
if data is None:
|
if data is None:
|
||||||
file_name = img_obj["file_name"]
|
file_name = img_obj["file_name"]
|
||||||
|
print(f"{file_name}, 下载图片失败")
|
||||||
raise Exception(f"{file_name}, 下载图片失败")
|
raise Exception(f"{file_name}, 下载图片失败")
|
||||||
img_obj["data"] = data
|
img_obj["data"] = data
|
||||||
|
|
||||||
@ -116,7 +144,10 @@ def concatenate_images_vertically(img_obj_list):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def process_batch(lines):
|
def pre_batch_task(lines):
|
||||||
|
"""
|
||||||
|
批次任务:逐行处理 `lines` 中的每一行,跳过去除首尾空白后为空的行。
|
||||||
|
"""
|
||||||
for line in lines:
|
for line in lines:
|
||||||
line = line.strip() # 去掉每行开头和结尾的空白字符
|
line = line.strip() # 去掉每行开头和结尾的空白字符
|
||||||
if line:
|
if line:
|
||||||
@ -130,18 +161,42 @@ def process_batch(lines):
|
|||||||
long_image.save(f"imgs/{file_name}.png") # 保存长图到本地
|
long_image.save(f"imgs/{file_name}.png") # 保存长图到本地
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def read_lines_from_file(task_file):
|
||||||
task_file = "input.txt"
|
"""
|
||||||
batch_size = 3 # 每个线程处理的行数
|
从文件中读取所有行并返回一个包含行的列表。
|
||||||
|
|
||||||
|
参数:
|
||||||
|
task_file (str): 要读取的文件名。
|
||||||
|
|
||||||
|
返回值:
|
||||||
|
lines (list): 包含文件中所有行的列表(每行保留末尾的换行符)。
|
||||||
|
"""
|
||||||
with open(task_file, "r", encoding="utf-8") as file:
|
with open(task_file, "r", encoding="utf-8") as file:
|
||||||
lines = file.readlines()
|
lines = file.readlines()
|
||||||
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
def process_lines_in_batches(lines, batch_size):
|
||||||
|
"""
|
||||||
|
将行数据按照指定的批次大小,利用线程池并行处理。
|
||||||
|
|
||||||
|
参数:
|
||||||
|
lines (list): 包含所有行的列表。
|
||||||
|
batch_size (int): 每个批次处理的行数。
|
||||||
|
|
||||||
|
"""
|
||||||
# 使用 ThreadPoolExecutor 创建线程池
|
# 使用 ThreadPoolExecutor 创建线程池
|
||||||
with ThreadPoolExecutor() as executor:
|
with ThreadPoolExecutor() as executor:
|
||||||
# 按照 batch_size 将行分批次处理
|
# 按照 batch_size 将行分批次处理
|
||||||
for i in range(0, len(lines), batch_size):
|
for i in range(0, len(lines), batch_size):
|
||||||
batch_lines = lines[i : i + batch_size]
|
batch_lines = lines[i : i + batch_size]
|
||||||
executor.submit(process_batch, batch_lines)
|
executor.submit(pre_batch_task, batch_lines)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
task_file = "input.txt"
|
||||||
|
batch_size = 3 # 每个线程处理的行数
|
||||||
|
lines = read_lines_from_file(task_file)
|
||||||
|
process_lines_in_batches(lines, batch_size)
|
||||||
|
|
||||||
print("finish, 程序结束...")
|
print("finish, 程序结束...")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user