! [ -e /content ] && pip install -Uqq fastai  # upgrade fastai's vision tools on Colab
from __future__ import annotations
import uuid
from fastai.torch_basics import *
from fastai.data.all import *
from fastai.vision.core import *
from fastdownload import download_url
from pathlib import Path
from nbdev.showdoc import *
# Some utility functions to quickly download a batch of images, check them, and pre-resize them.
def _get_downloaded_image_filename(dest, name, suffix):
start_index = 1
candidate_name = name
while (dest/f"{candidate_name}{suffix}").is_file():
candidate_name = f"{candidate_name}{start_index}"
start_index += 1
def _download_image_inner(dest, inp, timeout=4, preserve_filename=False):
    """Download a single image into directory `dest`, reporting failures instead of raising.

    `inp` is an `(index, url)` pair (as produced by `enumerate`); only the URL
    is used. Everything from the first `?` onwards is dropped from the URL
    before it is downloaded and before the file name is derived. The saved file
    keeps the URL's suffix, or `.jpg` when the URL has none. With
    `preserve_filename` the stem is taken from the URL and de-duplicated against
    files already in `dest`; otherwise a random UUID is used as the stem.
    """
    _, url = inp
    url = url.split("?")[0]
    url_path = Path(url)
    suffix = url_path.suffix if url_path.suffix else '.jpg'
    if preserve_filename:
        name = _get_downloaded_image_filename(dest, url_path.stem, suffix)
    else:
        name = str(uuid.uuid4())
    try:
        download_url(url, dest/f"{name}{suffix}", show_progress=False, timeout=timeout)
    except Exception:
        # Best effort: one bad URL must not abort the whole batch. The message
        # was previously a bare f-string expression, i.e. built and discarded.
        print(f"Couldn't download {url}.")
# Downloading one URL without `preserve_filename` should leave exactly one file in the directory.
with tempfile.TemporaryDirectory() as d:
    d = Path(d)
    url = "https://www.fast.ai/images/jh-head.jpg"
    _download_image_inner(d, (125,url))
    test_eq(len(d.ls()), 1)
# With `preserve_filename=True` the URL's file name is kept, and a second
# download of the same URL gets a numeric suffix on the stem.
with tempfile.TemporaryDirectory() as d:
    d = Path(d)
    url = "https://www.fast.ai/images/jh-head.jpg"
    _download_image_inner(d, (125,url), preserve_filename=True)
    assert (d/'jh-head.jpg').is_file()
    assert not (d/'jh-head.jpg1').exists()
    _download_image_inner(d, (125,url), preserve_filename=True)
    assert (d/'jh-head.jpg').is_file()
    assert (d/'jh-head1.jpg').is_file()
def download_images(dest, url_file=None, urls=None, max_pics=1000, n_workers=8, timeout=4, preserve_filename=False):
    """Download images listed in text file `url_file` to path `dest`, at most `max_pics`.

    Either `url_file` (a text file with one URL per line) or `urls` (an
    iterable of URLs) must be given; `urls` wins when both are passed.
    `max_pics` only limits the URLs read from `url_file`; an explicit `urls`
    is used as given. `dest` is created (with parents) if missing. Downloads
    run through `parallel` in a thread pool of `n_workers`; `timeout` and
    `preserve_filename` are forwarded to `_download_image_inner`.

    Raises `ValueError` when neither `url_file` nor `urls` is provided.
    """
    if urls is None:
        if url_file is None:
            raise ValueError("Either `url_file` or `urls` must be provided.")
        # `splitlines` copes with CRLF files; blank lines are not URLs.
        lines = Path(url_file).read_text().splitlines()
        urls = [o.strip() for o in lines if o.strip()][:max_pics]
    dest = Path(dest)
    dest.mkdir(exist_ok=True, parents=True)
    parallel(partial(_download_image_inner, dest, timeout=timeout, preserve_filename=preserve_filename),
             list(enumerate(urls)), n_workers=n_workers, threadpool=True)
# Download two URLs listed in a text file, keeping their original file names.
with tempfile.TemporaryDirectory() as d:
    d = Path(d)
    url_file = d/'urls.txt'
    url_file.write_text("\n".join([f"https://www.fast.ai/images/{n}" for n in "jh-head.jpg headshot-small.jpg".split()]))
    download_images(d, url_file, preserve_filename=True)
    assert (d/'jh-head.jpg').is_file()
    assert (d/'headshot-small.jpg').is_file()
    assert not (d/'jh-head1.jpg').exists()
def resize_to(img, targ_sz, use_min=False):
    """Size to resize to, to hit `targ_sz` at same aspect ratio, in PIL coords (i.e. `(w, h)`).

    `img` only needs a `.size` attribute holding `(width, height)`. By default
    the longer side is scaled to `targ_sz`; with `use_min` the shorter side is.
    Returns a `(width, height)` tuple of ints, the other side rounded down.
    """
    w,h = img.size
    ref_sz = (min if use_min else max)(w,h)
    # Multiply before dividing: going through a float ratio (`targ_sz/ref_sz`)
    # can land just below the exact value and truncate one pixel short,
    # e.g. 49*(1/49) < 1.
    return int(w*targ_sz//ref_sz),int(h*targ_sz//ref_sz)
class _FakeImg():
    "Minimal stand-in exposing only the `.size` attribute that `resize_to` reads"
    def __init__(self, size): self.size=size

img = _FakeImg((200,500))
test_eq(resize_to(img, 400), [160,400])
test_eq(resize_to(img, 400, use_min=True), [400,1000])
def verify_image(fn):
    """Confirm that `fn` can be opened.

    Returns `True` when the image opens and loads, `False` on any error.
    """
    try:
        # The context manager closes the file handle once the check is done.
        with Image.open(fn) as im:
            im.draft(im.mode, (32,32))
            im.load()
        return True
    # `Exception` rather than a bare `except`, so KeyboardInterrupt/SystemExit still propagate.
    except Exception: return False
def verify_images(fns):
    """Find images in `fns` that can't be opened.

    Runs `verify_image` over `fns` via `parallel` and returns an `L` of the
    entries for which it returned a falsy result.
    """
    results = parallel(verify_image, fns)
    return L(fn for fn,ok in zip(fns, results) if not ok)
def resize_image(file, dest, src='.', max_size=None, n_channels=3, ext=None,
                 img_format=None, resample=BILINEAR, resume=False, **kwargs ):
    """Resize `src/file` into `dest/file` so that it fits `max_size`.

    `file` is a path relative to both `src` and `dest`; missing destination
    directories are created. If `ext` is given it replaces the destination
    suffix. The image is re-saved (with `img_format` and `**kwargs` passed to
    `save`) when either side exceeds `max_size` or its channel count differs
    from `n_channels`; it is converted to RGB only when `n_channels == 3`.
    Otherwise the file is copied unchanged, unless source and destination are
    the same path. Nothing is done when `resume` is set and the destination
    already exists, or when the source fails `verify_image`. Returns `None`.
    """
    dest = Path(dest)
    dest_fname = dest/file
    # Apply `ext` before the `resume` check so we test for the file that is actually written.
    if ext is not None: dest_fname = dest_fname.with_suffix(ext)
    dest_fname.parent.mkdir(exist_ok=True, parents=True)
    file = Path(src)/file
    if resume and dest_fname.exists(): return
    if not verify_image(file): return
    saved = False
    # Context manager so the source file handle is released when we are done.
    with Image.open(file) as src_img:
        imgarr = np.array(src_img)
        img_channels = 1 if len(imgarr.shape) == 2 else imgarr.shape[2]
        too_big = max_size is not None and (src_img.height > max_size or src_img.width > max_size)
        if too_big or img_channels != n_channels:
            img = src_img
            # NOTE(review): when only the channel count differs, an image smaller
            # than `max_size` is still resized (i.e. upscaled) here.
            if max_size is not None:
                new_sz = resize_to(img, max_size)
                img = img.resize(new_sz, resample=resample)
            if n_channels == 3: img = img.convert("RGB")
            img.save(dest_fname, img_format, **kwargs)
            saved = True
    if not saved and file != dest_fname: shutil.copy2(file, dest_fname)
# Resize into the current directory, check the result, then clean up.
file = 'puppy.jpg'
dest = Path('.')
resize_image(file, dest, src='images', max_size=400)
im = Image.open(dest/file)
test_eq(im.shape[1],400)
(dest/file).unlink()
# Source and destination are the same directory and no `max_size` is given.
file = 'puppy.jpg'
dest = Path('images')
resize_image(file, dest, src=dest, max_size=None)
def resize_images(path, max_workers=defaults.cpus, max_size=None, recurse=False,
                  dest=Path('.'), n_channels=3, ext=None, img_format=None, resample=BILINEAR,
                  resume=None, **kwargs):
    """Resize image files under `path` (recursively if `recurse`) to `dest` to `max_size`.

    Files found by `get_image_files` are passed, as paths relative to `path`,
    to `resize_image` through `parallel` with `max_workers` workers. `dest` is
    created if missing. All remaining arguments are forwarded to `resize_image`.
    """
    path = Path(path)
    # Accept `dest` as a string too.
    dest = Path(dest)
    # `resize_image` only tests `resume` for truthiness, so an unset value means "do not resume".
    if resume is None: resume = False
    os.makedirs(dest, exist_ok=True)
    files = get_image_files(path, recurse=recurse)
    files = [o.relative_to(path) for o in files]
    parallel(resize_image, files, src=path, n_workers=max_workers, max_size=max_size, dest=dest, n_channels=n_channels, ext=ext,
             img_format=img_format, resample=resample, resume=resume, **kwargs)
# Resize everything under `images` into a temporary directory.
with tempfile.TemporaryDirectory() as d:
    dest = Path(d)/'resized_images'
    resize_images('images', max_size=100, dest=dest, max_workers=0, recurse=True)

# Export -
# Run nbdev's export step (presumably writes the notebook's exported cells to the library module; confirm against the nbdev docs).
from nbdev import nbdev_export
nbdev_export()