from fastai.data.external import *
#TODO: investigate
im = Image.open(TEST_IMAGE).resize((30,20))
test_eq(im.n_px, 30*20)
test_eq(im.shape, (20,30))
test_eq(im.aspect, 30/20)
test_eq(im.reshape(12,10).shape, (12,10))
test_eq(im.resize_max(max_px=20*30).shape, (20,30))
test_eq(im.resize_max(max_px=300).n_px, 294)
test_eq(im.resize_max(max_px=500, max_h=10, max_w=20).shape, (10,15))
test_eq(im.resize_max(max_h=14, max_w=15).shape, (10,15))
test_eq(im.resize_max(max_px=300, max_h=10, max_w=25).shape, (10,15))
This section regroups the basic types used in vision with the transform that create objects of those types.
im = PILImage.create(TEST_IMAGE)
test_eq(type(im), PILImage)
test_eq(im.mode, 'RGB')
test_eq(str(im), 'PILImage mode=RGB size=1200x803')
im.resize((64,64))
ax = im.show(figsize=(1,1))
test_fig_exists(ax)
timg = TensorImage(image2tensor(im))
tpil = PILImage.create(timg)
tpil.resize((64,64))
im = PILMask.create(TEST_IMAGE)
test_eq(type(im), PILMask)
test_eq(im.mode, 'L')
test_eq(str(im), 'PILMask mode=L size=1200x803')
mnist = untar_data(URLs.MNIST_TINY)
fns = get_image_files(mnist)
mnist_fn = TEST_IMAGE_BW
timg = Transform(PILImageBW.create)
mnist_img = timg(mnist_fn)
test_eq(mnist_img.size, (28,28))
assert isinstance(mnist_img, PILImageBW)
mnist_img
camvid = untar_data(URLs.CAMVID_TINY)
fns = get_image_files(camvid/'images')
cam_fn = fns[0]
mask_fn = camvid/'labels'/f'{cam_fn.stem}_P{cam_fn.suffix}'
cam_img = PILImage.create(cam_fn)
test_eq(cam_img.size, (128,96))
tmask = Transform(PILMask.create)
mask = tmask(mask_fn)
test_eq(type(mask), PILMask)
test_eq(mask.size, (128,96))
_,axs = plt.subplots(1,3, figsize=(12,3))
cam_img.show(ctx=axs[0], title='image')
mask.show(alpha=1, ctx=axs[1], vmin=1, vmax=30, title='mask')
cam_img.show(ctx=axs[2], title='superimposed')
mask.show(ctx=axs[2], vmin=1, vmax=30);
Points are expected to come as an array/tensor of shape (n,2)
or as a list of lists with two elements. Unless you change the defaults in PointScaler
(see later on), coordinates should go from 0 to width/height, with the first one being the column index (so from 0 to width) and the second one being the row index (so from 0 to height).
F.grid_sample
.pnt_img = TensorImage(mnist_img.resize((28,35)))
pnts = np.array([[0,0], [0,35], [28,0], [28,35], [9, 17]])
tfm = Transform(TensorPoint.create)
tpnts = tfm(pnts)
test_eq(tpnts.shape, [5,2])
test_eq(tpnts.dtype, torch.float32)
ctx = pnt_img.show(figsize=(1,1), cmap='Greys')
tpnts.show(ctx=ctx);
Bounding boxes are expected to come as tuple with an array/tensor of shape (n,4)
or as a list of lists with four elements and a list of corresponding labels. Unless you change the defaults in PointScaler
(see later on), coordinates for each bounding box should go from 0 to width/height, with the following convention: x1, y1, x2, y2 where (x1,y1) is your top-left corner and (x2,y2) is your bottom-right corner.
coco = untar_data(URLs.COCO_TINY)
images, lbl_bbox = get_annotations(coco/'train.json')
idx=2
coco_fn,bbox = coco/'train'/images[idx],lbl_bbox[idx]
coco_img = timg(coco_fn)
tbbox = LabeledBBox(TensorBBox(bbox[0]), bbox[1])
ctx = coco_img.show(figsize=(3,3), cmap='Greys')
tbbox.show(ctx=ctx);
Unless specifically mentioned, all the following transforms can be used as single-item transforms (in one of the list in the tfms
you pass to a TfmdDS
or a Datasource
) or tuple transforms (in the tuple_tfms
you pass to a TfmdDS
or a Datasource
). The safest way that will work across applications is to always use them as tuple_tfms
. For instance, if you have points or bounding boxes as targets and use Resize
as a single-item transform, when you get to PointScaler
(which is a tuple transform) you won't have the correct size of the image to properly scale your points.
Any data augmentation transform that runs on PIL Images must be run before this transform.
tfm = ToTensor()
print(tfm)
print(type(mnist_img))
print(type(tfm(mnist_img)))
tfm = ToTensor()
test_eq(tfm(mnist_img).shape, (1,28,28))
test_eq(type(tfm(mnist_img)), TensorImageBW)
test_eq(tfm(mask).shape, (96,128))
test_eq(type(tfm(mask)), TensorMask)
Let's confirm we can pipeline this with PILImage.create
.
pipe_img = Pipeline([PILImageBW.create, ToTensor()])
img = pipe_img(mnist_fn)
test_eq(type(img), TensorImageBW)
pipe_img.show(img, figsize=(1,1));
def _cam_lbl(x): return mask_fn
cam_tds = Datasets([cam_fn], [[PILImage.create, ToTensor()], [_cam_lbl, PILMask.create, ToTensor()]])
show_at(cam_tds, 0);
To work with data augmentation, and in particular the grid_sample
method, points need to be represented with coordinates going from -1 to 1 (-1 being top or left, 1 bottom or right), which will be done unless you pass do_scale=False
. We also need to make sure they are following our convention of points being x,y coordinates, so pass along y_first=True
if you have your data in an y,x format to add a flip.
To work with data augmentation, and in particular the grid_sample
method, points need to be represented with coordinates going from -1 to 1 (-1 being top or left, 1 bottom or right), which will be done unless you pass do_scale=False
. We also need to make sure they are following our convention of points being x,y coordinates, so pass along y_first=True
if you have your data in an y,x format to add a flip.
TensorPoint
object and embeds it in them. For this to work, those images need to be before any points in the order of your final tuple. If you don’t have such images, you need to embed the size of the corresponding image when creating a TensorPoint
by passing it with sz=...
.def _pnt_lbl(x): return TensorPoint.create(pnts)
def _pnt_open(fn): return PILImage(PILImage.create(fn).resize((28,35)))
pnt_tds = Datasets([mnist_fn], [_pnt_open, [_pnt_lbl]])
pnt_tdl = TfmdDL(pnt_tds, bs=1, after_item=[PointScaler(), ToTensor()])
test_eq(pnt_tdl.after_item.c, 10)
x,y = pnt_tdl.one_batch()
#Scaling and flipping properly done
#NB: we added a point earlier at (9,17); formula below scales to (-1,1) coords
test_close(y[0], tensor([[-1., -1.], [-1., 1.], [1., -1.], [1., 1.], [9/14-1, 17/17.5-1]]))
a,b = pnt_tdl.decode_batch((x,y))[0]
test_eq(b, tensor(pnts).float())
#Check types
test_eq(type(x), TensorImage)
test_eq(type(y), TensorPoint)
test_eq(type(a), TensorImage)
test_eq(type(b), TensorPoint)
test_eq(b.get_meta('img_size'), (28,35)) #Automatically picked the size of the input
pnt_tdl.show_batch(figsize=(2,2), cmap='Greys');
def _coco_bb(x): return TensorBBox.create(bbox[0])
def _coco_lbl(x): return bbox[1]
coco_tds = Datasets([coco_fn], [PILImage.create, [_coco_bb], [_coco_lbl, MultiCategorize(add_na=True)]], n_inp=1)
coco_tdl = TfmdDL(coco_tds, bs=1, after_item=[BBoxLabeler(), PointScaler(), ToTensor()])
Categorize(add_na=True)
coco_tds.tfms
x,y,z
x,y,z = coco_tdl.one_batch()
test_close(y[0], -1+tensor(bbox[0])/64)
test_eq(z[0], tensor([1,1,1]))
a,b,c = coco_tdl.decode_batch((x,y,z))[0]
test_close(b, tensor(bbox[0]).float())
test_eq(c.bbox, b)
test_eq(c.lbl, bbox[1])
#Check types
test_eq(type(x), TensorImage)
test_eq(type(y), TensorBBox)
test_eq(type(z), TensorMultiCategory)
test_eq(type(a), TensorImage)
test_eq(type(b), TensorBBox)
test_eq(type(c), LabeledBBox)
test_eq(y.get_meta('img_size'), (128,128))
coco_tdl.show_batch();