################################################################################
# Copyright (c) 2021 ContinualAI.                                              #
# Copyrights licensed under the MIT License.                                   #
# See the accompanying LICENSE file for terms.                                 #
#                                                                              #
# Date: 21-06-2020                                                             #
# Author(s): Lorenzo Pellegrini, Vincenzo Lomonaco                             #
# E-mail: contact@continualai.org                                              #
# Website: continualai.org                                                     #
################################################################################
- """ This module contains useful utility functions and classes to generate
- pytorch datasets based on filelists (Caffe style) """

from pathlib import Path
from typing import Optional

import torch.utils.data as data
from PIL import Image
import os
import os.path

from torch import Tensor
from torchvision.transforms.functional import crop

from avalanche.benchmarks.utils import AvalancheDataset


def default_image_loader(path):
    """
    Default image loader for the PyTorch datasets defined in this module.

    :param path: relative or absolute path of the file to load.

    :returns: the image as an RGB PIL image.
    """
    return Image.open(path).convert('RGB')


def default_flist_reader(flist):
    """
    Default filelist reader: parses a Caffe-style filelist into a list of
    (path, label) tuples.

    :param flist: path of the filelist to read. The filelist format should be:
        impath label\\nimpath label\\n... (same as Caffe's filelist).

    :returns: a list of (path, label) tuples (the examples to be loaded).
    """
    imlist = []
    with open(flist, 'r') as rf:
        for line in rf.readlines():
            impath, imlabel = line.strip().split()
            imlist.append((impath, int(imlabel)))
    return imlist
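
# A minimal usage sketch for the default reader. The 'train_list.txt' path
# and its content are hypothetical:
#
#   # train_list.txt:
#   #   images/img_0001.png 0
#   #   images/img_0002.png 1
#
#   samples = default_flist_reader('train_list.txt')
#   # -> [('images/img_0001.png', 0), ('images/img_0002.png', 1)]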


class PathsDataset(data.Dataset):
    """
    This class extends the basic PyTorch Dataset class to handle a list of
    paths as the main data source.
    """

    def __init__(
            self, root, files, transform=None, target_transform=None,
            loader=default_image_loader):
        """
        Creates a File Dataset from a list of files and labels.

        :param root: root path where the data to load are stored. May be None.
        :param files: list of tuples. Each tuple must contain two elements:
            the full path to the pattern and its class label. Optionally, the
            tuple may contain a third element describing the bounding box to
            use for cropping (top, left, height, width).
        :param transform: optional transformation to apply to the input
            data (x).
        :param target_transform: optional transformation to apply to the
            targets (y).
        :param loader: loader function to use (for the real data) given its
            path.
        """

        if root is not None:
            root = Path(root)

        self.root: Optional[Path] = root
        self.imgs = files
        self.targets = [img_data[1] for img_data in self.imgs]
        self.paths = [img_data[0] for img_data in self.imgs]
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """
        Returns the element at the given index.

        :param index: index of the data to get.
        :return: loaded item.
        """

        img_description = self.imgs[index]
        impath = img_description[0]
        target = img_description[1]
        bbox = None
        if len(img_description) > 2:
            bbox = img_description[2]

        if self.root is not None:
            impath = self.root / impath

        # Load the image through the user-provided loader; loading errors
        # propagate to the caller.
        img = self.loader(impath)

        # If a bounding box is provided, crop the image before passing it to
        # any user-defined transformation.
        if bbox is not None:
            if isinstance(bbox, Tensor):
                bbox = bbox.tolist()
            img = crop(img, *bbox)

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        """
        Returns the total number of elements in the dataset.

        :return: Total number of dataset items.
        """

        return len(self.imgs)
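
# A minimal usage sketch for PathsDataset. The root, file paths, labels and
# bounding box below are hypothetical:
#
#   dataset = PathsDataset(
#       '/data/my_dataset',
#       [('img_0001.png', 0),
#        ('img_0002.png', 1, (10, 10, 100, 100))])  # (top, left, h, w) crop
#   img, target = dataset[0]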


class SeqPathsDataset(data.Dataset):
    """
    This class extends the basic PyTorch Dataset class to handle a list of
    (path, target, seq_id) tuples as the main data source.
    """

    def __init__(
            self, root, files, transform=None, target_transform=None,
            loader=default_image_loader):
        """
        Creates a File Dataset from a list of files, labels and sequence ids.

        :param root: root path where the data to load are stored. May be None.
        :param files: list of tuples. Each tuple must contain three elements:
            the full path to the pattern, its class label and the identifier
            of the sequence the pattern belongs to.
        :param transform: optional transformation to apply to the input
            data (x).
        :param target_transform: optional transformation to apply to the
            targets (y).
        :param loader: loader function to use (for the real data) given its
            path.
        """

        if root is not None:
            root = Path(root)

        self.root: Optional[Path] = root
        self.imgs = files
        self.targets = [img_data[1] for img_data in self.imgs]
        self.paths = [img_data[0] for img_data in self.imgs]
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """
        Returns the element at the given index.

        :param index: index of the data to get.
        :return: loaded item as a (img, target, path, seq_id) tuple.
        """

        img_description = self.imgs[index]
        impath = img_description[0]
        target = img_description[1]
        seq_code = img_description[2]

        if self.root is not None:
            impath = self.root / impath

        # Load the image through the user-provided loader; loading errors
        # propagate to the caller.
        img = self.loader(impath)

        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target, str(impath), seq_code

    def __len__(self):
        """
        Returns the total number of elements in the dataset.

        :return: Total number of dataset items.
        """

        return len(self.imgs)
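
# A minimal usage sketch for SeqPathsDataset. The paths, labels and sequence
# ids below are hypothetical:
#
#   dataset = SeqPathsDataset(
#       '/data/my_videos',
#       [('seq_0/frame_0.png', 0, 0),
#        ('seq_0/frame_1.png', 0, 0),
#        ('seq_1/frame_0.png', 1, 1)])
#   img, target, path, seq_id = dataset[0]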


class FilelistDataset(PathsDataset):
    """
    This class extends the basic PyTorch Dataset class to handle filelists as
    the main data source.
    """

    def __init__(
            self, root, flist, transform=None, target_transform=None,
            flist_reader=default_flist_reader, loader=default_image_loader):
        """
        Creates a dataset from a Caffe-style filelist.

        :param root: root path where the data to load are stored. May be None.
        :param flist: path of the filelist to read. The filelist format should
            be: impath label\\nimpath label\\n... (same as Caffe's filelist).
        :param transform: optional transformation to apply to the input
            data (x).
        :param target_transform: optional transformation to apply to the
            targets (y).
        :param flist_reader: loader function to use (for the filelists) given
            path.
        :param loader: loader function to use (for the real data) given path.
        """

        flist = str(flist)  # Manages Path objects
        files_and_labels = flist_reader(flist)
        super().__init__(root, files_and_labels, transform=transform,
                         target_transform=target_transform, loader=loader)
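
# A minimal usage sketch for FilelistDataset. The root directory and the
# filelist path are hypothetical:
#
#   dataset = FilelistDataset('/data/my_dataset', 'train_list.txt')
#   img, target = dataset[0]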


def datasets_from_filelists(root, train_filelists, test_filelists,
                            complete_test_set_only=False,
                            train_transform=None, train_target_transform=None,
                            test_transform=None, test_target_transform=None):
    """
    This reader reads a list of Caffe-style filelists and returns the proper
    Dataset objects.

    A Caffe-style filelist is just a text file where each line describes two
    elements: the path to the pattern (relative to the root parameter) and
    its class label. Those two elements are separated by a single space.

    This method reads each filelist and returns a separate dataset for each
    of them.

    Beware that the parameters must be **lists of paths to Caffe-style
    filelists**. If you need to create a dataset given a list of
    **pattern paths**, use `datasets_from_paths` instead.

    :param root: root path where the data to load are stored. May be None.
    :param train_filelists: list of paths to train filelists. The flist format
        should be: impath label\\nimpath label\\n... (same as Caffe's
        filelist).
    :param test_filelists: list of paths to test filelists. It can also be a
        single path when the test set is the same for each experience.
    :param complete_test_set_only: if True, test_filelists must contain
        the path to a single filelist that will serve as the complete test
        set. Alternatively, test_filelists can be the path (str) to the
        complete test set filelist. If False, train_filelists and
        test_filelists must contain the same amount of filelists paths.
        Defaults to False.
    :param train_transform: The transformation to apply to training patterns.
        Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param test_transform: The transformation to apply to test patterns.
        Defaults to None.
    :param test_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.

    :return: a tuple of two lists: the train datasets and the test datasets,
        one entry per filelist.
    """

    if complete_test_set_only:
        if not (isinstance(test_filelists, str) or
                isinstance(test_filelists, Path)):
            if len(test_filelists) > 1:
                raise ValueError(
                    'When complete_test_set_only is True, test_filelists '
                    'must be a str, Path or a list with a single element '
                    'describing the path to the complete test set.')
        else:
            test_filelists = [test_filelists]
    else:
        if len(test_filelists) != len(train_filelists):
            raise ValueError(
                'When complete_test_set_only is False, test_filelists and '
                'train_filelists must contain the same number of elements.')

    transform_groups = dict(train=(train_transform, train_target_transform),
                            eval=(test_transform, test_target_transform))
    train_inc_datasets = \
        [AvalancheDataset(FilelistDataset(root, tr_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='train')
         for tr_flist in train_filelists]
    test_inc_datasets = \
        [AvalancheDataset(FilelistDataset(root, te_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='eval')
         for te_flist in test_filelists]

    return train_inc_datasets, test_inc_datasets
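
# A minimal usage sketch, assuming a hypothetical root directory,
# per-experience train filelists and a single complete test filelist:
#
#   train_datasets, test_datasets = datasets_from_filelists(
#       '/data/my_dataset',
#       ['train_exp_0.txt', 'train_exp_1.txt'],
#       'test.txt', complete_test_set_only=True)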


def datasets_from_paths(
        train_list, test_list, complete_test_set_only=False,
        train_transform=None, train_target_transform=None,
        test_transform=None, test_target_transform=None):
    """
    This utility takes, for each dataset to generate, a list of tuples each
    containing two elements: the full path to the pattern and its class label.
    Optionally, the tuple may contain a third element describing the bounding
    box to use for cropping.

    This is equivalent to `datasets_from_filelists`, whose description
    contains more details on the behaviour of this utility. The two utilities
    differ in that `datasets_from_filelists` accepts paths to Caffe-style
    filelists while this one is able to create the datasets from an in-memory
    list.

    Note: this utility may try to detect (and strip) the common root path of
    all patterns in order to save some RAM.

    :param train_list: list of lists. Each list must contain tuples of two
        elements: the full path to the pattern and its class label.
        Optionally, the tuple may contain a third element describing the
        bounding box to use for cropping (top, left, height, width).
    :param test_list: list of lists. Each list must contain tuples of two
        elements: the full path to the pattern and its class label.
        Optionally, the tuple may contain a third element describing the
        bounding box to use for cropping (top, left, height, width). It can
        also be a single list when the test dataset is the same for each
        experience.
    :param complete_test_set_only: if True, test_list must contain a single
        list that will serve as the complete test set. If False, train_list
        and test_list must describe the same amount of datasets. Defaults to
        False.
    :param train_transform: The transformation to apply to training patterns.
        Defaults to None.
    :param train_target_transform: The transformation to apply to training
        patterns targets. Defaults to None.
    :param test_transform: The transformation to apply to test patterns.
        Defaults to None.
    :param test_target_transform: The transformation to apply to test
        patterns targets. Defaults to None.

    :return: a tuple of two lists: the train datasets and the test datasets.
    """

    if complete_test_set_only:
        # Check if the single dataset was passed as [Tuple1, Tuple2, ...]
        # or as [[Tuple1, Tuple2, ...]]
        if not isinstance(test_list[0], tuple):
            if len(test_list) > 1:
                raise ValueError(
                    'When complete_test_set_only is True, test_list must '
                    'be a single list of tuples or a nested list containing '
                    'a single list of tuples')
        else:
            test_list = [test_list]
    else:
        if len(test_list) != len(train_list):
            raise ValueError(
                'When complete_test_set_only is False, test_list and '
                'train_list must contain the same number of elements.')

    transform_groups = dict(train=(train_transform, train_target_transform),
                            eval=(test_transform, test_target_transform))

    common_root = None

    # Detect common root
    try:
        all_paths = [pattern_tuple[0] for exp_list in train_list
                     for pattern_tuple in exp_list] + \
                    [pattern_tuple[0] for exp_list in test_list
                     for pattern_tuple in exp_list]

        common_root = os.path.commonpath(all_paths)
    except ValueError:
        # commonpath may throw a ValueError in different situations!
        # See the official documentation for more details
        pass

    if common_root is not None and len(common_root) > 0 and \
            common_root != '/':
        has_common_root = True
        common_root = str(common_root)
    else:
        has_common_root = False
        common_root = None

    if has_common_root:
        # All paths have a common filesystem root
        # Remove it from all paths!
        single_path_case = False
        tr_list = list()
        te_list = list()

        for idx_exp_list in range(len(train_list)):
            if single_path_case:
                break
            st_list = list()
            for x in train_list[idx_exp_list]:
                rel = os.path.relpath(x[0], common_root)
                if len(rel) == 0 or rel == '.':
                    # May happen if the dataset has a single path
                    single_path_case = True
                    break
                st_list.append((rel, *x[1:]))
            tr_list.append(st_list)

        for idx_exp_list in range(len(test_list)):
            if single_path_case:
                break
            st_list = list()
            for x in test_list[idx_exp_list]:
                rel = os.path.relpath(x[0], common_root)
                if len(rel) == 0 or rel == '.':
                    # May happen if the dataset has a single path
                    single_path_case = True
                    break
                st_list.append((rel, *x[1:]))
            te_list.append(st_list)

        if not single_path_case:
            train_list = tr_list
            test_list = te_list
        else:
            has_common_root = False
            common_root = None

    train_inc_datasets = \
        [AvalancheDataset(PathsDataset(common_root, tr_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='train')
         for tr_flist in train_list]
    test_inc_datasets = \
        [AvalancheDataset(PathsDataset(common_root, te_flist),
                          transform_groups=transform_groups,
                          initial_transform_group='eval')
         for te_flist in test_list]

    return train_inc_datasets, test_inc_datasets
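
# A minimal usage sketch, assuming hypothetical absolute paths and labels.
# The common root '/data/my_dataset' would be detected and stripped:
#
#   train_datasets, test_datasets = datasets_from_paths(
#       [[('/data/my_dataset/tr_0.png', 0),
#         ('/data/my_dataset/tr_1.png', 1)]],
#       [[('/data/my_dataset/te_0.png', 0),
#         ('/data/my_dataset/te_1.png', 1)]])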


def common_paths_root(exp_list):
    """
    Detects the common root of the paths contained in the given list of
    (path, ...) tuples and, if a meaningful root is found, strips it from
    each path.

    :param exp_list: list of tuples in which the first element is a path.

    :return: a (common_root, exp_list) tuple, where common_root is None if no
        common root was detected (in which case exp_list is returned
        unchanged).
    """

    common_root = None

    # Detect common root
    try:
        all_paths = [pattern_tuple[0] for pattern_tuple in exp_list]

        common_root = os.path.commonpath(all_paths)
    except ValueError:
        # commonpath may throw a ValueError in different situations!
        # See the official documentation for more details
        pass

    if common_root is not None and len(common_root) > 0 and \
            common_root != '/':
        has_common_root = True
        common_root = str(common_root)
    else:
        has_common_root = False
        common_root = None

    if has_common_root:
        # All paths have a common filesystem root
        # Remove it from all paths!
        single_path_case = False
        exp_tuples = list()

        for x in exp_list:
            if single_path_case:
                break

            rel = os.path.relpath(x[0], common_root)
            if len(rel) == 0 or rel == '.':
                # May happen if the dataset has a single path
                single_path_case = True
                break
            exp_tuples.append((rel, *x[1:]))

        if not single_path_case:
            exp_list = exp_tuples
        else:
            common_root = None

    return common_root, exp_list
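
# A minimal usage sketch with hypothetical paths:
#
#   root, stripped = common_paths_root(
#       [('/data/my_dataset/a.png', 0), ('/data/my_dataset/b.png', 1)])
#   # root == '/data/my_dataset'
#   # stripped == [('a.png', 0), ('b.png', 1)]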


__all__ = [
    'default_image_loader',
    'default_flist_reader',
    'PathsDataset',
    'SeqPathsDataset',
    'FilelistDataset',
    'datasets_from_filelists',
    'datasets_from_paths',
    'common_paths_root'
]