
BOW experiments, autoencoder KDE

Felix Kleinsteuber 3 years ago
parent
commit
3a72516fae

File diff suppressed because it is too large
+ 4 - 4
approach3_local_features.ipynb


File diff suppressed because it is too large
+ 59 - 45
approach4_autoencoder2.ipynb


+ 0 - 69
eval_autoencoder.py

@@ -1,69 +0,0 @@
-import argparse
-import os
-from tqdm import tqdm
-import numpy as np
-import torch
-from torch import nn
-from torch.autograd import Variable
-from torch.utils.data import DataLoader
-from torchvision.utils import save_image
-from torchinfo import summary
-
-from py.PyTorchData import create_dataloader, model_output_to_image
-from py.Autoencoder2 import Autoencoder
-
-def eval_autoencoder(model: Autoencoder, dataloader: DataLoader, name: str, set_name: str, device: str = "cpu", criterion = nn.MSELoss()):
-    model = model.to(device)
-    print(f"Using {device} device")
-
-    print(f"Saving evaluation results to ./ae_train_NoBackup/{name}/eval")
-    os.makedirs(f"./ae_train_NoBackup/{name}/eval", exist_ok=True)
-
-    labels = []
-    encodeds = []
-    losses = []
-
-
-    for img, lbls in tqdm(dataloader):
-        img_batch = Variable(img).to(device)
-        # ===================forward=====================
-        encoded = model.encoder(img_batch)
-        encoded_flat = encoded.detach().cpu().numpy().reshape(encoded.size()[0], -1)
-        output_batch = model.decoder(encoded)
-
-        for input, output, label, enc_flat in zip(img_batch, output_batch, lbls, encoded_flat):
-            losses.append(criterion(output, input).item())
-            encodeds.append(enc_flat)
-            labels.append(label)
-    np.savez(f"./ae_train_NoBackup/{name}/eval/{set_name}.npz", labels=labels, encodeds=encodeds, losses=losses)
-
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Autoencoder eval script")
-    parser.add_argument("name", type=str, help="Name of the training session (name of the save folder)")
-    parser.add_argument("model_name", type=str, help="Filename of the model (e.g. model_120.pth)")
-    parser.add_argument("set_name", type=str, help="Name of the dataset (e.g. train or test)")
-    parser.add_argument("img_folder", type=str, help="Path to directory containing train images (may contain subfolders)")
-    parser.add_argument("--device", type=str, help="PyTorch device to train on (cpu or cuda)", default="cpu")
-    parser.add_argument("--batch_size", type=int, help="Batch size (>=1)", default=32)
-    parser.add_argument("--image_transforms", action="store_true", help="Truncate and resize images (only enable if the input images have not been truncated resized to the target size already)")
-    
-    args = parser.parse_args()
-
-    if args.image_transforms:
-        print("Image transforms enabled: Images will be truncated and resized.")
-    else:
-        print("Image transforms disabled: Images are expected to be of the right size.")
-    
-    dataloader = create_dataloader(args.img_folder, batch_size=args.batch_size, skip_transforms=not args.image_transforms)
-    model = Autoencoder()
-    print("Model:")
-    summary(model, (args.batch_size, 3, 256, 256))
-    print("Is CUDA available:", torch.cuda.is_available())
-    print(f"Devices: ({torch.cuda.device_count()})")
-    for i in range(torch.cuda.device_count()):
-        print(torch.cuda.get_device_name(i))
-    model.load_state_dict(torch.load(f"./ae_train_NoBackup/{args.name}/{args.model_name}", map_location="cpu"))
-    model.eval()
-    eval_autoencoder(model, dataloader, args.name, args.set_name, args.device)
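Note: the deleted script collected per-image latent codes and reconstruction losses but never scored them; judging by the commit title ("autoencoder KDE"), density-based scoring of those latents presumably moved into the suppressed approach4_autoencoder2.ipynb. A minimal sketch of that idea, assuming scikit-learn and stand-in data (none of this is taken from the diff):

    import numpy as np
    from sklearn.neighbors import KernelDensity

    rng = np.random.default_rng(0)
    train_latents = rng.normal(size=(500, 512))  # stand-in for encoded Lapse images
    test_latents = rng.normal(size=(100, 512))   # stand-in for encoded Motion images

    # Fit a Gaussian KDE on "normal" latents; low log-density flags an anomaly.
    kde = KernelDensity(kernel="gaussian", bandwidth=1.0).fit(train_latents)
    scores = kde.score_samples(test_latents)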

+ 52 - 0
eval_bow.py

@@ -0,0 +1,52 @@
+import argparse
+import os
+import numpy as np
+from sklearn import svm
+
+from py.Dataset import Dataset
+from py.LocalFeatures import generate_bow_features
+
+def main():
+    parser = argparse.ArgumentParser(description="BOW train script")
+    parser.add_argument("dataset_dir", type=str, help="Directory of the dataset containing all session folders")
+    parser.add_argument("session_name", type=str, help="Name of the session to use for Lapse images (e.g. marten_01)")
+    parser.add_argument("--clusters", type=int, help="Number of clusters / BOW vocabulary size", default=1024)
+
+    args = parser.parse_args()
+
+    ds = Dataset(args.dataset_dir)
+    session = ds.create_session(args.session_name)
+    save_dir = f"./bow_train_NoBackup/{session.name}"
+
+    # Lapse DSIFT descriptors
+
+    dictionary_file = os.path.join(save_dir, f"bow_dict_{args.clusters}.npy")
+    train_feat_file = os.path.join(save_dir, f"bow_train_{args.clusters}.npy")
+    eval_file = os.path.join(save_dir, f"bow_eval_{args.clusters}.csv")
+
+    if not os.path.isfile(dictionary_file):
+        print(f"ERROR: BOW dictionary missing! ({dictionary_file})")
+    elif not os.path.isfile(train_feat_file):
+        print(f"ERROR: Train data file missing! ({train_feat_file})")
+    elif os.path.isfile(eval_file):
+        print(f"ERROR: Eval file already exists! ({eval_file})")
+    else:
+        print(f"Loading dictionary from {dictionary_file}...")
+        dictionary = np.load(dictionary_file)
+        print(f"Loading training data from {train_feat_file}...")
+        train_data = np.load(train_feat_file).squeeze()
+        
+        print(f"Fitting one-class SVM...")
+        clf = svm.OneClassSVM().fit(train_data)
+
+        print("Evaluating...")
+        with open(eval_file, "a+") as f:
+            for filename, feat in generate_bow_features(list(session.generate_motion_images()), dictionary):
+                y = clf.decision_function(feat)[0]
+                f.write(f"{filename},{y}\n")
+                f.flush()
+
+        print("Complete!")
+
+if __name__ == "__main__":
+    main()
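For reference, a typical invocation of the new eval script (dataset path and session name are illustrative):

    python eval_bow.py ./dataset_dir marten_01 --clusters 1024

It expects bow_dict_1024.npy and bow_train_1024.npy (produced by train_bow.py) under ./bow_train_NoBackup/<session name>/ and appends one "filename,score" line per Motion image to bow_eval_1024.csv.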

BIN
plots/approach3/beaver_01_bow_1024_tar_vs_tnr.png


BIN
plots/approach3/beaver_01_bow_2048_tar_vs_tnr.png


BIN
plots/approach3/beaver_01_bow_512_tar_vs_tnr.png


BIN
plots/approach3/marten_01_bow_1024_tar_vs_tnr.png


BIN
plots/approach3/marten_01_bow_2048_tar_vs_tnr.png


BIN
plots/approach3/marten_01_bow_512_tar_vs_tnr.png


+ 0 - 0
plots/approach3_sift200_cluster16_tar_vs_tnr.png → plots/approach3/sift200_cluster16_tar_vs_tnr.png


+ 0 - 0
plots/approach3_sift200_cluster32_tar_vs_tnr.png → plots/approach3/sift200_cluster32_tar_vs_tnr.png


+ 0 - 0
plots/approach3_sift200_cluster64_tar_vs_tnr.png → plots/approach3/sift200_cluster64_tar_vs_tnr.png


+ 42 - 10
py/Autoencoder2.py

@@ -1,36 +1,68 @@
 from torch import nn
 
 class Autoencoder(nn.Module):
-    def __init__(self):
+    def __init__(self, dropout=0.1, latent_channels=32):
         super(Autoencoder, self).__init__()
         self.encoder = nn.Sequential(
-            nn.Conv2d(3, 128, kernel_size=7, stride=4, padding=2),
+            nn.Dropout(dropout),
+            nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3),
             nn.ReLU(True),
-            nn.Conv2d(128, 64, kernel_size=3, stride=2, padding=1),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),
             nn.ReLU(True),
-            nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
             nn.ReLU(True),
-            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
             nn.ReLU(True),
+
+            nn.Dropout(dropout),
             nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
             nn.ReLU(True),
-            nn.Conv2d(128, 64, kernel_size=3, padding="same"),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(128, latent_channels, kernel_size=3, padding="same"),
             nn.ReLU(True),
         )
         self.decoder = nn.Sequential(
-            nn.Conv2d(64, 128, kernel_size=3, padding="same"),
+            nn.Dropout(dropout),
+            nn.Conv2d(latent_channels, 128, kernel_size=3, padding="same"),
             nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(128, 128, kernel_size=4, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
             nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
             nn.ReLU(True),
+
+            nn.Dropout(dropout),
             nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
             nn.ReLU(True),
+
+            nn.Dropout(dropout),
             nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
             nn.ReLU(True),
-            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(64, 32, kernel_size=6, stride=2, padding=2),
             nn.ReLU(True),
-            nn.ConvTranspose2d(32, 32, kernel_size=8, stride=4, padding=2),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(32, 16, kernel_size=8, stride=2, padding=3),
             nn.ReLU(True),
-            nn.Conv2d(32, 3, kernel_size=3, stride=1, padding="same"),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(16, 3, kernel_size=3, stride=1, padding="same"),
             nn.Tanh(),
         )
     

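With the reworked layers, a 256×256 input is halved six times (256 → 128 → 64 → 32 → 16 → 8 → 4), so the bottleneck is a latent_channels × 4 × 4 tensor; latent_channels = n therefore yields n·16 latent features, matching the --latent_channels help text in train_autoencoder.py. A quick shape check, as a sketch:

    import torch
    from py.Autoencoder2 import Autoencoder

    model = Autoencoder(dropout=0.1, latent_channels=32)
    x = torch.randn(1, 3, 256, 256)
    z = model.encoder(x)   # expected: torch.Size([1, 32, 4, 4]) -> 512 latent features
    y = model.decoder(z)   # expected: torch.Size([1, 3, 256, 256])
    print(z.shape, y.shape)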
+ 6 - 0
py/Labels.py

@@ -3,6 +3,12 @@ LABELS = {
     "Beaver_01": {
         "normal": [3, 4, 5, 48, 49, 50, 51, 52, 53, 54, 55, 78, 79, 80, 106, 107, 108, 109, 110, 113, 115, 132, 145, 147, 148, 149, 150, 193, 194, 195, 196, 197, 198, 199, 200, 217, 218, 219, 220, 372, 373, 374, 375, 399, 400, 401, 402, 403, 404, 405, 418, 419, 420, 425, 460, 463, 464, 465, 477, 478, 479, 480, 645, 665, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690],
         "small": list(range(86, 106)) + list(range(151, 156)) + [204, 205, 371, 392, 393, 394, 395, 416, 417, 459, 462, 476, 630, 631, 644, 677],
+        "max": 600,
+    },
+    "Marten_01": {
+        "normal": list(range(11, 166)) + list(range(177, 181)) + list(range(209, 301)),
+        "anomalous": list(range(1, 11)) + list(range(166, 177)) + list(range(181, 209)),
+        "max": 300,
     }
 }
 
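For context, these label lists can be expanded into per-image ground truth; a hedged sketch (the comprehension is illustrative, not repo code):

    from py.Labels import LABELS

    session = LABELS["Marten_01"]
    # True where the image index is labeled anomalous; indices run from 1 to "max".
    is_anomalous = [i in session["anomalous"] for i in range(1, session["max"] + 1)]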

+ 81 - 0
py/LocalFeatures.py

@@ -0,0 +1,81 @@
+# Functions related to approach 3 (local features).
+# For training and evaluation scripts, see ./train_bow.py and ./eval_bow.py.
+
+import cv2 as cv
+import numpy as np
+from tqdm import tqdm
+
+from py.Session import SessionImage
+
+def dense_keypoints(img, step=30, off=(15, 12)):
+    """Generates a list of densely sampled keypoints on img.
+
+    Args:
+        img (np.array): Image to sample keypoints from (only its shape is used).
+        step (int, optional): Vertical and horizontal step between keypoints; also used as the keypoint size. Defaults to 30.
+        off (tuple, optional): y and x offset of the first keypoint in the grid. Defaults to (15, 12).
+
+    Returns:
+        list[cv.KeyPoint]: List of keypoints
+    """
+    border_dist = (step + 1) // 2
+    return [cv.KeyPoint(x, y, step) for y in range(border_dist + off[0], img.shape[0] - border_dist, step) 
+                                    for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
+
+
+def extract_descriptors(images: list[SessionImage]):
+    """Extracts DSIFT descriptors from the provided images and returns them in a single array.
+
+    Args:
+        images (list[SessionImage]): List of images to read and compute descriptors from.
+
+    Returns:
+        np.array, shape=(len(images), keypoints_per_image, 128): DSIFT descriptors.
+    """
+    sift = cv.SIFT_create()
+    dscs = []
+    for image in tqdm(images):
+        img = image.read_opencv(gray=True)
+        kp = dense_keypoints(img)
+        kp, des = sift.compute(img, kp)
+        dscs.append(des)
+    return np.array(dscs)
+
+def generate_dictionary_from_descriptors(dscs, dictionary_size: int):
+    """Clusters the given (D)SIFT descriptors using k-means.
+    This may take a while depending on the number of descriptors.
+
+    Args:
+        dscs (np.array, shape(-1, 128)): (D)SIFT descriptors for clustering.
+        dictionary_size (int): Number of k-means clusters.
+
+    Returns:
+        np.array, shape=(dictionary_size, 128): BOW dictionary.
+    """
+    BOW = cv.BOWKMeansTrainer(dictionary_size)
+    for dsc in dscs:
+        BOW.add(dsc)
+    dictionary = BOW.cluster()
+    return dictionary
+
+def generate_bow_features(images: list[SessionImage], dictionary):
+    """Calculates the BOW features for the provided images using dictionary.
+    Yields a feature vector for every image.
+
+    Args:
+        images (list[SessionImage]): List of images to read and compute feature vectors from.
+        dictionary (np.array, shape=(-1, 128)): BOW dictionary.
+
+    Yields:
+        (str, np.array of shape=(dictionary.shape[0])): (filename, feature vector)
+    """
+    flann = cv.FlannBasedMatcher({"algorithm": 0, "trees": 5}, {"checks": 50})
+    sift = cv.SIFT_create()
+    bow_extractor = cv.BOWImgDescriptorExtractor(sift, flann) # or cv.BFMatcher(cv.NORM_L2)
+    bow_extractor.setVocabulary(dictionary)
+    
+    for image in tqdm(images):
+        img = image.read_opencv(gray=True)
+        kp = dense_keypoints(img)
+        feat = bow_extractor.compute(img, kp)
+        yield image.filename, feat

+ 26 - 9
train_autoencoder.py

@@ -1,3 +1,7 @@
+# Approach 4: Autoencoder
+# This script is used for training an autoencoder on Lapse images.
+# See eval_autoencoder.py for evaluation.
+
 import argparse
 import os
 from tqdm import tqdm
@@ -11,7 +15,7 @@ from torchinfo import summary
 from py.PyTorchData import create_dataloader, model_output_to_image
 from py.Autoencoder2 import Autoencoder
 
-def train_autoencoder(model: nn.Module, train_dataloader: DataLoader, name: str, device: str = "cpu", num_epochs=100, criterion = nn.MSELoss(), lr: float = 1e-3, weight_decay: float = 1e-5, noise: bool = False):
+def train_autoencoder(model: Autoencoder, train_dataloader: DataLoader, name: str, device: str = "cpu", num_epochs=100, criterion = nn.MSELoss(), lr: float = 1e-3, weight_decay: float = 1e-5, noise: bool = False, sparse: bool = False):
     model = model.to(device)
     print(f"Using {device} device")
     optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
@@ -22,23 +26,31 @@ def train_autoencoder(model: nn.Module, train_dataloader: DataLoader, name: str,
     print(f"Training for {num_epochs} epochs.")
     for epoch in range(num_epochs):
         total_loss = 0
+        total_reg_loss = 0
         for img, _ in tqdm(train_dataloader):
-
+            optimizer.zero_grad()
             img = Variable(img).to(device)
             input = img
             if noise:
-                input = input + (0.01 ** 0.5) * torch.randn(img.size(), device=device)
+                input = input + (0.015 ** 0.5) * torch.randn(img.size(), device=device)
             # ===================forward=====================
-            output = model(input)
+            latent = model.encoder(input)
+            output = model.decoder(latent)
             loss = criterion(output, img)
+            total_loss += loss.data
+            if sparse:
+                reg_loss = 1e-4 * torch.mean(torch.abs(latent))
+                total_reg_loss += reg_loss.data
+                loss += reg_loss  # not .data: keep the graph so the L1 penalty backpropagates
             # ===================backward====================
-            optimizer.zero_grad()
             loss.backward()
             optimizer.step()
-            total_loss += loss.data
         # ===================log========================
         dsp_epoch = epoch + 1
-        print('epoch [{}/{}], loss:{:.4f}'.format(dsp_epoch, num_epochs, total_loss))
+        if sparse:
+            print('epoch [{}/{}], loss: {:.4f} + reg loss: {:.4f}'.format(dsp_epoch, num_epochs, total_loss, total_reg_loss))
+        else:
+            print('epoch [{}/{}], loss: {:.4f}'.format(dsp_epoch, num_epochs, total_loss))
         
         # log file
         with open(f"./ae_train_NoBackup/{name}/log.csv", "a+") as f:
@@ -64,8 +76,11 @@ if __name__ == "__main__":
     parser.add_argument("--epochs", type=int, help="Number of epochs", default=100)
     parser.add_argument("--batch_size", type=int, help="Batch size (>=1)", default=32)
     parser.add_argument("--lr", type=float, help="Learning rate", default=1e-3)
+    parser.add_argument("--dropout", type=float, help="Dropout rate on all layers", default=0.05)
+    parser.add_argument("--latent_channels", type=float, help="Latent channels n (-> n*16 latent features)", default=32)
     parser.add_argument("--image_transforms", action="store_true", help="Truncate and resize images (only enable if the input images have not been truncated resized to the target size already)")
     parser.add_argument("--noise", action="store_true", help="Add Gaussian noise to model input")
+    parser.add_argument("--sparse", action="store_true", help="Add L1 penalty to latent features")
 
     args = parser.parse_args()
 
@@ -75,7 +90,7 @@ if __name__ == "__main__":
         print("Image transforms disabled: Images are expected to be of the right size.")
     
     data_loader = create_dataloader(args.img_folder, batch_size=args.batch_size, skip_transforms=not args.image_transforms)
-    model = Autoencoder()
+    model = Autoencoder(dropout=args.dropout, latent_channels=args.latent_channels)
     print("Model:")
     summary(model, (args.batch_size, 3, 256, 256))
     print("Is CUDA available:", torch.cuda.is_available())
@@ -84,4 +99,6 @@ if __name__ == "__main__":
         print(torch.cuda.get_device_name(i))
     if args.noise:
         print("Adding Gaussian noise to model input")
-    train_autoencoder(model, data_loader, args.name, device=args.device, num_epochs=args.epochs, lr=args.lr, noise=args.noise)
+    if args.sparse:
+        print("Adding L1 penalty to latent features (sparse)")
+    train_autoencoder(model, data_loader, args.name, device=args.device, num_epochs=args.epochs, lr=args.lr, noise=args.noise, sparse=args.sparse)
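A typical training call with the new flags might be (assuming the positional arguments are the session name and image folder, as implied by args.name and args.img_folder):

    python train_autoencoder.py marten_01_ae ./lapse_images --device cuda --epochs 100 --dropout 0.05 --latent_channels 32 --noise --sparse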

+ 33 - 32
train_bow.py

@@ -1,36 +1,16 @@
+# Approach 3: Local features
+# This script is used for generating a BOW vocabulary using
+# densely sampled SIFT features on Lapse images.
+# See eval_bow.py for evaluation.
+
 import argparse
 import os
-import cv2 as cv
 import numpy as np
-from tqdm import tqdm
 
 from py.Dataset import Dataset
-from py.Session import SessionImage
-
-def dense_keypoints(img, step=30, off=(15, 12)):
-    border_dist = (step + 1) // 2
-    return [cv.KeyPoint(x, y, step) for y in range(border_dist + off[0], img.shape[0] - border_dist, step) 
-                                    for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
-
-def extract_descriptors(images: list[SessionImage]):
-    sift = cv.SIFT_create()
-    dscs = []
-    for image in tqdm(images):
-        img = image.read_opencv(gray=True)
-        kp = dense_keypoints(img)
-        kp, des = sift.compute(img, kp)
-        dscs.append(des)
-    return np.array(dscs)
-
-def generate_dictionary(dscs, dictionary_size):
-    # dictionary size = number of clusters
-    BOW = cv.BOWKMeansTrainer(dictionary_size)
-    for dsc in dscs:
-        BOW.add(dsc)
-    dictionary = BOW.cluster()
-    return dictionary
+from py.LocalFeatures import extract_descriptors, generate_dictionary_from_descriptors, generate_bow_features
 
-if __name__ == "__main__":
+def main():
     parser = argparse.ArgumentParser(description="BOW train script")
     parser.add_argument("dataset_dir", type=str, help="Directory of the dataset containing all session folders")
     parser.add_argument("session_name", type=str, help="Name of the session to use for Lapse images (e.g. marten_01)")
@@ -45,10 +25,18 @@ if __name__ == "__main__":
     # Lapse DSIFT descriptors
 
     lapse_dscs_file = os.path.join(save_dir, "lapse_dscs.npy")
+    dictionary_file = os.path.join(save_dir, f"bow_dict_{args.clusters}.npy")
+    train_feat_file = os.path.join(save_dir, f"bow_train_{args.clusters}.npy")
+
     if os.path.isfile(lapse_dscs_file):
-        print(f"{lapse_dscs_file} already exists, loading lapse descriptor from file...")
-        lapse_dscs = np.load(lapse_dscs_file)
+        if os.path.isfile(dictionary_file):
+            # if dictionary file already exists, we don't need the lapse descriptors
+            print(f"{lapse_dscs_file} already exists, skipping lapse descriptor extraction...")
+        else:
+            print(f"{lapse_dscs_file} already exists, loading lapse descriptor from file...")
+            lapse_dscs = np.load(lapse_dscs_file)
     else:
+        # Step 1 - extract dense SIFT descriptors
         print("Extracting lapse descriptors...")
         lapse_dscs = extract_descriptors(list(session.generate_lapse_images()))
         os.makedirs(save_dir, exist_ok=True)
@@ -56,13 +44,26 @@ if __name__ == "__main__":
 
     # BOW dictionary
 
-    dictionary_file = os.path.join(save_dir, f"bow_dict_{args.clusters}.npy")
     if os.path.isfile(dictionary_file):
         print(f"{dictionary_file} already exists, loading BOW dictionary from file...")
         dictionary = np.load(dictionary_file)
     else:
+        # Step 2 - create BOW dictionary from Lapse SIFT descriptors
         print(f"Creating BOW vocabulary with {args.clusters} clusters...")
-        dictionary = generate_dictionary(lapse_dscs, args.clusters)
+        dictionary = generate_dictionary_from_descriptors(lapse_dscs, args.clusters)
         np.save(dictionary_file, dictionary)
     
-    print("Complete!")
+    # Extract Lapse BOW features using vocabulary (train data)
+
+    if os.path.isfile(train_feat_file):
+        print(f"{train_feat_file} already exists, skipping lapse BOW feature extraction...")
+    else:
+        # Step 3 - calculate training data (BOW features of Lapse images)
+        print(f"Extracting BOW features from Lapse images...")
+        features = [feat for _, feat in generate_bow_features(list(session.generate_lapse_images()), dictionary)]
+        np.save(train_feat_file, np.array(features))
+    
+    print("Complete!")
+
+if __name__ == "__main__":
+    main()
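Intended order of the two scripts, as a usage sketch (paths illustrative): train_bow.py runs steps 1-3 (descriptors → vocabulary → train features) and must complete before eval_bow.py (above) can score the Motion images:

    python train_bow.py ./dataset_dir marten_01 --clusters 1024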

Some files were not shown because the diff is too large