Browse Source

local features + autoencoder

Felix Kleinsteuber 3 years ago
parent
commit
a11917b7fb

+ 1 - 0
.gitignore

@@ -1,2 +1,3 @@
 *.jpg
 __pycache__
+*_NoBackup

File diff suppressed because it is too large
+ 25 - 11
approach3_local_features.ipynb


BIN
approach4_ae1.png


BIN
approach4_ae2.png


BIN
approach4_ae2_noise.png


File diff suppressed because it is too large
+ 54 - 0
approach4_autoencoder.ipynb


File diff suppressed because it is too large
+ 117 - 0
approach4_autoencoder2.ipynb


BIN
plots/approach3_sift200_cluster16_tar_vs_tnr.png


BIN
plots/approach3_sift200_cluster32_tar_vs_tnr.png


BIN
plots/approach3_sift200_cluster64_tar_vs_tnr.png


+ 40 - 0
py/Autoencoder.py

@@ -0,0 +1,40 @@
+from torch import nn
+
+class Autoencoder(nn.Module):
+    def __init__(self):
+        super(Autoencoder, self).__init__()
+        self.encoder = nn.Sequential(
+            nn.Conv2d(3, 128, kernel_size=7, stride=4, padding=2),   # 3x256x256 -> 128x64x64
+            nn.ReLU(True),
+            nn.Conv2d(128, 64, kernel_size=3, stride=2, padding=1),  # -> 64x32x32
+            nn.ReLU(True),
+            nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),   # -> 32x16x16
+            nn.ReLU(True),
+            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),   # -> 64x8x8
+            nn.ReLU(True),
+            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),  # -> 128x4x4
+            nn.ReLU(True),
+            nn.Flatten(),                                            # -> 2048
+            nn.Linear(2048, 512),                                    # dense bottleneck -> 512
+            nn.ReLU(True), # see https://stackoverflow.com/questions/50187127/is-it-necessary-to-use-a-linear-bottleneck-layer-for-autoencoder
+        )
+        self.decoder = nn.Sequential(
+            nn.Linear(512, 2048),
+            nn.ReLU(True),
+            nn.Unflatten(1, (128, 4, 4)),                                    # -> 128x4x4
+            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1), # -> 64x8x8
+            nn.ReLU(True),
+            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),  # -> 32x16x16
+            nn.ReLU(True),
+            nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),  # -> 32x32x32
+            nn.ReLU(True),
+            nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),  # -> 32x64x64
+            nn.ReLU(True),
+            nn.ConvTranspose2d(32, 3, kernel_size=8, stride=4, padding=2),   # -> 3x256x256
+            nn.Tanh(),                                                       # output in [-1, 1], matching input normalization
+        )
+    
+    def forward(self, x):
+        x = self.encoder(x)
+        x = self.decoder(x)
+        return x

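The encoder downsamples 3x256x256 inputs to a dense 512-dimensional code and the decoder mirrors it back; Tanh keeps reconstructions in the same [-1, 1] range as the normalized inputs. A minimal shape-check sketch, assuming the 3x256x256 input size used throughout this commit:

import torch
from py.Autoencoder import Autoencoder

model = Autoencoder()
x = torch.randn(1, 3, 256, 256)  # dummy batch in the normalized [-1, 1] range
z = model.encoder(x)
y = model(x)
print(z.shape)  # torch.Size([1, 512])
print(y.shape)  # torch.Size([1, 3, 256, 256])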
+ 40 - 0
py/Autoencoder2.py

@@ -0,0 +1,40 @@
+from torch import nn
+
+class Autoencoder(nn.Module):
+    def __init__(self):
+        super(Autoencoder, self).__init__()
+        self.encoder = nn.Sequential(
+            nn.Conv2d(3, 128, kernel_size=7, stride=4, padding=2),   # 3x256x256 -> 128x64x64
+            nn.ReLU(True),
+            nn.Conv2d(128, 64, kernel_size=3, stride=2, padding=1),  # -> 64x32x32
+            nn.ReLU(True),
+            nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),   # -> 32x16x16
+            nn.ReLU(True),
+            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),   # -> 64x8x8
+            nn.ReLU(True),
+            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),  # -> 128x4x4
+            nn.ReLU(True),
+            nn.Conv2d(128, 64, kernel_size=3, padding="same"),       # convolutional bottleneck -> 64x4x4
+            nn.ReLU(True),
+        )
+        self.decoder = nn.Sequential(
+            nn.Conv2d(64, 128, kernel_size=3, padding="same"),                # -> 128x4x4
+            nn.ReLU(True),
+            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),  # -> 64x8x8
+            nn.ReLU(True),
+            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),   # -> 64x16x16
+            nn.ReLU(True),
+            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),   # -> 64x32x32
+            nn.ReLU(True),
+            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),   # -> 32x64x64
+            nn.ReLU(True),
+            nn.ConvTranspose2d(32, 32, kernel_size=8, stride=4, padding=2),   # -> 32x256x256
+            nn.ReLU(True),
+            nn.Conv2d(32, 3, kernel_size=3, stride=1, padding="same"),        # -> 3x256x256
+            nn.Tanh(),                                                        # output in [-1, 1]
+        )
+    
+    def forward(self, x):
+        x = self.encoder(x)
+        x = self.decoder(x)
+        return x

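Unlike py/Autoencoder.py, this variant drops the dense layers and keeps a fully convolutional bottleneck, so the latent code retains a spatial layout (64x4x4 = 1024 values instead of a 512-vector). A quick sketch to confirm the latent shape, under the same 3x256x256 input assumption:

import torch
from py.Autoencoder2 import Autoencoder

model = Autoencoder()
z = model.encoder(torch.randn(1, 3, 256, 256))
print(z.shape)  # torch.Size([1, 64, 4, 4])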
+ 84 - 0
py/PyTorchData.py

@@ -0,0 +1,84 @@
+import os
+import matplotlib.pyplot as plt
+from torchvision import io, transforms
+from torch.utils.data import DataLoader, Dataset
+
+class ImageDataset(Dataset):
+    def __init__(self, img_dir: str, transform = None, labeler = None):
+        self.img_dir = img_dir
+        self.transform = transform
+        self.labeler = labeler
+        with os.scandir(img_dir) as it:
+            self.files = [entry.name for entry in it if entry.name.endswith(".jpg") and entry.is_file()]
+    
+    def __len__(self):
+        return len(self.files)
+
+    def __getitem__(self, idx):
+        img_path = os.path.join(self.img_dir, self.files[idx])
+        img = io.read_image(img_path)
+        if self.transform:
+            img = self.transform(img)
+        label = 0
+        if self.labeler:
+            label = self.labeler(self.files[idx])
+        return img, label
+
+def create_dataloader(img_folder: str, target_size: tuple = (256, 256), batch_size: int = 32, shuffle: bool = True, truncate_y: tuple = (40, 40), labeler = None, skip_transforms: bool = False) -> DataLoader:
+    """Creates a PyTorch DataLoader from the given image folder.
+
+    Args:
+        img_folder (str): Folder containing images. (Scanned non-recursively for .jpg files; subfolders are ignored)
+        target_size (tuple, optional): Model input size. Images are resized to this size. Defaults to (256, 256).
+        batch_size (int, optional): Batch size. Defaults to 32.
+        shuffle (bool, optional): Shuffle images. Good for training, useless for testing. Defaults to True.
+        truncate_y (tuple, optional): (a, b), cut off the first a and the last b pixel rows of the unresized image. Defaults to (40, 40).
+        labeler (lambda(filename: str) -> int, optional): Lambda that maps every filename to an int label. By default all labels are 0. Defaults to None.
+        skip_transforms (bool, optional): Skip truncate and resize transforms. (If the images are already truncated and resized). Defaults to False.
+
+    Returns:
+        DataLoader: PyTorch DataLoader
+    """
+    def crop_lambda(img):
+        return transforms.functional.crop(img, truncate_y[0], 0, img.shape[-2] - truncate_y[0] - truncate_y[1], img.shape[-1])
+
+    transform = None
+    if skip_transforms:
+        transform = transforms.Compose([
+            transforms.Lambda(lambda img: img.float()),
+            transforms.Normalize((127.5), (127.5)) # rescale uint8 range [0, 255] to [-1, 1]
+        ])
+    else:
+        transform = transforms.Compose([
+            transforms.Lambda(crop_lambda),
+            transforms.ToPILImage(),
+            transforms.Resize(target_size),
+            transforms.ToTensor(),
+            transforms.Normalize((0.5), (0.5)) # rescale [0, 1] (from ToTensor) to [-1, 1]
+        ])
+
+    data = ImageDataset(img_folder, transform=transform, labeler=labeler)
+    return DataLoader(data, batch_size=batch_size, shuffle=shuffle)
+
+def model_output_to_image(y):
+    y = 0.5 * (y + 1) # normalize back to [0, 1]
+    y = y.clamp(0, 1) # clamp to [0, 1]
+    y = y.view(y.size(0), 3, 256, 256)
+    return y
+
+def get_log(name: str, display: bool = False, figsize: tuple = (12, 6)):
+    its = []
+    losses = []
+    with open(f"./ae_train_NoBackup/{name}/log.csv", "r") as f:
+        for line in f:
+            it, loss = line.rstrip().split(",")
+            its.append(int(it))
+            losses.append(float(loss))
+    if display:
+        plt.figure(figsize=figsize)
+        plt.plot(its, losses)
+        plt.title(f"Training curve ({name})")
+        plt.xlabel("Epoch")
+        plt.ylabel("MSE Loss")
+        plt.show()
+    return its, losses

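A usage sketch for create_dataloader; the folder path follows resize_session.ipynb below, and the labeler is a hypothetical placeholder (by default all labels are 0 anyway):

from py.PyTorchData import create_dataloader

# images were already truncated and resized (e.g. by resize_session.ipynb), so skip the transforms
loader = create_dataloader(
    "./ResizedSessions256_NoBackup",
    batch_size=32,
    skip_transforms=True,
    labeler=lambda filename: 0,  # hypothetical: label every image 0
)
imgs, labels = next(iter(loader))
print(imgs.shape)  # torch.Size([32, 3, 256, 256]), values in [-1, 1]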
+ 8 - 2
py/Session.py

@@ -70,19 +70,25 @@ class Session:
             pickle.dump(self.lapse_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
             print(f"Saved {lapse_map_file}")
     
+    def get_lapse_folder(self) -> str:
+        return os.path.join(self.folder, "Lapse")
+    
+    def get_motion_folder(self) -> str:
+        return os.path.join(self.folder, "Motion")
+    
     def scan(self, force=False, auto_save=True):
         if self.scanned and not force:
             raise ValueError("Session is already scanned. Use force=True to scan anyway and override scan progress.")
         # Scan motion dates
         print("Scanning motion dates...")
         self.motion_dates = {}
-        motion_folder = os.path.join(self.folder, "Motion")
+        motion_folder = self.get_motion_folder()
         for file in tqdm(list_jpegs_recursive(motion_folder)):
             self.motion_dates[os.path.relpath(file, motion_folder)] = get_image_date(file)
         # Scan lapse dates
         print("Scanning lapse dates...")
         self.lapse_dates = {}
-        lapse_folder = os.path.join(self.folder, "Lapse")
+        lapse_folder = self.get_lapse_folder()
         for file in tqdm(list_jpegs_recursive(lapse_folder)):
             self.lapse_dates[os.path.relpath(file, lapse_folder)] = get_image_date(file)
         # Create lapse map

+ 133 - 0
resize_session.ipynb

@@ -0,0 +1,133 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Resize session images"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 32 sessions\n",
+      "Session 'Beaver_01' at folder: /home/AMMOD_data/camera_traps/BayerWald/Vielkadaver-Projekt/VIELAAS_Spring_Session01-VIELAAS_Beaver_01\n",
+      "Loaded scans.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import glob\n",
+    "from tqdm import tqdm\n",
+    "import cv2 as cv\n",
+    "\n",
+    "from py.Session import Session\n",
+    "from py.Dataset import Dataset\n",
+    "from py.FileUtils import list_jpegs_recursive\n",
+    "\n",
+    "DIR = '/home/AMMOD_data/camera_traps/BayerWald/Vielkadaver-Projekt/' # dataset directory\n",
+    "\n",
+    "ds = Dataset(DIR)\n",
+    "session = ds.create_session(\"beaver_01\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def copy_session(session: Session, target_folder_name: str, scale=None, size=None, truncate_y=(0, 0)):\n",
+    "    new_folder = os.path.join(target_folder_name, os.path.basename(session.folder))\n",
+    "    for file in tqdm(list_jpegs_recursive(session.folder)):\n",
+    "        new_file = os.path.join(new_folder, os.path.relpath(file, session.folder))\n",
+    "        os.makedirs(os.path.dirname(new_file), exist_ok=True)\n",
+    "        # print(f\"Copying {file} to {new_file}\")\n",
+    "        img = cv.imread(file)\n",
+    "        img = img[truncate_y[0]:(-truncate_y[1])]\n",
+    "        # scale\n",
+    "        if scale is not None and scale < 1:\n",
+    "            img = cv.resize(img, None, fx=scale, fy=scale, interpolation=cv.INTER_LINEAR)\n",
+    "        elif size is not None:\n",
+    "            assert type(size) == tuple\n",
+    "            img = cv.resize(img, size, interpolation=cv.INTER_LINEAR)\n",
+    "        cv.imwrite(new_file, img)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 2629/2629 [11:35<00:00,  3.78it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "copy_session(session, \"ResizedSessions_NoBackup\", scale=0.4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 2629/2629 [07:13<00:00,  6.07it/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "copy_session(session, \"ResizedSessions256_NoBackup\", size=(256, 256), truncate_y=(40, 40))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.6.9 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 87 - 0
train_autoencoder.py

@@ -0,0 +1,87 @@
+import argparse
+import os
+from tqdm import tqdm
+import torch
+from torch import nn
+from torch.utils.data import DataLoader
+from torchvision.utils import save_image
+from torchinfo import summary
+
+from py.PyTorchData import create_dataloader, model_output_to_image
+from py.Autoencoder2 import Autoencoder
+
+def train_autoencoder(model: nn.Module, train_dataloader: DataLoader, name: str, device: str = "cpu", num_epochs=100, criterion = nn.MSELoss(), lr: float = 1e-3, weight_decay: float = 1e-5, noise: bool = False):
+    model = model.to(device)
+    print(f"Using {device} device")
+    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
+
+    print(f"Saving models to ./ae_train_NoBackup/{name}")
+    os.makedirs(f"./ae_train_NoBackup/{name}", exist_ok=True)
+
+    print(f"Training for {num_epochs} epochs.")
+    for epoch in range(num_epochs):
+        total_loss = 0
+        for img, _ in tqdm(train_dataloader):
+
+            img = img.to(device)
+            model_input = img
+            if noise:
+                # denoising setup: corrupt the input with Gaussian noise (variance 0.01), reconstruct the clean image
+                model_input = model_input + (0.01 ** 0.5) * torch.randn(img.size(), device=device)
+            # ===================forward=====================
+            output = model(model_input)
+            loss = criterion(output, img)
+            # ===================backward====================
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item() # accumulate a plain float instead of a tensor
+        # ===================log========================
+        dsp_epoch = epoch + 1
+        print('epoch [{}/{}], loss:{:.4f}'.format(dsp_epoch, num_epochs, total_loss))
+        
+        # log file
+        with open(f"./ae_train_NoBackup/{name}/log.csv", "a+") as f:
+            f.write(f"{dsp_epoch},{total_loss}\n")
+        
+        # output image
+        if epoch % 2 == 0:
+            pic = model_output_to_image(output.cpu().data)
+            save_image(pic, f"./ae_train_NoBackup/{name}/image_{dsp_epoch:03d}.png")
+        
+        # model checkpoint
+        if epoch % 5 == 0:
+            torch.save(model.state_dict(), f"./ae_train_NoBackup/{name}/model_{dsp_epoch:03d}.pth")
+
+    torch.save(model.state_dict(), f"./ae_train_NoBackup/{name}/model_{num_epochs:03d}.pth")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Autoencoder train script")
+    parser.add_argument("name", type=str, help="Name of the training session (name of the save folder)")
+    parser.add_argument("img_folder", type=str, help="Path to directory containing train images (may contain subfolders)")
+    parser.add_argument("--device", type=str, help="PyTorch device to train on (cpu or cuda)", default="cpu")
+    parser.add_argument("--epochs", type=int, help="Number of epochs", default=100)
+    parser.add_argument("--batch_size", type=int, help="Batch size (>=1)", default=32)
+    parser.add_argument("--lr", type=float, help="Learning rate", default=1e-3)
+    parser.add_argument("--image_transforms", action="store_true", help="Truncate and resize images (only enable if the input images have not been truncated resized to the target size already)")
+    parser.add_argument("--noise", action="store_true", help="Add Gaussian noise to model input")
+
+    args = parser.parse_args()
+
+    if args.image_transforms:
+        print("Image transforms enabled: Images will be truncated and resized.")
+    else:
+        print("Image transforms disabled: Images are expected to be of the right size.")
+    
+    data_loader = create_dataloader(args.img_folder, batch_size=args.batch_size, skip_transforms=not args.image_transforms)
+    model = Autoencoder()
+    print("Model:")
+    summary(model, (args.batch_size, 3, 256, 256))
+    print("Is CUDA available:", torch.cuda.is_available())
+    print(f"Devices: ({torch.cuda.device_count()})")
+    for i in range(torch.cuda.device_count()):
+        print(torch.cuda.get_device_name(i))
+    if args.noise:
+        print("Adding Gaussian noise to model input")
+    train_autoencoder(model, data_loader, args.name, device=args.device, num_epochs=args.epochs, lr=args.lr, noise=args.noise)

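A sample invocation of the training script; the session name is a placeholder and the image folder follows resize_session.ipynb:

python train_autoencoder.py ae2_beaver ./ResizedSessions256_NoBackup --device cuda --epochs 100 --batch_size 32 --noise

Checkpoints (model_*.pth), the loss log (log.csv), and reconstruction previews (image_*.png) are written to ./ae_train_NoBackup/ae2_beaver/.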
Some files were not shown because too many files changed in this diff