
autoencoder experiments

Felix Kleinsteuber, 2 years ago
parent
commit
7cbdf96e91
40 changed files with 410 additions and 71 deletions
  1. analyze_labels.ipynb (+72 −0)
  2. approach1a_basic_frame_differencing.ipynb (+10 −19)
  3. approach1a_difference_image.png (BIN)
  4. approach1a_difference_image2.png (BIN)
  5. approach1a_gaussianworksbetter_sigma0.png (BIN)
  6. approach1a_gaussianworksbetter_sigma4.png (BIN)
  7. approach2_background_estimation.ipynb (+8 −11)
  8. approach2_bad_example_imgs.png (BIN)
  9. approach2_bad_example_median.png (BIN)
  10. approach2_good_example_imgs.png (BIN)
  11. approach2_good_example_median.png (BIN)
  12. approach3_dsift.png (BIN)
  13. approach3_local_features.ipynb (+3 −19)
  14. approach4_autoencoder.ipynb (+5 −5)
  15. approach4_reconstructions.png (BIN)
  16. approach4_reconstructions_beaver01.png (BIN)
  17. autoencoder_experiments.ipynb (+218 −0)
  18. eval_autoencoder.py (+5 −4)
  19. plots/approach1a/roc_curves/Beaver_01_absmean_sigma4.pdf (BIN)
  20. plots/approach1a/roc_curves/Beaver_01_absmean_sigma4.png (BIN)
  21. plots/approach1a/roc_curves/Beaver_01_absvar_sigma4.pdf (BIN)
  22. plots/approach1a/roc_curves/Beaver_01_absvar_sigma4.png (BIN)
  23. plots/approach1a/roc_curves/Beaver_01_sqmean_sigma4.pdf (BIN)
  24. plots/approach1a/roc_curves/Beaver_01_sqmean_sigma4.png (BIN)
  25. plots/approach1a/roc_curves/Beaver_01_sqvar_sigma4.pdf (BIN)
  26. plots/approach1a/roc_curves/Beaver_01_sqvar_sigma4.png (BIN)
  27. plots/approach2/roc_curves/Marten_01_sqmean_sigma4.pdf (BIN)
  28. plots/approach2/roc_curves/Marten_01_sqmean_sigma4.png (BIN)
  29. plots/approach2/roc_curves/Marten_01_sqvar_sigma4.pdf (BIN)
  30. plots/approach2/roc_curves/Marten_01_sqvar_sigma4.png (BIN)
  31. plots/approach4/roc_curves/Marten_01_kde,loss.pdf (BIN)
  32. plots/approach4/roc_curves/Marten_01_kde,loss.png (BIN)
  33. plots/approach4/roc_curves/Marten_01_kde.pdf (BIN)
  34. plots/approach4/roc_curves/Marten_01_kde.png (BIN)
  35. plots/approach4/roc_curves/Marten_01_loss.pdf (BIN)
  36. py/Autoencoder3.py (+73 −0)
  37. py/ImageUtils.py (+1 −0)
  38. py/PyTorchData.py (+1 −1)
  39. results.ipynb (+2 −1)
  40. train_autoencoder.py (+12 −11)

+ 72 - 0
analyze_labels.ipynb

@@ -0,0 +1,72 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from py.Labels import LABELS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Beaver_01: 0 anomalous, 74 normal, 0 not annotated, 695 max\n",
+      "Marten_01: 732 anomalous, 2373 normal, 0 not annotated, 3105 max\n",
+      "Fox_03: 246 anomalous, 3702 normal, 1547 not annotated, 5495 max\n",
+      "GFox_03: 246 anomalous, 3702 normal, 1547 not annotated, 5495 max\n"
+     ]
+    }
+   ],
+   "source": [
+    "for session_name, labels in LABELS.items():\n",
+    "    anomalous = len(labels[\"anomalous\"]) if \"anomalous\" in labels else 0\n",
+    "    normal = len(labels[\"normal\"]) if \"normal\" in labels else 0\n",
+    "    not_annotated = len(labels[\"not_annotated\"]) if \"not_annotated\" in labels else 0\n",
+    "    max_nr = labels[\"max\"] if \"max\" in labels else 0\n",
+    "    print(f\"{session_name}: {anomalous} anomalous, {normal} normal, {not_annotated} not annotated, {max_nr} max\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.4 ('pytorch-gpu')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "17cd5c528a3345b75540c61f907eece919c031d57a2ca1e5653325af249173c9"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
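
The cell above only assumes a particular shape for py.Labels.LABELS: a dict mapping each session name to a dict with optional "anomalous", "normal", and "not_annotated" lists plus a "max" image count. A hypothetical entry consistent with that access pattern and with the printed Beaver_01 counts (the list contents are placeholders, not the real annotations):

    # Hypothetical py/Labels.py entry, inferred from how the cell accesses LABELS.
    # All four keys are optional; the loop falls back to 0 when one is missing.
    LABELS = {
        "Beaver_01": {
            "anomalous": [],                # no anomalous images in this session
            "normal": list(range(1, 75)),   # 74 normal images (placeholder numbering)
            "not_annotated": [],
            "max": 695,                     # highest image number in the session
        },
    }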

File diff suppressed because it is too large
+ 10 - 19
approach1a_basic_frame_differencing.ipynb


BIN
approach1a_difference_image.png


BIN
approach1a_difference_image2.png


BIN
approach1a_gaussianworksbetter_sigma0.png


BIN
approach1a_gaussianworksbetter_sigma4.png


File diff suppressed because it is too large
+ 8 - 11
approach2_background_estimation.ipynb


BIN
approach2_bad_example_imgs.png


BIN
approach2_bad_example_median.png


BIN
approach2_good_example_imgs.png


BIN
approach2_good_example_median.png


BIN
approach3_dsift.png


File diff suppressed because it is too large
+ 3 - 19
approach3_local_features.ipynb


File diff suppressed because it is too large
+ 5 - 5
approach4_autoencoder.ipynb


BIN
approach4_reconstructions.png


BIN
approach4_reconstructions_beaver01.png


+ 218 - 0
autoencoder_experiments.ipynb

@@ -0,0 +1,218 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/kleinsteuber/anaconda3/envs/pytorch-gpu/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from torch import nn\n",
+    "from torchinfo import summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Autoencoder(nn.Module):\n",
+    "    def __init__(self, dropout=0.1, latent_features=512):\n",
+    "        super(Autoencoder, self).__init__()\n",
+    "        self.encoder = nn.Sequential(\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=2),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Flatten(),\n",
+    "            nn.Linear(1024, latent_features),\n",
+    "            nn.ReLU(True),\n",
+    "        )\n",
+    "        self.decoder = nn.Sequential(\n",
+    "            nn.Linear(512, 1024),\n",
+    "            nn.ReLU(True),\n",
+    "            nn.Unflatten(1, (64, 4, 4)),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.ConvTranspose2d(64, 64, kernel_size=6, stride=2, padding=2),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.ConvTranspose2d(64, 64, kernel_size=8, stride=2, padding=3),\n",
+    "            nn.ReLU(True),\n",
+    "\n",
+    "            nn.Dropout(dropout),\n",
+    "            nn.Conv2d(64, 3, kernel_size=3, stride=1, padding=\"same\"),\n",
+    "            nn.Tanh(),\n",
+    "        )\n",
+    "    \n",
+    "    def forward(self, x):\n",
+    "        x = self.encoder(x)\n",
+    "        x = self.decoder(x)\n",
+    "        return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "==========================================================================================\n",
+       "Layer (type:depth-idx)                   Output Shape              Param #\n",
+       "==========================================================================================\n",
+       "Autoencoder                              [32, 3, 256, 256]         --\n",
+       "├─Sequential: 1-1                        [32, 512]                 --\n",
+       "│    └─Dropout: 2-1                      [32, 3, 256, 256]         --\n",
+       "│    └─Conv2d: 2-2                       [32, 32, 128, 128]        4,736\n",
+       "│    └─ReLU: 2-3                         [32, 32, 128, 128]        --\n",
+       "│    └─Dropout: 2-4                      [32, 32, 128, 128]        --\n",
+       "│    └─Conv2d: 2-5                       [32, 64, 64, 64]          51,264\n",
+       "│    └─ReLU: 2-6                         [32, 64, 64, 64]          --\n",
+       "│    └─Dropout: 2-7                      [32, 64, 64, 64]          --\n",
+       "│    └─Conv2d: 2-8                       [32, 64, 32, 32]          36,928\n",
+       "│    └─ReLU: 2-9                         [32, 64, 32, 32]          --\n",
+       "│    └─Dropout: 2-10                     [32, 64, 32, 32]          --\n",
+       "│    └─Conv2d: 2-11                      [32, 64, 16, 16]          36,928\n",
+       "│    └─ReLU: 2-12                        [32, 64, 16, 16]          --\n",
+       "│    └─Dropout: 2-13                     [32, 64, 16, 16]          --\n",
+       "│    └─Conv2d: 2-14                      [32, 128, 8, 8]           73,856\n",
+       "│    └─ReLU: 2-15                        [32, 128, 8, 8]           --\n",
+       "│    └─Dropout: 2-16                     [32, 128, 8, 8]           --\n",
+       "│    └─Conv2d: 2-17                      [32, 64, 4, 4]            73,792\n",
+       "│    └─ReLU: 2-18                        [32, 64, 4, 4]            --\n",
+       "│    └─Dropout: 2-19                     [32, 64, 4, 4]            --\n",
+       "│    └─Flatten: 2-20                     [32, 1024]                --\n",
+       "│    └─Linear: 2-21                      [32, 512]                 524,800\n",
+       "│    └─ReLU: 2-22                        [32, 512]                 --\n",
+       "├─Sequential: 1-2                        [32, 3, 256, 256]         --\n",
+       "│    └─Linear: 2-23                      [32, 1024]                525,312\n",
+       "│    └─ReLU: 2-24                        [32, 1024]                --\n",
+       "│    └─Unflatten: 2-25                   [32, 64, 4, 4]            --\n",
+       "│    └─Dropout: 2-26                     [32, 64, 4, 4]            --\n",
+       "│    └─ConvTranspose2d: 2-27             [32, 128, 8, 8]           131,200\n",
+       "│    └─ReLU: 2-28                        [32, 128, 8, 8]           --\n",
+       "│    └─Dropout: 2-29                     [32, 128, 8, 8]           --\n",
+       "│    └─ConvTranspose2d: 2-30             [32, 64, 16, 16]          131,136\n",
+       "│    └─ReLU: 2-31                        [32, 64, 16, 16]          --\n",
+       "│    └─Dropout: 2-32                     [32, 64, 16, 16]          --\n",
+       "│    └─ConvTranspose2d: 2-33             [32, 64, 32, 32]          65,600\n",
+       "│    └─ReLU: 2-34                        [32, 64, 32, 32]          --\n",
+       "│    └─Dropout: 2-35                     [32, 64, 32, 32]          --\n",
+       "│    └─ConvTranspose2d: 2-36             [32, 64, 64, 64]          65,600\n",
+       "│    └─ReLU: 2-37                        [32, 64, 64, 64]          --\n",
+       "│    └─Dropout: 2-38                     [32, 64, 64, 64]          --\n",
+       "│    └─ConvTranspose2d: 2-39             [32, 32, 128, 128]        73,760\n",
+       "│    └─ReLU: 2-40                        [32, 32, 128, 128]        --\n",
+       "│    └─Dropout: 2-41                     [32, 32, 128, 128]        --\n",
+       "│    └─ConvTranspose2d: 2-42             [32, 16, 256, 256]        32,784\n",
+       "│    └─ReLU: 2-43                        [32, 16, 256, 256]        --\n",
+       "│    └─Dropout: 2-44                     [32, 16, 256, 256]        --\n",
+       "│    └─Conv2d: 2-45                      [32, 3, 256, 256]         435\n",
+       "│    └─Tanh: 2-46                        [32, 3, 256, 256]         --\n",
+       "==========================================================================================\n",
+       "Total params: 1,828,131\n",
+       "Trainable params: 1,828,131\n",
+       "Non-trainable params: 0\n",
+       "Total mult-adds (G): 131.37\n",
+       "==========================================================================================\n",
+       "Input size (MB): 25.17\n",
+       "Forward/backward pass size (MB): 768.21\n",
+       "Params size (MB): 7.31\n",
+       "Estimated Total Size (MB): 800.69\n",
+       "=========================================================================================="
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "summary(Autoencoder(), (32, 3, 256, 256))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.4 ('pytorch-gpu')",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "17cd5c528a3345b75540c61f907eece919c031d57a2ca1e5653325af249173c9"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
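
As a sanity check on the Flatten/Linear sizing in this notebook: the six stride-2 convolutions halve the spatial resolution from 256 down to 4 (256, 128, 64, 32, 16, 8, 4), and with 64 output channels the flattened bottleneck is 64 * 4 * 4 = 1024 features, matching nn.Linear(1024, latent_features). (The torchinfo summary in the saved output was produced by a variant with 32- and 128-channel layers, but it arrives at the same [32, 1024] flatten.) A quick shape check, assuming the Autoencoder cell above has been run:

    import torch

    # Encoder bottleneck check: 256 / 2**6 = 4, so Flatten sees 64*4*4 = 1024 features.
    model = Autoencoder()
    with torch.no_grad():
        z = model.encoder(torch.zeros(1, 3, 256, 256))
    print(z.shape)  # expected: torch.Size([1, 512])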

+ 5 - 4
eval_autoencoder.py

@@ -15,18 +15,18 @@ from torch.utils.data import DataLoader
 from py.FileUtils import dump
 from py.Dataset import Dataset
 from py.PyTorchData import create_dataloader
-from py.Autoencoder2 import Autoencoder
+from py.Autoencoder3 import Autoencoder
 from py.Labels import LABELS
 
 TRAIN_FOLDER = "./ae_train_NoBackup"
 
-def load_autoencoder(train_name: str, device: str = "cpu", model_number: int = -1):
+def load_autoencoder(train_name: str, device: str = "cpu", model_number: int = -1, latent_features: int = 32):
     if model_number < 0:
         model_path = sorted(glob(f"./ae_train_NoBackup/{train_name}/model_*.pth"))[-1]
     else:
         model_path = f"./ae_train_NoBackup/{train_name}/model_{model_number:03d}.pth"
     print(f"Loading model from {model_path}... ", end="")
-    model = Autoencoder()
+    model = Autoencoder(latent_features=latent_features)
     model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
     model.eval()
     print("Loaded!")
@@ -63,6 +63,7 @@ def main():
     parser.add_argument("session", type=str, help="Session name")
     parser.add_argument("--device", type=str, help="PyTorch device to train on (cpu or cuda)", default="cpu")
     parser.add_argument("--batch_size", type=int, help="Batch size (>=1)", default=32)
+    parser.add_argument("--latent", type=int, help="Number of latent features", default=512)
     parser.add_argument("--model_number", type=int, help="Load model save of specific epoch (default: use latest)", default=-1)
     parser.add_argument("--image_transforms", action="store_true", help="Truncate and resize images (only enable if the input images have not been truncated resized to the target size already)")
     
@@ -85,7 +86,7 @@ def main():
     motion_eval_file = os.path.join(save_dir, f"{session.name}_motion.pickle")
 
     # Load model
-    model = load_autoencoder(args.name, args.device, args.model_number)
+    model = load_autoencoder(args.name, args.device, args.model_number, latent_features=args.latent)
     
     # Check CUDA
     print("Is CUDA available:", torch.cuda.is_available())

BIN
plots/approach1a/roc_curves/Beaver_01_absmean_sigma4.pdf


BIN
plots/approach1a/roc_curves/Beaver_01_absmean_sigma4.png


BIN
plots/approach1a/roc_curves/Beaver_01_absvar_sigma4.pdf


BIN
plots/approach1a/roc_curves/Beaver_01_absvar_sigma4.png


BIN
plots/approach1a/roc_curves/Beaver_01_sqmean_sigma4.pdf


BIN
plots/approach1a/roc_curves/Beaver_01_sqmean_sigma4.png


BIN
plots/approach1a/roc_curves/Beaver_01_sqvar_sigma4.pdf


BIN
plots/approach1a/roc_curves/Beaver_01_sqvar_sigma4.png


BIN
plots/approach2/roc_curves/Marten_01_sqmean_sigma4.pdf


BIN
plots/approach2/roc_curves/Marten_01_sqmean_sigma4.png


BIN
plots/approach2/roc_curves/Marten_01_sqvar_sigma4.pdf


BIN
plots/approach2/roc_curves/Marten_01_sqvar_sigma4.png


BIN
plots/approach4/roc_curves/Marten_01_kde,loss.pdf


BIN
plots/approach4/roc_curves/Marten_01_kde,loss.png


BIN
plots/approach4/roc_curves/Marten_01_kde.pdf


BIN
plots/approach4/roc_curves/Marten_01_kde.png


BIN
plots/approach4/roc_curves/Marten_01_loss.pdf


+ 73 - 0
py/Autoencoder3.py

@@ -0,0 +1,73 @@
+from torch import nn
+
+class Autoencoder(nn.Module):
+    def __init__(self, dropout=0.1, latent_features=512):
+        super(Autoencoder, self).__init__()
+        self.encoder = nn.Sequential(
+            nn.Dropout(dropout),
+            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=2),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Flatten(),
+            nn.Linear(1024, latent_features),
+            nn.ReLU(True),
+        )
+        self.decoder = nn.Sequential(
+            nn.Linear(512, 1024),
+            nn.ReLU(True),
+            nn.Unflatten(1, (64, 4, 4)),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(64, 64, kernel_size=6, stride=2, padding=2),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.ConvTranspose2d(64, 64, kernel_size=8, stride=2, padding=3),
+            nn.ReLU(True),
+
+            nn.Dropout(dropout),
+            nn.Conv2d(64, 3, kernel_size=3, stride=1, padding="same"),
+            nn.Tanh(),
+        )
+    
+    def forward(self, x):
+        x = self.encoder(x)
+        x = self.decoder(x)
+        return x
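
Note that the decoder in py/Autoencoder3.py still hard-codes the latent width: its first layer is nn.Linear(512, 1024), so constructing Autoencoder(latent_features=256) would fail with a shape mismatch in forward(). A sketch of the presumably intended parameterization (make_decoder_head is illustrative, not part of the commit):

    from torch import nn

    # Presumed fix: tie the decoder input width to latent_features instead of 512.
    def make_decoder_head(latent_features: int) -> nn.Sequential:
        return nn.Sequential(
            nn.Linear(latent_features, 1024),  # was: nn.Linear(512, 1024)
            nn.ReLU(True),
            nn.Unflatten(1, (64, 4, 4)),
        )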

+ 1 - 0
py/ImageUtils.py

@@ -24,6 +24,7 @@ def display_images(images: list, titles: list, colorbar=False, size=(8, 5), row_
         titles (list of str): list of titles
         colorbar (bool, optional): Display colorbars. Defaults to False.
         size (tuple of ints, optional): plt size (width, height) per image. Defaults to (8, 5).
+        row_size (int, optional): Images per row. Defaults to 2.
     """
     num_imgs = len(images)
     num_cols = row_size

+ 1 - 1
py/PyTorchData.py

@@ -73,7 +73,7 @@ def get_log(name: str, display: bool = False, figsize: tuple = (12, 6)):
     losses = []
     with open(f"./ae_train_NoBackup/{name}/log.csv", "r") as f:
         for line in f:
-            it, loss = line.rstrip().split(",")
+            it, loss = line.rstrip().split(",")[:2]
             its.append(int(it))
             losses.append(float(loss))
     if display:
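
This pairs with the train_autoencoder.py change below, which appends the regularization loss as a third CSV column; taking only the first two fields keeps get_log() compatible with both the old and the new log format. An illustrative parse (the values are made up):

    # New log lines look like "epoch,total_loss,total_reg_loss".
    it, loss = "12,0.0153,0.0021".rstrip().split(",")[:2]
    print(int(it), float(loss))  # 12 0.0153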

+ 2 - 1
results.ipynb

@@ -53,8 +53,9 @@
     "| | $\\sigma=2$, sq var | 0.8004 | 0.3236 | 0.1606 | 0.0434 | 0 | 6 min |\n",
     "| | $\\sigma=4$, sq var | 0.8030 | 0.3536 | 0.2031 | 0.0801 | 0 | 6 min |\n",
     "| 2 - Background Estimation | sqmean | 0.5056 | 0.0295 | 0.0219 | 0.0169 | 0 | 2:30 min |\n",
+    "| | $\\sigma=4$, sqvar | 0.7403 | 0.2090 | 0.1150 | 0.0253 | 0 | 4:00 min |\n",
     "| 3 - BOW | $k = 4096, kp = 30$, random | 0.6619 | 0.4973 | 0.2186 | 0.1298 | 12 min | 13 min |\n",
-    "| 4 - Autoencoder | Deep +Noise +Sparse Loss (lr=1e-4, 200 epochs) | 0.8572 | 0.3211 | 0.0021 | 0.0000 | 8:30 min | 1:30 min |"
+    "| 4 - Autoencoder | Deep +Noise +Sparse Loss (lr=1e-4, 200 epochs, reg=0.1) | 0.7479 | 0.2086 | 0.1138 | 0.0008 | 8:30 min | 1:30 min |"
    ]
   },
   {

+ 12 - 11
train_autoencoder.py

@@ -13,9 +13,9 @@ from torchvision.utils import save_image
 from torchinfo import summary
 
 from py.PyTorchData import create_dataloader, model_output_to_image
-from py.Autoencoder2 import Autoencoder
+from py.Autoencoder3 import Autoencoder
 
-def train_autoencoder(model: Autoencoder, train_dataloader: DataLoader, name: str, device: str = "cpu", num_epochs=100, criterion = nn.MSELoss(), lr: float = 1e-3, weight_decay: float = 1e-5, noise: bool = False, sparse: bool = False):
+def train_autoencoder(model: Autoencoder, train_dataloader: DataLoader, name: str, device: str = "cpu", num_epochs=100, criterion = nn.MSELoss(), lr: float = 1e-3, weight_decay: float = 1e-5, noise: bool = False, sparse: bool = False, reg_rate: float = 1e-4):
     model = model.to(device)
     print(f"Using {device} device")
     optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
@@ -37,11 +37,11 @@ def train_autoencoder(model: Autoencoder, train_dataloader: DataLoader, name: st
             latent = model.encoder(input)
             output = model.decoder(latent)
             loss = criterion(output, img)
-            total_loss += loss.data
+            total_loss += loss.item()
             if sparse:
-                reg_loss = 1e-4 * torch.mean(torch.abs(latent))
-                total_reg_loss += reg_loss.data
-                loss += reg_loss.data
+                reg_loss = reg_rate * torch.mean(torch.abs(latent))
+                total_reg_loss += reg_loss.item()
+                loss += reg_loss
             # ===================backward====================
             loss.backward()
             optimizer.step()
@@ -54,7 +54,7 @@ def train_autoencoder(model: Autoencoder, train_dataloader: DataLoader, name: st
         
         # log file
         with open(f"./ae_train_NoBackup/{name}/log.csv", "a+") as f:
-            f.write(f"{dsp_epoch},{total_loss}\n")
+            f.write(f"{dsp_epoch},{total_loss},{total_reg_loss}\n")
         
         # output image
         if epoch % 2 == 0:
@@ -62,7 +62,7 @@ def train_autoencoder(model: Autoencoder, train_dataloader: DataLoader, name: st
             save_image(pic, f"./ae_train_NoBackup/{name}/image_{dsp_epoch:03d}.png")
         
         # model checkpoint
-        if epoch % 5 == 0:
+        if epoch % 10 == 0:
             torch.save(model.state_dict(), f"./ae_train_NoBackup/{name}/model_{dsp_epoch:03d}.pth")
 
     torch.save(model.state_dict(), f"./ae_train_NoBackup/{name}/model_{num_epochs:03d}.pth")
@@ -76,8 +76,9 @@ if __name__ == "__main__":
     parser.add_argument("--epochs", type=int, help="Number of epochs", default=100)
     parser.add_argument("--batch_size", type=int, help="Batch size (>=1)", default=32)
     parser.add_argument("--lr", type=float, help="Learning rate", default=1e-3)
+    parser.add_argument("--reg_rate", type=float, help="Sparse regularization rate", default=1e-4)
     parser.add_argument("--dropout", type=float, help="Dropout rate on all layers", default=0.05)
-    parser.add_argument("--latent_channels", type=float, help="Latent channels n (-> n*16 latent features)", default=32)
+    parser.add_argument("--latent", type=int, help="Number of latent features", default=512)
     parser.add_argument("--image_transforms", action="store_true", help="Truncate and resize images (only enable if the input images have not been truncated resized to the target size already)")
     parser.add_argument("--noise", action="store_true", help="Add Gaussian noise to model input")
     parser.add_argument("--sparse", action="store_true", help="Add L1 penalty to latent features")
@@ -90,7 +91,7 @@ if __name__ == "__main__":
         print("Image transforms disabled: Images are expected to be of the right size.")
     
     data_loader = create_dataloader(args.img_folder, batch_size=args.batch_size, skip_transforms=not args.image_transforms)
-    model = Autoencoder(dropout=args.dropout, latent_channels=args.latent_channels)
+    model = Autoencoder(dropout=args.dropout, latent_features=args.latent)
     print("Model:")
     summary(model, (args.batch_size, 3, 256, 256))
     print("Is CUDA available:", torch.cuda.is_available())
@@ -101,4 +102,4 @@ if __name__ == "__main__":
         print("Adding Gaussian noise to model input")
     if args.sparse:
         print("Adding L1 penalty to latent features (sparse)")
-    train_autoencoder(model, data_loader, args.name, device=args.device, num_epochs=args.epochs, lr=args.lr, noise=args.noise, sparse=args.sparse)
+    train_autoencoder(model, data_loader, args.name, device=args.device, num_epochs=args.epochs, lr=args.lr, noise=args.noise, sparse=args.sparse, reg_rate=args.reg_rate)
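
For reference, a hedged end-to-end sketch of the updated training entry points; the image folder and run name are placeholders, and reg_rate=0.1 mirrors the reg=0.1 run recorded in results.ipynb:

    from py.Autoencoder3 import Autoencoder
    from py.PyTorchData import create_dataloader
    from train_autoencoder import train_autoencoder

    # Placeholder paths/names; images are assumed to be pre-resized to 256x256.
    loader = create_dataloader("./imgs_NoBackup/Marten_01", batch_size=32,
                               skip_transforms=True)
    model = Autoencoder(dropout=0.05, latent_features=512)
    train_autoencoder(model, loader, "marten_sparse", device="cuda",
                      num_epochs=100, lr=1e-3, sparse=True, reg_rate=0.1)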

Some files were not shown because too many files changed in this diff