
BOW train script

Felix Kleinsteuber, 3 years ago
parent · commit 6686087b7c
6 changed files with 248 additions and 28 deletions
  1. approach3_local_features.ipynb (+17 −6)
  2. approach4_autoencoder2.ipynb (+85 −13)
  3. eval_autoencoder.py (+69 −0)
  4. py/FileUtils.py (+2 −1)
  5. resize_session.ipynb (+7 −8)
  6. train_bow.py (+68 −0)

The diff for the following file is too large to display.
+ 17 - 6
approach3_local_features.ipynb


+ 85 - 13
approach4_autoencoder2.ipynb

@@ -9,16 +9,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/kleinsteuber/anaconda3/envs/pytorch-gpu/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n",
-      "Found 1 sessions\n",
+      "Found 2 sessions\n",
       "Session 'Beaver_01' at folder: /home/kleinsteuber/vscode/ResizedSessions256_NoBackup/VIELAAS_Spring_Session01-VIELAAS_Beaver_01\n",
       "Loaded scans.\n"
      ]
@@ -110,7 +116,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -132,7 +138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -172,7 +178,7 @@
        ")"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -185,7 +191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -198,7 +204,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -220,7 +226,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -250,23 +256,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 22/22 [03:12<00:00,  8.77s/it]\n"
+      "100%|██████████| 22/22 [03:42<00:00, 10.09s/it]\n"
      ]
     }
    ],
    "source": [
+    "from sklearn.neighbors import KernelDensity\n",
+    "\n",
+    "#Losses\n",
     "criterion = nn.MSELoss()\n",
     "normal_losses = []\n",
     "anomalous_losses = []\n",
+    "\n",
+    "# KDE\n",
+    "kde = KernelDensity(kernel=\"gaussian\", bandwidth=0.2)\n",
+    "normal_encoded_imgs = []\n",
+    "anomalous_encoded_imgs = []\n",
+    "\n",
     "for features, labels in tqdm(test_dl):\n",
-    "    output_batch = model(features)\n",
+    "    # Calculate and save encoded representation\n",
+    "    encoded = model.encoder(features)\n",
+    "    encoded_flat = encoded.detach().numpy().reshape(encoded.size()[0], -1)\n",
+    "    for enc, label in zip(encoded_flat, labels):\n",
+    "        if label == 1:\n",
+    "            anomalous_encoded_imgs.append(enc)\n",
+    "        else:\n",
+    "            normal_encoded_imgs.append(enc)\n",
+    "    \n",
+    "    # decode and determine loss\n",
+    "    output_batch = model.decoder(encoded)\n",
     "    for input, output, label in zip(features, output_batch, labels):\n",
     "        loss = criterion(input, output)\n",
     "        if label == 1:\n",
@@ -275,6 +300,53 @@
     "            normal_losses.append(loss.item())"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/kleinsteuber/anaconda3/envs/pytorch-gpu/lib/python3.10/site-packages/numpy/lib/npyio.py:518: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  arr = np.asanyarray(arr)\n"
+     ]
+    }
+   ],
+   "source": [
+    "np.save(\"Cache_NoBackup/approach4_ae2_encoded.npy\", [normal_encoded_imgs, anomalous_encoded_imgs])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'algorithm': 'auto',\n",
+       " 'atol': 0,\n",
+       " 'bandwidth': 0.2,\n",
+       " 'breadth_first': True,\n",
+       " 'kernel': 'gaussian',\n",
+       " 'leaf_size': 40,\n",
+       " 'metric': 'euclidean',\n",
+       " 'metric_params': None,\n",
+       " 'rtol': 0}"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "kde = KernelDensity(kernel=\"gaussian\", bandwidth=0.2).fit(normal_encoded_imgs)\n",
+    "kde.get_params()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 31,

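The cells above fit the KDE on encodings of normal images only. As a minimal sketch of how the fitted model could be used for scoring (not part of this commit; it reuses the variable names from the diff above), a lower log-density under the normal-data KDE suggests an anomaly:

```python
import numpy as np

# Score encodings under the KDE fitted on normal images.
# kde, normal_encoded_imgs, anomalous_encoded_imgs come from the cells above.
normal_scores = kde.score_samples(np.asarray(normal_encoded_imgs))       # log-densities
anomalous_scores = kde.score_samples(np.asarray(anomalous_encoded_imgs))

# Flag images whose log-density falls below e.g. the 5th percentile of normal scores.
threshold = np.percentile(normal_scores, 5)
print("anomalies flagged:", int((anomalous_scores < threshold).sum()))
```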
+ 69 - 0
eval_autoencoder.py

@@ -0,0 +1,69 @@
+import argparse
+import os
+from tqdm import tqdm
+import numpy as np
+import torch
+from torch import nn
+from torch.autograd import Variable
+from torch.utils.data import DataLoader
+from torchvision.utils import save_image
+from torchinfo import summary
+
+from py.PyTorchData import create_dataloader, model_output_to_image
+from py.Autoencoder2 import Autoencoder
+
+def eval_autoencoder(model: Autoencoder, dataloader: DataLoader, name: str, set_name: str, device: str = "cpu", criterion = nn.MSELoss()):
+    model = model.to(device)
+    print(f"Using {device} device")
+
+    print(f"Saving evaluation results to ./ae_train_NoBackup/{name}/eval")
+    os.makedirs(f"./ae_train_NoBackup/{name}/eval", exist_ok=True)
+
+    labels = []
+    encodeds = []
+    losses = []
+
+    for img, label_batch in tqdm(dataloader):
+        img_batch = Variable(img).to(device)
+        # ===================forward=====================
+        encoded = model.encoder(img_batch)
+        encoded_flat = encoded.detach().cpu().numpy().reshape(encoded.size()[0], -1)
+        output_batch = model.decoder(encoded)
+
+        for input, output, label, enc_flat in zip(img_batch, output_batch, label_batch, encoded_flat):
+            losses.append(criterion(input, output).item())
+            encodeds.append(enc_flat)
+            labels.append(label.item())
+    # save labels, encoded representations and per-image losses together
+    np.save(f"./ae_train_NoBackup/{name}/eval/{set_name}.npy", np.array([labels, encodeds, losses], dtype=object))
+
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Autoencoder eval script")
+    parser.add_argument("name", type=str, help="Name of the training session (name of the save folder)")
+    parser.add_argument("model_name", type=str, help="Filename of the model (e.g. model_120.pth)")
+    parser.add_argument("set_name", type=str, help="Name of the dataset (e.g. train or test)")
+    parser.add_argument("img_folder", type=str, help="Path to directory containing train images (may contain subfolders)")
+    parser.add_argument("--device", type=str, help="PyTorch device to train on (cpu or cuda)", default="cpu")
+    parser.add_argument("--batch_size", type=int, help="Batch size (>=1)", default=32)
+    parser.add_argument("--image_transforms", action="store_true", help="Truncate and resize images (only enable if the input images have not been truncated resized to the target size already)")
+    
+    args = parser.parse_args()
+
+    if args.image_transforms:
+        print("Image transforms enabled: Images will be truncated and resized.")
+    else:
+        print("Image transforms disabled: Images are expected to be of the right size.")
+    
+    dataloader = create_dataloader(args.img_folder, batch_size=args.batch_size, skip_transforms=not args.image_transforms)
+    model = Autoencoder()
+    # load the trained weights (assumed to be saved in the session folder by the train script)
+    model.load_state_dict(torch.load(f"./ae_train_NoBackup/{args.name}/{args.model_name}", map_location=args.device))
+    print("Model:")
+    summary(model, (args.batch_size, 3, 256, 256))
+    print("Is CUDA available:", torch.cuda.is_available())
+    print(f"Devices: ({torch.cuda.device_count()})")
+    for i in range(torch.cuda.device_count()):
+        print(torch.cuda.get_device_name(i))
+    eval_autoencoder(model, dataloader, args.name, args.set_name, args.device)

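A hypothetical invocation of the script above (session, model, and folder names are placeholders, not from the commit):

```
python eval_autoencoder.py ae_session_01 model_120.pth test ./ResizedSessions256_NoBackup --device cuda --batch_size 32
```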
+ 2 - 1
py/FileUtils.py

@@ -32,4 +32,5 @@ def verify_expected_subfolders(session_path: str):
         session_path (str): session folder path
     """
     subfolders = list_folders(session_path)
-    assert sorted(subfolders) == expected_subfolders
+    if sorted(subfolders) != sorted(expected_subfolders):
+        raise AssertionError(f"{session_path}: Expected subfolders {expected_subfolders} but found {subfolders}")

+ 7 - 8
resize_session.ipynb

@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -24,7 +24,6 @@
    ],
    "source": [
     "import os\n",
-    "import glob\n",
     "from tqdm import tqdm\n",
     "import cv2 as cv\n",
     "\n",
@@ -40,7 +39,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,31 +62,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 2629/2629 [11:35<00:00,  3.78it/s]\n"
+      "100%|██████████| 2629/2629 [08:16<00:00,  5.29it/s]\n"
      ]
     }
    ],
    "source": [
-    "copy_session(session, \"ResizedSessions_NoBackup\", scale=0.4)"
+    "copy_session(session, \"ResizedSessions_NoBackup\", scale=0.3, truncate_y=(40, 40))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 2629/2629 [07:13<00:00,  6.07it/s]\n"
+      "100%|██████████| 5767/5767 [17:31<00:00,  5.48it/s] \n"
      ]
     }
    ],

+ 68 - 0
train_bow.py

@@ -0,0 +1,68 @@
+import argparse
+import os
+import cv2 as cv
+import numpy as np
+from tqdm import tqdm
+
+from py.Dataset import Dataset
+from py.Session import SessionImage
+
+def dense_keypoints(img, step=30, off=(15, 12)):
+    border_dist = (step + 1) // 2
+    return [cv.KeyPoint(x, y, step) for y in range(border_dist + off[0], img.shape[0] - border_dist, step) 
+                                    for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
+
+def extract_descriptors(images: list[SessionImage]):
+    sift = cv.SIFT_create()
+    dscs = []
+    for image in tqdm(images):
+        img = image.read_opencv(gray=True)
+        kp = dense_keypoints(img)
+        kp, des = sift.compute(img, kp)
+        dscs.append(des)
+    return np.array(dscs)
+
+def generate_dictionary(dscs, dictionary_size):
+    # dictionary size = number of clusters
+    BOW = cv.BOWKMeansTrainer(dictionary_size)
+    for dsc in dscs:
+        BOW.add(dsc)
+    dictionary = BOW.cluster()
+    return dictionary
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="BOW train script")
+    parser.add_argument("dataset_dir", type=str, help="Directory of the dataset containing all session folders")
+    parser.add_argument("session_name", type=str, help="Name of the session to use for Lapse images (e.g. marten_01)")
+    parser.add_argument("--clusters", type=int, help="Number of clusters / BOW vocabulary size", default=1024)
+
+    args = parser.parse_args()
+
+    ds = Dataset(args.dataset_dir)
+    session = ds.create_session(args.session_name)
+    save_dir = f"./bow_train_NoBackup/{session.name}"
+
+    # Lapse DSIFT descriptors
+
+    lapse_dscs_file = os.path.join(save_dir, "lapse_dscs.npy")
+    if os.path.isfile(lapse_dscs_file):
+        print(f"{lapse_dscs_file} already exists, loading lapse descriptor from file...")
+        lapse_dscs = np.load(lapse_dscs_file)
+    else:
+        print("Extracting lapse descriptors...")
+        lapse_dscs = extract_descriptors(list(session.generate_lapse_images()))
+        os.makedirs(save_dir, exist_ok=True)
+        np.save(lapse_dscs_file, lapse_dscs)
+
+    # BOW dictionary
+
+    dictionary_file = os.path.join(save_dir, f"bow_dict_{args.clusters}.npy")
+    if os.path.isfile(dictionary_file):
+        print(f"{dictionary_file} already exists, loading BOW dictionary from file...")
+        dictionary = np.load(dictionary_file)
+    else:
+        print(f"Creating BOW vocabulary with {args.clusters} clusters...")
+        dictionary = generate_dictionary(lapse_dscs, args.clusters)
+        np.save(dictionary_file, dictionary)
+    
+    print("Complete!")

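The script stops after clustering the vocabulary. As a sketch of a plausible next step (not part of this commit), OpenCV's BOWImgDescriptorExtractor can map each image's dense SIFT descriptors onto the vocabulary, yielding one normalized histogram per image; `dictionary` and `dense_keypoints` are the ones from train_bow.py above, while the image path is a placeholder:

```python
import cv2 as cv
import numpy as np

# Map dense SIFT descriptors onto the clustered vocabulary -> BOW histogram.
sift = cv.SIFT_create()
bow_extractor = cv.BOWImgDescriptorExtractor(sift, cv.BFMatcher(cv.NORM_L2))
bow_extractor.setVocabulary(dictionary.astype(np.float32))  # vocabulary from train_bow.py

img = cv.imread("example.jpg", cv.IMREAD_GRAYSCALE)       # placeholder image
hist = bow_extractor.compute(img, dense_keypoints(img))   # shape (1, clusters), normalized
```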
Some files were not shown because too many files changed in this diff.