
BOW train script

Felix Kleinsteuber, 3 years ago
parent · commit 6686087b7c
6 changed files with 248 additions and 28 deletions
  1. approach3_local_features.ipynb (+17 −6)
  2. approach4_autoencoder2.ipynb (+85 −13)
  3. eval_autoencoder.py (+69 −0)
  4. py/FileUtils.py (+2 −1)
  5. resize_session.ipynb (+7 −8)
  6. train_bow.py (+68 −0)

The diff for the following file is too large to display.
+ 17 - 6
approach3_local_features.ipynb


+ 85 - 13
approach4_autoencoder2.ipynb

@@ -9,16 +9,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/kleinsteuber/anaconda3/envs/pytorch-gpu/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n",
-      "Found 1 sessions\n",
+      "Found 2 sessions\n",
       "Session 'Beaver_01' at folder: /home/kleinsteuber/vscode/ResizedSessions256_NoBackup/VIELAAS_Spring_Session01-VIELAAS_Beaver_01\n",
       "Loaded scans.\n"
      ]
@@ -110,7 +116,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -132,7 +138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -172,7 +178,7 @@
        ")"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -185,7 +191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -198,7 +204,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -220,7 +226,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -250,23 +256,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 22/22 [03:12<00:00,  8.77s/it]\n"
+      "100%|██████████| 22/22 [03:42<00:00, 10.09s/it]\n"
      ]
     }
    ],
    "source": [
+    "from sklearn.neighbors import KernelDensity\n",
+    "\n",
+    "#Losses\n",
     "criterion = nn.MSELoss()\n",
     "normal_losses = []\n",
     "anomalous_losses = []\n",
+    "\n",
+    "# KDE\n",
+    "kde = KernelDensity(kernel=\"gaussian\", bandwidth=0.2)\n",
+    "normal_encoded_imgs = []\n",
+    "anomalous_encoded_imgs = []\n",
+    "\n",
     "for features, labels in tqdm(test_dl):\n",
-    "    output_batch = model(features)\n",
+    "    # Calculate and save encoded representation\n",
+    "    encoded = model.encoder(features)\n",
+    "    encoded_flat = encoded.detach().numpy().reshape(encoded.size()[0], -1)\n",
+    "    for enc, label in zip(encoded_flat, labels):\n",
+    "        if label == 1:\n",
+    "            anomalous_encoded_imgs.append(enc)\n",
+    "        else:\n",
+    "            normal_encoded_imgs.append(enc)\n",
+    "    \n",
+    "    # decode and determine loss\n",
+    "    output_batch = model.decoder(encoded)\n",
     "    for input, output, label in zip(features, output_batch, labels):\n",
     "        loss = criterion(input, output)\n",
     "        if label == 1:\n",
@@ -275,6 +300,53 @@
     "            normal_losses.append(loss.item())"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/kleinsteuber/anaconda3/envs/pytorch-gpu/lib/python3.10/site-packages/numpy/lib/npyio.py:518: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
+      "  arr = np.asanyarray(arr)\n"
+     ]
+    }
+   ],
+   "source": [
+    "np.save(\"Cache_NoBackup/approach4_ae2_encoded.npy\", [normal_encoded_imgs, anomalous_encoded_imgs])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'algorithm': 'auto',\n",
+       " 'atol': 0,\n",
+       " 'bandwidth': 0.2,\n",
+       " 'breadth_first': True,\n",
+       " 'kernel': 'gaussian',\n",
+       " 'leaf_size': 40,\n",
+       " 'metric': 'euclidean',\n",
+       " 'metric_params': None,\n",
+       " 'rtol': 0}"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "kde = KernelDensity(kernel=\"gaussian\", bandwidth=0.2).fit(normal_encoded_imgs)\n",
+    "kde.get_params()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 31,

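The cells above fit the KDE on encodings of normal images only. As a minimal sketch of how the fitted model could be used for scoring (not part of this commit; it reuses the variable names from the diff above), a lower log-density under the normal-data KDE suggests an anomaly:

```python
import numpy as np

# Score encodings under the KDE fitted on normal images.
# kde, normal_encoded_imgs, anomalous_encoded_imgs come from the cells above.
normal_scores = kde.score_samples(np.asarray(normal_encoded_imgs))       # log-densities
anomalous_scores = kde.score_samples(np.asarray(anomalous_encoded_imgs))

# Flag images whose log-density falls below e.g. the 5th percentile of normal scores.
threshold = np.percentile(normal_scores, 5)
print("anomalies flagged:", int((anomalous_scores < threshold).sum()))
```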
+ 69 - 0
eval_autoencoder.py

@@ -0,0 +1,69 @@
+import argparse
+import os
+from tqdm import tqdm
+import numpy as np
+import torch
+from torch import nn
+from torch.autograd import Variable
+from torch.utils.data import DataLoader
+from torchvision.utils import save_image
+from torchinfo import summary
+
+from py.PyTorchData import create_dataloader, model_output_to_image
+from py.Autoencoder2 import Autoencoder
+
+def eval_autoencoder(model: Autoencoder, dataloader: DataLoader, name: str, set_name: str, device: str = "cpu", criterion = nn.MSELoss()):
+    model = model.to(device)
+    print(f"Using {device} device")
+
+    print(f"Saving evaluation results to ./ae_train_NoBackup/{name}/eval")
+    os.makedirs(f"./ae_train_NoBackup/{name}/eval", exist_ok=True)
+
+    labels = []
+    encodeds = []
+    losses = []
+
+    for img, label_batch in tqdm(dataloader):
+        img_batch = Variable(img).to(device)
+        # ===================forward=====================
+        encoded = model.encoder(img_batch)
+        encoded_flat = encoded.detach().cpu().numpy().reshape(encoded.size()[0], -1)
+        output_batch = model.decoder(encoded)
+
+        for input, output, label, enc_flat in zip(img_batch, output_batch, label_batch, encoded_flat):
+            losses.append(criterion(input, output).item())
+            encodeds.append(enc_flat)
+            labels.append(label.item())
+    # save labels, encoded representations and per-image losses together
+    np.save(f"./ae_train_NoBackup/{name}/eval/{set_name}.npy", np.array([labels, encodeds, losses], dtype=object))
+
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Autoencoder eval script")
+    parser.add_argument("name", type=str, help="Name of the training session (name of the save folder)")
+    parser.add_argument("model_name", type=str, help="Filename of the model (e.g. model_120.pth)")
+    parser.add_argument("set_name", type=str, help="Name of the dataset (e.g. train or test)")
+    parser.add_argument("img_folder", type=str, help="Path to directory containing train images (may contain subfolders)")
+    parser.add_argument("--device", type=str, help="PyTorch device to train on (cpu or cuda)", default="cpu")
+    parser.add_argument("--batch_size", type=int, help="Batch size (>=1)", default=32)
+    parser.add_argument("--image_transforms", action="store_true", help="Truncate and resize images (only enable if the input images have not been truncated resized to the target size already)")
+    
+    args = parser.parse_args()
+
+    if args.image_transforms:
+        print("Image transforms enabled: Images will be truncated and resized.")
+    else:
+        print("Image transforms disabled: Images are expected to be of the right size.")
+    
+    dataloader = create_dataloader(args.img_folder, batch_size=args.batch_size, skip_transforms=not args.image_transforms)
+    model = Autoencoder()
+    # load the trained weights (assumed to be saved in the session folder by the train script)
+    model.load_state_dict(torch.load(f"./ae_train_NoBackup/{args.name}/{args.model_name}", map_location=args.device))
+    print("Model:")
+    summary(model, (args.batch_size, 3, 256, 256))
+    print("Is CUDA available:", torch.cuda.is_available())
+    print(f"Devices: ({torch.cuda.device_count()})")
+    for i in range(torch.cuda.device_count()):
+        print(torch.cuda.get_device_name(i))
+    eval_autoencoder(model, dataloader, args.name, args.set_name, args.device)

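A hypothetical invocation of the script above (session, model, and folder names are placeholders, not from the commit):

```
python eval_autoencoder.py ae_session_01 model_120.pth test ./ResizedSessions256_NoBackup --device cuda --batch_size 32
```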
+ 2 - 1
py/FileUtils.py

@@ -32,4 +32,5 @@ def verify_expected_subfolders(session_path: str):
         session_path (str): session folder path
     """
     subfolders = list_folders(session_path)
-    assert sorted(subfolders) == expected_subfolders
+    if sorted(subfolders) != sorted(expected_subfolders):
+        raise AssertionError(f"{session_path}: Expected subfolders {expected_subfolders} but found {subfolders}")

+ 7 - 8
resize_session.ipynb

@@ -9,7 +9,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -24,7 +24,6 @@
    ],
    "source": [
     "import os\n",
-    "import glob\n",
     "from tqdm import tqdm\n",
     "import cv2 as cv\n",
     "\n",
@@ -40,7 +39,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,31 +62,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 2629/2629 [11:35<00:00,  3.78it/s]\n"
+      "100%|██████████| 2629/2629 [08:16<00:00,  5.29it/s]\n"
      ]
     }
    ],
    "source": [
-    "copy_session(session, \"ResizedSessions_NoBackup\", scale=0.4)"
+    "copy_session(session, \"ResizedSessions_NoBackup\", scale=0.3, truncate_y=(40, 40))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 2629/2629 [07:13<00:00,  6.07it/s]\n"
+      "100%|██████████| 5767/5767 [17:31<00:00,  5.48it/s] \n"
      ]
     }
    ],

+ 68 - 0
train_bow.py

@@ -0,0 +1,68 @@
+import argparse
+import os
+import cv2 as cv
+import numpy as np
+from tqdm import tqdm
+
+from py.Dataset import Dataset
+from py.Session import SessionImage
+
+def dense_keypoints(img, step=30, off=(15, 12)):
+    border_dist = (step + 1) // 2
+    return [cv.KeyPoint(x, y, step) for y in range(border_dist + off[0], img.shape[0] - border_dist, step) 
+                                    for x in range(border_dist + off[1], img.shape[1] - border_dist, step)]
+
+def extract_descriptors(images: list[SessionImage]):
+    sift = cv.SIFT_create()
+    dscs = []
+    for image in tqdm(images):
+        img = image.read_opencv(gray=True)
+        kp = dense_keypoints(img)
+        kp, des = sift.compute(img, kp)
+        dscs.append(des)
+    return np.array(dscs)
+
+def generate_dictionary(dscs, dictionary_size):
+    # dictionary size = number of clusters
+    BOW = cv.BOWKMeansTrainer(dictionary_size)
+    for dsc in dscs:
+        BOW.add(dsc)
+    dictionary = BOW.cluster()
+    return dictionary
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="BOW train script")
+    parser.add_argument("dataset_dir", type=str, help="Directory of the dataset containing all session folders")
+    parser.add_argument("session_name", type=str, help="Name of the session to use for Lapse images (e.g. marten_01)")
+    parser.add_argument("--clusters", type=int, help="Number of clusters / BOW vocabulary size", default=1024)
+
+    args = parser.parse_args()
+
+    ds = Dataset(args.dataset_dir)
+    session = ds.create_session(args.session_name)
+    save_dir = f"./bow_train_NoBackup/{session.name}"
+
+    # Lapse DSIFT descriptors
+
+    lapse_dscs_file = os.path.join(save_dir, "lapse_dscs.npy")
+    if os.path.isfile(lapse_dscs_file):
+        print(f"{lapse_dscs_file} already exists, loading lapse descriptor from file...")
+        lapse_dscs = np.load(lapse_dscs_file)
+    else:
+        print("Extracting lapse descriptors...")
+        lapse_dscs = extract_descriptors(list(session.generate_lapse_images()))
+        os.makedirs(save_dir, exist_ok=True)
+        np.save(lapse_dscs_file, lapse_dscs)
+
+    # BOW dictionary
+
+    dictionary_file = os.path.join(save_dir, f"bow_dict_{args.clusters}.npy")
+    if os.path.isfile(dictionary_file):
+        print(f"{dictionary_file} already exists, loading BOW dictionary from file...")
+        dictionary = np.load(dictionary_file)
+    else:
+        print(f"Creating BOW vocabulary with {args.clusters} clusters...")
+        dictionary = generate_dictionary(lapse_dscs, args.clusters)
+        np.save(dictionary_file, dictionary)
+    
+    print("Complete!")

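The script stops after clustering the vocabulary. As a sketch of a plausible next step (not part of this commit), OpenCV's BOWImgDescriptorExtractor can map each image's dense SIFT descriptors onto the vocabulary, yielding one normalized histogram per image; `dictionary` and `dense_keypoints` are the ones from train_bow.py above, while the image path is a placeholder:

```python
import cv2 as cv
import numpy as np

# Map dense SIFT descriptors onto the clustered vocabulary -> BOW histogram.
sift = cv.SIFT_create()
bow_extractor = cv.BOWImgDescriptorExtractor(sift, cv.BFMatcher(cv.NORM_L2))
bow_extractor.setVocabulary(dictionary.astype(np.float32))  # vocabulary from train_bow.py

img = cv.imread("example.jpg", cv.IMREAD_GRAYSCALE)       # placeholder image
hist = bow_extractor.compute(img, dense_keypoints(img))   # shape (1, clusters), normalized
```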
Some files were not shown because too many files changed in this diff.