
session scanning

Felix Kleinsteuber 3 years ago
parent
commit
9e0c7a3c95
6 changed files with 247 additions and 7 deletions
  1. analyze_dataset.ipynb (+3 -2)
  2. py/Dataset.py (+13 -4)
  3. py/FileUtils.py (+12 -1)
  4. py/ImageUtils.py (+15 -0)
  5. py/Session.py (+87 -0)
  6. scan_sessions.ipynb (+117 -0)

+ 3 - 2
analyze_dataset.ipynb

@@ -30,7 +30,7 @@
    ],
    "source": [
     "%load_ext autoreload\n",
-    "%autoreload 2'\n",
+    "%autoreload 2\n",
     "\n",
     "from tqdm.notebook import tqdm\n",
     "import os\n",
@@ -492,7 +492,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Verify that 'Full' folder is a subset of 'Motion'"
+    "## Verify that 'Full' folder is a subset of 'Motion'\n",
+    "We expect the 'Full' folder to be a subset of 'Motion'. The following code checks that by iterating over all files in 'Full' for every session and looking for them in 'Motion' of the same session."
    ]
   },
   {
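The subset check referenced in the new markdown cell lies outside this hunk. A minimal sketch of what such a check could look like, assuming only what this commit exposes (Dataset.base_path, Dataset.raw_sessions, list_jpegs_recursive) and a placeholder dataset path:

import os
from py.Dataset import Dataset
from py.FileUtils import list_jpegs_recursive

ds = Dataset("/path/to/dataset")  # placeholder path
for folder in ds.raw_sessions:
    session_path = os.path.join(ds.base_path, folder)
    # file names present in Motion for this session
    motion_names = {os.path.basename(f) for f in list_jpegs_recursive(os.path.join(session_path, "Motion"))}
    # every file in Full must also appear in Motion
    missing = [os.path.basename(f)
               for f in list_jpegs_recursive(os.path.join(session_path, "Full"))
               if os.path.basename(f) not in motion_names]
    assert not missing, f"{folder}: {len(missing)} 'Full' files missing from 'Motion'"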

+ 13 - 4
py/Dataset.py

@@ -1,9 +1,9 @@
 import os
 from tqdm import tqdm
 from py.DatasetStatistics import DatasetStatistics
-from py.FileUtils import list_folders, list_jpegs_recursive
+from py.FileUtils import list_folders, list_jpegs_recursive, expected_subfolders, verify_expected_subfolders
+from py.Session import Session
 
-expected_subfolders = sorted(["Motion", "Lapse", "Full"])
 
 class Dataset:
 
@@ -18,8 +18,7 @@ class Dataset:
         # Verify every session contains the subfolders Motion, Lapse, Full
         for folder in self.raw_sessions:
             path = os.path.join(self.base_path, folder)
-            subfolders = list_folders(path)
-            assert sorted(subfolders) == expected_subfolders
+            verify_expected_subfolders(path)
         print(f"Found {len(self.raw_sessions)} sessions")
 
 
@@ -38,3 +37,13 @@ class Dataset:
                 counts[folder[33:]][subfolder] = numFiles
                 counts[folder[33:]]["Total"] += numFiles
         return DatasetStatistics(counts)
+
+    def create_session(self, session_name: str) -> Session:
+        if session_name in self.raw_sessions:
+            return Session(os.path.join(self.base_path, session_name))
+        filtered = [s for s in self.raw_sessions if session_name.lower() in s.lower()]
+        if len(filtered) == 0:
+            raise ValueError(f"There are no sessions matching this name: {filtered}")
+        elif len(filtered) > 1:
+            raise ValueError(f"There are several sessions matching this name: {session_name}")
+        return Session(os.path.join(self.base_path, filtered[0]))
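The new create_session resolves a session either by its exact raw folder name or by a case-insensitive substring that must match exactly one session. A short usage sketch based on the scan_sessions.ipynb notebook further down:

from py.Dataset import Dataset

ds = Dataset("/home/AMMOD_data/camera_traps/BayerWald/Vielkadaver-Projekt/")
session = ds.create_session("marten_01")  # uniquely matches '...VIELAAS_Marten_01'
# An unknown name, or an ambiguous substring such as "raccoon", raises ValueError.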

+ 12 - 1
py/FileUtils.py

@@ -1,6 +1,8 @@
 from glob import glob
 import os
 
+expected_subfolders = sorted(["Motion", "Lapse", "Full"])
+
 def list_folders(path: str) -> list:
     """Returns the names of all immediate child folders of path.
 
@@ -21,4 +23,13 @@ def list_jpegs_recursive(path: str) -> list:
     Returns:
         list: list of all jpeg files
     """
-    return [name for name in glob(os.path.join(path, "**/*.jpg"), recursive=True) if os.path.isfile(os.path.join(path, name))]
+    return [name for name in glob(os.path.join(path, "**/*.jpg"), recursive=True) if os.path.isfile(os.path.join(path, name))]
+
+def verify_expected_subfolders(session_path: str):
+    """Assert that the given session folder contains exactly the three subfolders Motion, Lapse, Full.
+
+    Args:
+        session_path (str): session folder path
+    """
+    subfolders = list_folders(session_path)
+    assert sorted(subfolders) == expected_subfolders
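verify_expected_subfolders is a thin wrapper around list_folders plus an assert; a brief usage sketch with a hypothetical session path:

from py.FileUtils import verify_expected_subfolders

# Passes silently when the folder contains exactly Motion, Lapse and Full;
# raises AssertionError if one is missing or an extra subfolder is present.
verify_expected_subfolders("/data/sessions/SomeSession")  # hypothetical path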

+ 15 - 0
py/ImageUtils.py

@@ -0,0 +1,15 @@
+from datetime import datetime
+from PIL import Image
+
+def get_image_date(img_path: str) -> datetime:
+    """Returns the date from the image EXIF data.
+
+    Args:
+        img_path (str): path to image
+
+    Returns:
+        datetime: datetime extracted from EXIF data
+    """
+    img = Image.open(img_path)
+    date_raw = img.getexif()[306]
+    return datetime.strptime(date_raw, "%Y:%m:%d %H:%M:%S")
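EXIF/TIFF tag 306 is the standard DateTime tag, stored as "YYYY:MM:DD HH:MM:SS", which is why the fixed strptime format above works. A quick usage sketch with a hypothetical image path (images without a DateTime entry raise KeyError):

from py.ImageUtils import get_image_date

date = get_image_date("Motion/IMG_0001.JPG")  # hypothetical path
print(date.isoformat())  # e.g. 2021-05-01T12:34:56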

+ 87 - 0
py/Session.py

@@ -0,0 +1,87 @@
+import pickle
+from warnings import warn
+import os
+from tqdm import tqdm
+
+from py.FileUtils import list_folders, list_jpegs_recursive, verify_expected_subfolders
+from py.ImageUtils import get_image_date
+
+class Session:
+    def __init__(self, folder: str):
+        self.folder = folder
+        # session name = folder name[33:], the first 33 characters are always the same
+        self.name = os.path.basename(folder)[33:]
+        print(f"Session '{self.name}' at folder: {self.folder}")
+        assert self.name != ""
+        verify_expected_subfolders(self.folder)
+        self.scanned = False
+        # maps lapse files to their exif dates (for statistic and prediction purposes)
+        self.lapse_dates = {}
+        # maps motion files to their exif dates (for statistic purposes)
+        self.motion_dates = {}
+        # maps exif dates to lapse files (for prediction purposes)
+        self.lapse_map = {}
+        self.load_scans()
+        if not self.scanned:
+            print("Session not scanned. Run session.scan() to create scan files")
+    
+    def load_scans(self):
+        lapse_dates_file = os.path.join("session_scans", self.name, "lapse_dates.pickle")
+        motion_dates_file = os.path.join("session_scans", self.name, "motion_dates.pickle")
+        lapse_map_file = os.path.join("session_scans", self.name, "lapse_map.pickle")
+        lapse_dates_exists = os.path.isfile(lapse_dates_file)
+        motion_dates_exists = os.path.isfile(motion_dates_file)
+        lapse_map_exists = os.path.isfile(lapse_map_file)
+        if lapse_dates_exists and motion_dates_exists and lapse_map_exists:
+            with open(lapse_dates_file, "rb") as handle:
+                self.lapse_dates = pickle.load(handle)
+            with open(motion_dates_file, "rb") as handle:
+                self.motion_dates = pickle.load(handle)
+            with open(lapse_map_file, "rb") as handle:
+                self.lapse_map = pickle.load(handle)
+            self.scanned = True
+            print("Loaded scans.")
+        else:
+            if lapse_dates_exists or motion_dates_exists or lapse_map_exists:
+                warn("Only partial scan data available. Not loading.")
+            self.scanned = False
+    
+    def save_scans(self):
+        lapse_dates_file = os.path.join("session_scans", self.name, "lapse_dates.pickle")
+        motion_dates_file = os.path.join("session_scans", self.name, "motion_dates.pickle")
+        lapse_map_file = os.path.join("session_scans", self.name, "lapse_map.pickle")
+        with open(lapse_dates_file, "wb") as handle:
+            pickle.dump(self.lapse_dates, handle, protocol=pickle.HIGHEST_PROTOCOL)
+            print(f"Saved {lapse_dates_file}")
+        with open(motion_dates_file, "wb") as handle:
+            pickle.dump(self.motion_dates, handle, protocol=pickle.HIGHEST_PROTOCOL)
+            print(f"Saved {motion_dates_file}")
+        with open(lapse_map_file, "wb") as handle:
+            pickle.dump(self.lapse_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
+            print(f"Saved {lapse_map_file}")
+    
+    def scan(self, force=False, auto_save=True):
+        if self.scanned and not force:
+            raise ValueError("Session is already scanned. Use force=True to scan anyway and override scan progress.")
+        # Scan motion dates
+        print("Scanning motion dates...")
+        self.motion_dates = {}
+        for motionFile in tqdm(list_jpegs_recursive(os.path.join(self.folder, "Motion"))):
+            self.motion_dates[os.path.basename(motionFile)] = get_image_date(motionFile)
+        # Scan lapse dates
+        print("Scanning lapse dates...")
+        self.lapse_dates = {}
+        for motionFile in tqdm(list_jpegs_recursive(os.path.join(self.folder, "Motion"))):
+            self.motion_dates[os.path.basename(motionFile)] = get_image_date(motionFile)
+        # Create lapse map
+        print("Creating lapse map...")
+        self.lapse_map = {}
+        for file, date in self.lapse_dates.items():
+            if date in self.lapse_map:
+                self.lapse_map[date].append(file)
+            else:
+                self.lapse_map[date] = [file]
+        # Auto save
+        if auto_save:
+            print("Saving...")
+            self.save_scans()
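Putting the Session pieces together, a typical first run could look like the sketch below; the session folder is the one printed by scan_sessions.ipynb, while the lookup timestamp is a placeholder. Note that save_scans() writes into session_scans/<name>/ without creating that folder, so making it first avoids a FileNotFoundError on a fresh checkout.

import os
from datetime import datetime
from py.Session import Session

session = Session("/home/AMMOD_data/camera_traps/BayerWald/Vielkadaver-Projekt/"
                  "VIELAAS_Spring_Session01-VIELAAS_Marten_01")
os.makedirs(os.path.join("session_scans", session.name), exist_ok=True)  # ensure the pickle folder exists
if not session.scanned:
    session.scan()  # fills motion_dates, lapse_dates and lapse_map, then pickles them via save_scans()

# All Lapse images taken at a given EXIF timestamp (placeholder datetime):
files = session.lapse_map.get(datetime(2021, 5, 1, 12, 0, 0), [])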

+ 117 - 0
scan_sessions.ipynb

@@ -0,0 +1,117 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "from tqdm.notebook import tqdm\n",
+    "import os\n",
+    "\n",
+    "from py.Dataset import Dataset\n",
+    "from py.DatasetStatistics import DatasetStatistics\n",
+    "from py.Session import Session\n",
+    "from py.FileUtils import list_folders, list_jpegs_recursive"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 32 sessions\n"
+     ]
+    }
+   ],
+   "source": [
+    "DIR = '/home/AMMOD_data/camera_traps/BayerWald/Vielkadaver-Projekt/'\n",
+    "\n",
+    "ds = Dataset(DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Beaver_01', 'Marten_01', 'Raccoon_01', 'Reddeer_01', 'Roedeer_01', 'Wildboar_01', 'Badger_02', 'Beaver_02', 'Fox_02', 'Marten_02', 'Raccoon_02', 'Rat_02', 'Reddeer_02', 'Roedeer_02', 'Badger_03', 'Fox_03', 'Raccoon_03', 'Reddeer_03', 'Wildboar_03', 'Badger_04', 'Rat_04', 'Reddeer_04', 'Wildboar_04', 'Badger_05', 'Beaver_05', 'Ermine_05', 'Fox_05', 'Marten_05', 'Raccoon_05', 'Reddeer_05', 'Roedeer_05', 'Wildboar_05']\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(ds.get_sessions())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Session 'Marten_01' at folder: /home/AMMOD_data/camera_traps/BayerWald/Vielkadaver-Projekt/VIELAAS_Spring_Session01-VIELAAS_Marten_01\n",
+      "Session not scanned. Run session.scan() to create scan files\n"
+     ]
+    }
+   ],
+   "source": [
+    "session = ds.create_session(\"marten_01\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.6.9 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}