# %% [markdown]
# # Analyze dataset

# %%
import os
import glob
import numpy as np
from tqdm.notebook import tqdm

# %% [markdown]
# ## List all folders

# %%
def listFolders(path):
    """Return the names of the directories located directly inside `path`.

    Only entry names are returned (not full paths), in whatever order
    `os.listdir` yields them; plain files are skipped.
    """
    folderNames = []
    for entry in os.listdir(path):
        if os.path.isdir(os.path.join(path, entry)):
            folderNames.append(entry)
    return folderNames

# %%
DIR = '/home/AMMOD_data/camera_traps/BayerWald/Vielkadaver-Projekt/'
folders = sorted(listFolders(DIR))
# Only the part of each name after its first 33 characters is printed; the
# recorded output shows short names such as 'Beaver_01'.
# NOTE(review): the 33-character prefix presumably is a fixed-length session
# prefix starting with "Spring_Session" - confirm before relying on the slice.
print(f"{len(folders)} folders: {[folderName[33:] for folderName in folders]}")

# %% [markdown]
# Check that all directories contain the subfolders "Motion", "Lapse", and "Full".
# %%
# Every folder must contain exactly these three sub-directories.
expectedSubFolders = sorted(["Motion", "Lapse", "Full"])
for folder in folders:
    path = os.path.join(DIR, folder)
    subFolders = listFolders(path)
    # The message names the offending folder so a failure is actionable.
    assert sorted(subFolders) == expectedSubFolders, (
        f"{folder}: expected sub-folders {expectedSubFolders}, found {sorted(subFolders)}"
    )

# %% [markdown]
# ## Count samples by folder and subfolder

# %%
def listFiles(path):
    """Return the names of the regular files located directly inside `path`.

    Only entry names are returned (not full paths); directories are skipped.
    """
    return [name for name in os.listdir(path) if os.path.isfile(os.path.join(path, name))]


def listFilesRecursive(path):
    """Return the paths of all `*.jpg` files below `path`, at any depth.

    The returned paths are the ones produced by `glob.glob`, i.e. they
    already start with `path` (relative if `path` is relative, absolute if it
    is absolute).

    NOTE(review): the pattern is the literal lower-case `*.jpg`; on a
    case-sensitive filesystem `*.JPG` files would not be counted - confirm
    that this is intended.
    """
    pattern = os.path.join(path, "**/*.jpg")
    # glob results already include `path`, so they are tested as-is. Joining
    # them onto the pattern again (as the previous version did) only worked
    # for absolute paths and returned nothing for relative ones.
    return [name for name in glob.glob(pattern, recursive=True) if os.path.isfile(name)]

# %%
# counts[<short folder name>] = {"Total": n, "Full": n, "Lapse": n, "Motion": n}
counts = {}
for folder in tqdm(folders):
    key = folder[33:]  # drop the first 33 characters of the folder name
    counts[key] = {"Total": 0}
    for subfolder in expectedSubFolders:
        path = os.path.join(DIR, folder, subfolder)
        numFiles = len(listFilesRecursive(path))
        counts[key][subfolder] = numFiles
        counts[key]["Total"] += numFiles

# %%
# Add a summary entry holding the per-column sums over all folders.
counts["Z_Total"] = {}
for subfolder in expectedSubFolders + ["Total"]:
    counts["Z_Total"][subfolder] = sum(counts[folder[33:]][subfolder] for folder in folders)

# %%
# Persist the counts to disk; the dict is written as a pickled object array.
np.save("counts.npy", counts)
# %%
# Read back the counts dictionary stored in "counts.npy".
counts = np.load(
    "counts.npy",
    # Needed because the file holds a pickled Python object (a dict) rather
    # than a numeric array. Unpickling can execute arbitrary code, so only
    # load files that were produced by this notebook.
    allow_pickle=True,
).tolist()  # for a 0-d object array, tolist() hands back the stored object itself

# %%
import pandas as pd

# The outer dict keys become columns first; transposing turns them into the
# row index, leaving one column per inner key.
df = pd.DataFrame(counts).T
| \n", " | Lapse | \n", "Motion | \n", "Full | \n", "Total | \n", "
|---|---|---|---|---|
| Badger_02 | \n", "1728 | \n", "4715 | \n", "202 | \n", "6645 | \n", "
| Badger_03 | \n", "46 | \n", "4245 | \n", "67 | \n", "4358 | \n", "
| Badger_04 | \n", "56 | \n", "480 | \n", "192 | \n", "728 | \n", "
| Badger_05 | \n", "1174 | \n", "3860 | \n", "108 | \n", "5142 | \n", "
| Beaver_01 | \n", "1734 | \n", "695 | \n", "200 | \n", "2629 | \n", "
| Beaver_02 | \n", "1727 | \n", "2890 | \n", "270 | \n", "4887 | \n", "
| Beaver_05 | \n", "1321 | \n", "2415 | \n", "32 | \n", "3768 | \n", "
| Ermine_05 | \n", "867 | \n", "2380 | \n", "135 | \n", "3382 | \n", "
| Fox_02 | \n", "957 | \n", "1110 | \n", "200 | \n", "2267 | \n", "
| Fox_03 | \n", "38 | \n", "5495 | \n", "206 | \n", "5739 | \n", "
| Fox_05 | \n", "1083 | \n", "753 | \n", "65 | \n", "1901 | \n", "
| Marten_01 | \n", "2462 | \n", "3105 | \n", "200 | \n", "5767 | \n", "
| Marten_02 | \n", "1726 | \n", "883 | \n", "200 | \n", "2809 | \n", "
| Marten_05 | \n", "890 | \n", "16170 | \n", "70 | \n", "17130 | \n", "
| Raccoon_01 | \n", "850 | \n", "375 | \n", "120 | \n", "1345 | \n", "
| Raccoon_02 | \n", "1639 | \n", "332 | \n", "162 | \n", "2133 | \n", "
| Raccoon_03 | \n", "35 | \n", "475 | \n", "100 | \n", "610 | \n", "
| Raccoon_05 | \n", "1089 | \n", "1600 | \n", "116 | \n", "2805 | \n", "
| Rat_02 | \n", "626 | \n", "845 | \n", "118 | \n", "1589 | \n", "
| Rat_04 | \n", "39 | \n", "640 | \n", "96 | \n", "775 | \n", "
| Reddeer_01 | \n", "1628 | \n", "9380 | \n", "200 | \n", "11208 | \n", "
| Reddeer_02 | \n", "1635 | \n", "675 | \n", "200 | \n", "2510 | \n", "
| Reddeer_03 | \n", "1027 | \n", "1770 | \n", "141 | \n", "2938 | \n", "
| Reddeer_04 | \n", "46 | \n", "1690 | \n", "89 | \n", "1825 | \n", "
| Reddeer_05 | \n", "1078 | \n", "18251 | \n", "166 | \n", "19495 | \n", "
| Roedeer_01 | \n", "1380 | \n", "38820 | \n", "18 | \n", "40218 | \n", "
| Roedeer_02 | \n", "888 | \n", "2770 | \n", "206 | \n", "3864 | \n", "
| Roedeer_05 | \n", "1090 | \n", "13176 | \n", "79 | \n", "14345 | \n", "
| Wildboar_01 | \n", "1732 | \n", "2895 | \n", "137 | \n", "4764 | \n", "
| Wildboar_03 | \n", "46 | \n", "515 | \n", "102 | \n", "663 | \n", "
| Wildboar_04 | \n", "39 | \n", "2250 | \n", "107 | \n", "2396 | \n", "
| Wildboar_05 | \n", "903 | \n", "22263 | \n", "86 | \n", "23252 | \n", "
| Z_Total | \n", "31579 | \n", "167918 | \n", "4390 | \n", "203887 | \n", "