
notes I made when researching continuous learning

Julia Boehlke 2 years ago
parent
commit
78aea9fad0

+ 1 - 0
literature_notes/README.md

@@ -0,0 +1 @@
+# Literature for the AMMOD Project

+ 232 - 0
literature_notes/life_long_leaning.bib

@@ -0,0 +1,232 @@
+@InProceedings{lopez2017gradient,
+  author  = {Lopez-Paz, David and Ranzato, Marc'Aurelio},
+  title   = {Gradient episodic memory for continual learning},
+  year    = {2017},
+  pages   = {6467--6476},
+  volume  = {30},
+  comment = {They introduce metrics for evaluating backward and forward transfer in task-incremental learning and assume the task label is available at inference.
+No assumptions are made about the number of tasks.
+A memory buffer is used to constrain updates when training on new tasks.
+Constraint: the gradient direction for past tasks (estimated from memory) must have a positive dot product with the gradient of the current batch.
+Disadvantage: slow optimization under the constraint & TASK INCREMENTAL},
+  file    = {:life_long_learning_papers/lopez2017gradient - Gradient Episodic Memory for Continual Learning.pdf:PDF},
+  journal = {Advances in neural information processing systems},
+  url     = {https://proceedings.neurips.cc/paper/2017/file/f87522788a2be2d171666752f97ddebb-Paper.pdf},
+}
+
+@InProceedings{Prabhu2020GDumbAS,
+  author    = {Ameya Prabhu and Philip H. S. Torr and P. Dokania},
+  booktitle = {ECCV},
+  title     = {GDumb: A Simple Approach that Questions Our Progress in Continual Learning},
+  year      = {2020},
+  comment   = {\begin{itemize}
+\item GDumb = Greedy Sampler and Dumb Learner (class-balanced storing, retrained from scratch)
+\item Simplifying assumptions in CL:
+\begin{enumerate}
+         \item Disjoint task formulation: for a particular duration in time, the data stream provides samples specific to one task.
+         \item Task-incremental (multi-head): along with the disjoint-task assumption, the task information (or id) is also passed by an oracle during training and inference. In class-incremental learning no such task information is given.
+         \item Online CL: the learner is restricted to using each sample only once to update parameters. In offline CL there is unrestricted access to the entire dataset of the current task.
+\end{enumerate}
+\item Online CL is preferable when the data stream emits samples quickly.
+\item Found that GDumb outperforms most methods by a large margin.
+\end{itemize}},
+  file      = {:life_long_learning_papers/Prabhu2020GDumbAS - GDumb_ a Simple Approach That Questions Our Progress in Continual Learning.pdf:PDF},
+  url       = {https://link.springer.com/content/pdf/10.1007/978-3-030-58536-5_31.pdf},
+}
+
+@Article{Aljundi2019OnlineCL,
+  author  = {Rahaf Aljundi and Lucas Caccia and Eugene Belilovsky and Massimo Caccia and Min Lin and Laurent Charlin and T. Tuytelaars},
+  journal = {ArXiv},
+  title   = {Online Continual Learning with Maximally Interfered Retrieval},
+  year    = {2019},
+  volume  = {abs/1908.04742},
+  comment = {\begin{itemize}
+\item CI-CL, online, disjoint, memory-based approach.
+\item Sampling criterion for controlled replay: retrieve the samples whose predictions will be most negatively impacted by the foreseen parameter update. Their research question: which samples should be replayed from the previous history when new samples are received?
+\item Main idea: experience replay where the samples augmenting the batch from the stream are those whose loss changes most when updating on the new data (computed for a subset of previous data).
+\item Also applicable to generative replay approaches.
+\item In a case with a relatively small total number of classes/tasks (MNIST SPLIT) their approach (ER+MIR) is significantly better (87.6\%) than random sampling with ER (82.1\%). In other scenarios, their approach outperforms by a smaller margin.
+\item (At least in the ER scenario, I don't really understand why they restrict themselves to a disjoint setting.)
+\end{itemize}},
+  file    = {:life_long_learning_papers/Aljundi2019OnlineCL - Online Continual Learning with Maximally Interfered Retrieval.pdf:PDF},
+  url     = {https://arxiv.org/pdf/1908.04742.pdf},
+}
+
+@InProceedings{Aljundi2019GradientBS,
+  author    = {Rahaf Aljundi and Min Lin and Baptiste Goujaud and Yoshua Bengio},
+  booktitle = {NeurIPS},
+  title     = {Gradient based sample selection for online continual learning},
+  year      = {2019},
+  file      = {:life_long_learning_papers/Aljundi2019GradientBS - Gradient Based Sample Selection for Online Continual Learning.pdf:PDF},
+  url       = {https://arxiv.org/pdf/1903.08671.pdf},
+}
+
+@Article{chaudhry2019continual,
+  author   = {Chaudhry, Arslan and Rohrbach, Marcus and Elhoseiny, Mohamed and Ajanthan, Thalaiyasingam and Dokania, Puneet K and Torr, Philip HS and Ranzato, M},
+  title    = {Continual learning with tiny episodic memories},
+  year     = {2019},
+  comment  = {reservoir sampling with imbalanced data "so that the data distribution in the replay buffer follows the data distribution that has already been seen."},
+  file     = {:life_long_learning_papers/chaudhry2019continual - Continual Learning with Tiny Episodic Memories.pdf:PDF},
+  priority = {prio1},
+  url      = {https://ora.ox.ac.uk/objects/uuid:6e7580c4-85c9-4874-a52d-e4184046935c/download_file?file_format=pdf&safe_filename=Continual+Learning+with+Tiny+Episodic+Memories.pdf&type_of_work=Conference item},
+}
+
+@InProceedings{isele2018selective,
+  author    = {Isele, David and Cosgun, Akansel},
+  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
+  title     = {Selective experience replay for lifelong learning},
+  year      = {2018},
+  number    = {1},
+  volume    = {32},
+  comment   = {'For example, reservoir sampling has been employed in [5, 8] so that the data distribution in the replay buffer follows the data distribution that has already been seen. The problem of reservoir sampling is that the minor modes in the distribution with small probability mass may fail to be represented in the replay buffer. As a remedy to this problem, coverage maximization is also proposed in [8].'
+
+==8},
+  file      = {:life_long_learning_papers/isele2018selective-Selective_Experience_Replay_For_Lifelong_Learrning.pdf:PDF},
+  priority  = {prio1},
+}
+
+@Article{shin2017continual,
+  author     = {Shin, Hanul and Lee, Jung Kwon and Kim, Jaehong and Kim, Jiwon},
+  journal    = {arXiv preprint arXiv:1705.08690},
+  title      = {Continual learning with deep generative replay},
+  year       = {2017},
+  readstatus = {skimmed},
+  url        = {https://arxiv.org/pdf/1705.08690.pdf},
+}
+
+@Article{li2017learning,
+  author     = {Li, Zhizhong and Hoiem, Derek},
+  journal    = {IEEE transactions on pattern analysis and machine intelligence},
+  title      = {Learning without forgetting},
+  year       = {2017},
+  number     = {12},
+  pages      = {2935--2947},
+  volume     = {40},
+  publisher  = {IEEE},
+  readstatus = {skimmed},
+  url        = {https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8107520},
+}
+
+@InProceedings{rebuffi2017icarl,
+  author    = {Rebuffi, Sylvestre-Alvise and Kolesnikov, Alexander and Sperl, Georg and Lampert, Christoph H},
+  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+  title     = {{iCaRL}: Incremental classifier and representation learning},
+  year      = {2017},
+  pages     = {2001--2010},
+  file      = {:life_long_learning_papers/rebuffi2017icarl - Icarl_ Incremental Classifier and Representation Learning.pdf:PDF},
+  url       = {https://openaccess.thecvf.com/content_cvpr_2017/papers/Rebuffi_iCaRL_Incremental_Classifier_CVPR_2017_paper.pdf},
+}
+
+@Article{kirkpatrick2017overcoming,
+  author     = {Kirkpatrick, James and Pascanu, Razvan and Rabinowitz, Neil and Veness, Joel and Desjardins, Guillaume and Rusu, Andrei A and Milan, Kieran and Quan, John and Ramalho, Tiago and Grabska-Barwinska, Agnieszka and others},
+  journal    = {Proceedings of the national academy of sciences},
+  title      = {Overcoming catastrophic forgetting in neural networks},
+  year       = {2017},
+  number     = {13},
+  pages      = {3521--3526},
+  volume     = {114},
+  comment    = {EWC paper},
+  publisher  = {National Acad Sciences},
+  readstatus = {skimmed},
+  url        = {https://www.pnas.org/content/pnas/114/13/3521.full.pdf},
+}
+
+@InProceedings{chaudhry2018riemannian,
+  author    = {Chaudhry, Arslan and Dokania, Puneet K and Ajanthan, Thalaiyasingam and Torr, Philip HS},
+  booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
+  title     = {Riemannian walk for incremental learning: Understanding forgetting and intransigence},
+  year      = {2018},
+  pages     = {532--547},
+  file      = {:life_long_learning_papers/chaudhry2018riemannian - Riemannian Walk for Incremental Learning_ Understanding Forgetting and Intransigence.pdf:PDF},
+  url       = {https://openaccess.thecvf.com/content_ECCV_2018/papers/Arslan_Chaudhry__Riemannian_Walk_ECCV_2018_paper.pdf},
+}
+
+@Article{Pelosin2021MoreIB,
+  author  = {Francesco Pelosin and A. Torsello},
+  journal = {ArXiv},
+  title   = {More Is Better: An Analysis of Instance Quantity/Quality Trade-off in Rehearsal-based Continual Learning},
+  year    = {2021},
+  volume  = {abs/2105.14106},
+  file    = {:life_long_learning_papers/Pelosin2021MoreIB - More Is Better_ an Analysis of Instance Quantity_Quality Trade off in Rehearsal Based Continual Learning.pdf:PDF},
+  url     = {https://arxiv.org/pdf/2105.14106.pdf},
+}
+
+@InProceedings{Knoblauch2020OptimalCL,
+  author     = {Jeremias Knoblauch and H. Husain and Tom Diethe},
+  booktitle  = {ICML},
+  title      = {Optimal Continual Learning has Perfect Memory and is NP-hard},
+  year       = {2020},
+  readstatus = {skimmed},
+  url        = {https://arxiv.org/pdf/2006.05188.pdf},
+}
+
+@Article{Vitter1985RandomSW,
+  author  = {J. Vitter},
+  journal = {ACM Trans. Math. Softw.},
+  title   = {Random sampling with a reservoir},
+  year    = {1985},
+  pages   = {37-57},
+  volume  = {11},
+  file    = {:Vitter1985RandomSW - Random Sampling with a Reservoir.pdf:PDF},
+  url     = {http://www.cs.umd.edu/~samir/498/vitter.pdf},
+}
+
+@Article{Lomonaco2020CVPR2C,
+  author  = {V. Lomonaco and Lorenzo Pellegrini and Pau Rodr{\'i}guez and Massimo Caccia and Qi She and Y. Chen and Quentin Jodelet and Ruiping Wang and Zheda Mai and David V{\'a}zquez and G. I. Parisi and Nikhil Churamani and M. Pickett and Issam H. Laradji and D. Maltoni},
+  journal = {ArXiv},
+  title   = {CVPR 2020 Continual Learning in Computer Vision Competition: Approaches, Results, Current Challenges and Future Directions},
+  year    = {2020},
+  volume  = {abs/2009.09929},
+  file    = {:life_long_learning_papers/Lomonaco2020CVPR2C - CVPR 2020 Continual Learning in Computer Vision Competition_ Approaches, Results, Current Challenges and Future Directions.pdf:PDF},
+  url     = {https://arxiv.org/pdf/2009.09929.pdf},
+}
+
+@Article{Pellegrini2020LatentRF,
+  author     = {Lorenzo Pellegrini and Gabriele Graffieti and V. Lomonaco and D. Maltoni},
+  journal    = {2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
+  title      = {Latent Replay for Real-Time Continual Learning},
+  year       = {2020},
+  pages      = {10203-10209},
+  file       = {:life_long_learning_papers/Pellegrini2020LatentRF - Latent Replay for Real Time Continual Learning.pdf:PDF},
+  readstatus = {skimmed},
+  url        = {https://arxiv.org/pdf/1912.01100.pdf},
+}
+
+@Article{Lomonaco2020RehearsalFreeCL,
+  author  = {V. Lomonaco and D. Maltoni and Lorenzo Pellegrini},
+  journal = {2020 IEEE/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
+  title   = {Rehearsal-Free Continual Learning over Small Non-I.I.D. Batches},
+  year    = {2020},
+  pages   = {989-998},
+  file    = {:life_long_learning_papers/Lomonaco2020RehearsalFreeCL - Rehearsal Free Continual Learning Over Small Non I.I.D. Batches.pdf:PDF},
+  url     = {https://arxiv.org/pdf/1907.03799.pdf},
+}
+
+@Article{Maltoni2019ContinuousLI,
+  author  = {D. Maltoni and V. Lomonaco},
+  journal = {Neural networks : the official journal of the International Neural Network Society},
+  title   = {Continuous Learning in Single-Incremental-Task Scenarios},
+  year    = {2019},
+  pages   = {56-73},
+  volume  = {116},
+  file    = {:life_long_learning_papers/Maltoni2019ContinuousLI - Continuous Learning in Single Incremental Task Scenarios.pdf:PDF},
+  url     = {https://arxiv.org/pdf/1806.08568.pdf},
+}
+
+@InProceedings{zenke2017continual,
+  author       = {Zenke, Friedemann and Poole, Ben and Ganguli, Surya},
+  booktitle    = {International Conference on Machine Learning},
+  title        = {Continual learning through synaptic intelligence},
+  year         = {2017},
+  organization = {PMLR},
+  pages        = {3987--3995},
+  url          = {http://proceedings.mlr.press/v70/zenke17a/zenke17a.pdf},
+}
+
+@Article{Amalapuram2021OnHC,
+  author  = {Suresh Kumar Amalapuram and Thushara Tippi Reddy and Sumohana S. Channappayya and Tamma Bheemarjuna Reddy},
+  journal = {The First International Conference on AI-ML-Systems},
+  title   = {On Handling Class Imbalance in Continual Learning based Network Intrusion Detection Systems},
+  year    = {2021},
+  url     = {https://dl.acm.org/doi/pdf/10.1145/3486001.3486231},
+}
+
+@Comment{jabref-meta: databaseType:bibtex;}

+ 58 - 0
literature_notes/life_long_learning.aux

@@ -0,0 +1,58 @@
+\relax 
+\citation{Prabhu2020GDumbAS}
+\@writefile{toc}{\contentsline {section}{\numberline {1}Requirements of CL Literature}{1}}
+\@writefile{toc}{\contentsline {section}{\numberline {2}Survey Papers / Meta Papers}{1}}
+\@writefile{toc}{\contentsline {paragraph}{GDumb: A Simple Approach that Questions Our Progress in Continual Learning}{1}}
+\citation{Lomonaco2020CVPR2C}
+\citation{Pellegrini2020LatentRF}
+\citation{shin2017continual}
+\@writefile{toc}{\contentsline {paragraph}{CVPR 2020 Continual Learning in Computer Vision Competition: Approaches, Results, Current Challenges and Future Directions}{2}}
+\citation{Aljundi2019OnlineCL}
+\citation{Aljundi2019GradientBS}
+\@writefile{toc}{\contentsline {section}{\numberline {3}Papers}{3}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Rehearsal-Based Methods}{3}}
+\@writefile{toc}{\contentsline {paragraph}{Online Continual Learning with Maximally Interfered Retrieval}{3}}
+\@writefile{toc}{\contentsline {paragraph}{Gradient based sample selection for online continual learning}{3}}
+\citation{Pelosin2021MoreIB}
+\citation{Knoblauch2020OptimalCL}
+\@writefile{toc}{\contentsline {paragraph}{Random Sampling with a Reservoir }{4}}
+\@writefile{toc}{\contentsline {paragraph}{More Is Better: An Analysis of Instance Quantity/Quality Trade-off in Rehearsal-based Continual Learning}{4}}
+\citation{Pellegrini2020LatentRF}
+\citation{Maltoni2019ContinuousLI}
+\citation{li2017learning}
+\citation{rebuffi2017icarl}
+\@writefile{toc}{\contentsline {paragraph}{Latent Replay for Real-Time Continual Learning}{5}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Knowledge Distillation}{5}}
+\citation{kirkpatrick2017overcoming}
+\@writefile{toc}{\contentsline {paragraph}{iCaRL: Incremental Classifier and Representation Learning}{6}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Regularization Approaches}{6}}
+\citation{chaudhry2018riemannian}
+\citation{lopez2017gradient}
+\@writefile{toc}{\contentsline {paragraph}{Riemannian Walk for Incremental Learning: Understanding Forgetting and Intransigence}{7}}
+\citation{Maltoni2019ContinuousLI}
+\citation{zenke2017continual}
+\citation{Lomonaco2020RehearsalFreeCL}
+\citation{Pellegrini2020LatentRF}
+\citation{Lomonaco2020RehearsalFreeCL}
+\@writefile{toc}{\contentsline {paragraph}{Gradient Episodic Memory for Continual Learning}{8}}
+\@writefile{toc}{\contentsline {paragraph}{Continuous Learning in Single-Incremental-Task Scenarios}{8}}
+\bibstyle{abbrv}
+\bibdata{life_long_leaning}
+\bibcite{Aljundi2019OnlineCL}{1}
+\bibcite{Aljundi2019GradientBS}{2}
+\bibcite{chaudhry2018riemannian}{3}
+\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Parameter Isolation}{9}}
+\@writefile{toc}{\contentsline {paragraph}{Conditional Channel Gated Networks for Task-Aware Continual Learning}{9}}
+\bibcite{kirkpatrick2017overcoming}{4}
+\bibcite{Knoblauch2020OptimalCL}{5}
+\bibcite{li2017learning}{6}
+\bibcite{Lomonaco2020RehearsalFreeCL}{7}
+\bibcite{Lomonaco2020CVPR2C}{8}
+\bibcite{lopez2017gradient}{9}
+\bibcite{Maltoni2019ContinuousLI}{10}
+\bibcite{Pellegrini2020LatentRF}{11}
+\bibcite{Pelosin2021MoreIB}{12}
+\bibcite{Prabhu2020GDumbAS}{13}
+\bibcite{rebuffi2017icarl}{14}
+\bibcite{shin2017continual}{15}
+\bibcite{zenke2017continual}{16}

+ 99 - 0
literature_notes/life_long_learning.bbl

@@ -0,0 +1,99 @@
+\begin{thebibliography}{10}
+
+\bibitem{Aljundi2019OnlineCL}
+R.~Aljundi, L.~Caccia, E.~Belilovsky, M.~Caccia, M.~Lin, L.~Charlin, and
+  T.~Tuytelaars.
+\newblock Online continual learning with maximally interfered retrieval.
+\newblock {\em ArXiv}, abs/1908.04742, 2019.
+
+\bibitem{Aljundi2019GradientBS}
+R.~Aljundi, M.~Lin, B.~Goujaud, and Y.~Bengio.
+\newblock Gradient based sample selection for online continual learning.
+\newblock In {\em NeurIPS}, 2019.
+
+\bibitem{chaudhry2018riemannian}
+A.~Chaudhry, P.~K. Dokania, T.~Ajanthan, and P.~H. Torr.
+\newblock Riemannian walk for incremental learning: Understanding forgetting
+  and intransigence.
+\newblock In {\em Proceedings of the European Conference on Computer Vision
+  (ECCV)}, pages 532--547, 2018.
+
+\bibitem{kirkpatrick2017overcoming}
+J.~Kirkpatrick, R.~Pascanu, N.~Rabinowitz, J.~Veness, G.~Desjardins, A.~A.
+  Rusu, K.~Milan, J.~Quan, T.~Ramalho, A.~Grabska-Barwinska, et~al.
+\newblock Overcoming catastrophic forgetting in neural networks.
+\newblock {\em Proceedings of the national academy of sciences},
+  114(13):3521--3526, 2017.
+
+\bibitem{Knoblauch2020OptimalCL}
+J.~Knoblauch, H.~Husain, and T.~Diethe.
+\newblock Optimal continual learning has perfect memory and is np-hard.
+\newblock In {\em ICML}, 2020.
+
+\bibitem{li2017learning}
+Z.~Li and D.~Hoiem.
+\newblock Learning without forgetting.
+\newblock {\em IEEE transactions on pattern analysis and machine intelligence},
+  40(12):2935--2947, 2017.
+
+\bibitem{Lomonaco2020RehearsalFreeCL}
+V.~Lomonaco, D.~Maltoni, and L.~Pellegrini.
+\newblock Rehearsal-free continual learning over small non-i.i.d. batches.
+\newblock {\em 2020 IEEE/CVF Conference on Computer Vision and Pattern
+  Recognition Workshops (CVPRW)}, pages 989--998, 2020.
+
+\bibitem{Lomonaco2020CVPR2C}
+V.~Lomonaco, L.~Pellegrini, P.~Rodr{\'i}guez, M.~Caccia, Q.~She, Y.~Chen,
+  Q.~Jodelet, R.~Wang, Z.~Mai, D.~V{\'a}zquez, G.~I. Parisi, N.~Churamani,
+  M.~Pickett, I.~H. Laradji, and D.~Maltoni.
+\newblock Cvpr 2020 continual learning in computer vision competition:
+  Approaches, results, current challenges and future directions.
+\newblock {\em ArXiv}, abs/2009.09929, 2020.
+
+\bibitem{lopez2017gradient}
+D.~Lopez-Paz and M.~Ranzato.
+\newblock Gradient episodic memory for continual learning.
+\newblock volume~30, pages 6467--6476, 2017.
+
+\bibitem{Maltoni2019ContinuousLI}
+D.~Maltoni and V.~Lomonaco.
+\newblock Continuous learning in single-incremental-task scenarios.
+\newblock {\em Neural networks : the official journal of the International
+  Neural Network Society}, 116:56--73, 2019.
+
+\bibitem{Pellegrini2020LatentRF}
+L.~Pellegrini, G.~Graffieti, V.~Lomonaco, and D.~Maltoni.
+\newblock Latent replay for real-time continual learning.
+\newblock {\em 2020 IEEE/RSJ International Conference on Intelligent Robots and
+  Systems (IROS)}, pages 10203--10209, 2020.
+
+\bibitem{Pelosin2021MoreIB}
+F.~Pelosin and A.~Torsello.
+\newblock More is better: An analysis of instance quantity/quality trade-off in
+  rehearsal-based continual learning.
+\newblock {\em ArXiv}, abs/2105.14106, 2021.
+
+\bibitem{Prabhu2020GDumbAS}
+A.~Prabhu, P.~H.~S. Torr, and P.~Dokania.
+\newblock Gdumb: A simple approach that questions our progress in continual
+  learning.
+\newblock In {\em ECCV}, 2020.
+
+\bibitem{rebuffi2017icarl}
+S.-A. Rebuffi, A.~Kolesnikov, G.~Sperl, and C.~H. Lampert.
+\newblock iCaRL: Incremental classifier and representation learning.
+\newblock In {\em Proceedings of the IEEE conference on Computer Vision and
+  Pattern Recognition}, pages 2001--2010, 2017.
+
+\bibitem{shin2017continual}
+H.~Shin, J.~K. Lee, J.~Kim, and J.~Kim.
+\newblock Continual learning with deep generative replay.
+\newblock {\em arXiv preprint arXiv:1705.08690}, 2017.
+
+\bibitem{zenke2017continual}
+F.~Zenke, B.~Poole, and S.~Ganguli.
+\newblock Continual learning through synaptic intelligence.
+\newblock In {\em International Conference on Machine Learning}, pages
+  3987--3995. PMLR, 2017.
+
+\end{thebibliography}

+ 48 - 0
literature_notes/life_long_learning.blg

@@ -0,0 +1,48 @@
+This is BibTeX, Version 0.99d (TeX Live 2017/Debian)
+Capacity: max_strings=100000, hash_size=100000, hash_prime=85009
+The top-level auxiliary file: life_long_learning.aux
+The style file: abbrv.bst
+Database file #1: life_long_leaning.bib
+Warning--empty booktitle in lopez2017gradient
+You've used 16 entries,
+            2118 wiz_defined-function locations,
+            594 strings with 7380 characters,
+and the built_in function-call counts, 6919 in all, are:
+= -- 656
+> -- 413
+< -- 7
++ -- 162
+- -- 146
+* -- 496
+:= -- 1123
+add.period$ -- 49
+call.type$ -- 16
+change.case$ -- 120
+chr.to.int$ -- 0
+cite$ -- 17
+duplicate$ -- 256
+empty$ -- 476
+format.name$ -- 146
+if$ -- 1457
+int.to.chr$ -- 0
+int.to.str$ -- 16
+missing$ -- 16
+newline$ -- 83
+num.names$ -- 32
+pop$ -- 132
+preamble$ -- 1
+purify$ -- 104
+quote$ -- 0
+skip$ -- 209
+stack$ -- 0
+substring$ -- 381
+swap$ -- 87
+text.length$ -- 7
+text.prefix$ -- 0
+top$ -- 0
+type$ -- 64
+warning$ -- 1
+while$ -- 53
+width$ -- 18
+write$ -- 175
+(There was 1 warning)

+ 39 - 0
literature_notes/life_long_learning.fdb_latexmk

@@ -0,0 +1,39 @@
+# Fdb version 3
+["bibtex life_long_learning"] 1630059599 "life_long_learning.aux" "life_long_learning.bbl" "life_long_learning" 1630059600
+  "/usr/share/texlive/texmf-dist/bibtex/bst/base/abbrv.bst" 1480098433 20329 b5fed53e10044d0f8112183785c759b1 ""
+  "life_long_leaning.bib" 1630059495 12527 71ed040ed3df53735116835ee12da179 ""
+  "life_long_learning.aux" 1630059600 3247 ec0780a394644896ad3b3476cedbcefe ""
+  "life_long_learning.bcf" 0 -1 0 ""
+  (generated)
+  "life_long_learning.bbl"
+["pdflatex"] 1630059599 "life_long_learning.tex" "/users/boehlke/AMMOD/Research/literature/life_long_learning.pdf" "life_long_learning" 1630059600
+  "/etc/texmf/web2c/texmf.cnf" 1612436002 475 c0e671620eb5563b2130f56340a5fde8 ""
+  "/users/boehlke/AMMOD/Research/literature/life_long_learning.aux" 1630059600 3247 ec0780a394644896ad3b3476cedbcefe ""
+  "/usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map" 1511824771 3332 103109f5612ad95229751940c61aada0 ""
+  "/usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmbx12.tfm" 1480098701 1324 c910af8c371558dc20f2d7822f66fe64 ""
+  "/usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmmi12.tfm" 1480098701 1524 4414a8315f39513458b80dfc63bff03a ""
+  "/usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmr12.tfm" 1480098701 1288 655e228510b4c2a1abe905c368440826 ""
+  "/usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmr17.tfm" 1480098701 1292 296a67155bdbfc32aa9c636f21e91433 ""
+  "/usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmsy10.tfm" 1480098701 1124 6c73e740cf17375f03eec0ee63599741 ""
+  "/usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmti12.tfm" 1480098701 1484 ed72f8f5cf654cda15ecc8e32bfcbee5 ""
+  "/usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb" 1480098733 32080 340ef9bf63678554ee606688e7b5339d ""
+  "/usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb" 1480098733 32722 d7379af29a190c3f453aba36302ff5a9 ""
+  "/usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb" 1480098733 32362 179c33bbf43f19adbb3825bb4e36e57a ""
+  "/usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb" 1480098733 32569 5e5ddc8df908dea60932f3c484a54c0d ""
+  "/usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti12.pfb" 1480098733 36118 fad905eba93cff5bce1e185fe980a177 ""
+  "/usr/share/texlive/texmf-dist/tex/latex/base/article.cls" 1480098821 19821 310da678527a7dfe2a02c88af38079b7 ""
+  "/usr/share/texlive/texmf-dist/tex/latex/base/omscmr.fd" 1480098821 2256 80ce1168fb4ce6a85583a9cf8972c013 ""
+  "/usr/share/texlive/texmf-dist/tex/latex/base/size12.clo" 1480098821 8303 1d67e16498f00f63da792fab169302fc ""
+  "/usr/share/texlive/texmf-dist/web2c/texmf.cnf" 1520210507 32485 c64754543d8ac501bea6e75e209ea521 ""
+  "/usr/share/texmf/web2c/texmf.cnf" 1520210507 32485 c64754543d8ac501bea6e75e209ea521 ""
+  "/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map" 1612436036 2700761 ac0584cc9514ab21918550a6948c4ee2 ""
+  "/var/lib/texmf/web2c/pdftex/pdflatex.fmt" 1612436045 4126197 7f8723027c3ae142c3b6d14052bd603c ""
+  "life_long_learning.aux" 1630059600 3247 ec0780a394644896ad3b3476cedbcefe ""
+  "life_long_learning.bbl" 1630059599 3955 25086b4989fd9e175a3b5db955c32866 "bibtex life_long_learning"
+  "life_long_learning.tex" 1630059495 17367 35e5b75b7c4e7d1c31bf34f9c1db65ab ""
+  (generated)
+  "life_long_learning.aux"
+  "life_long_learning.pdf"
+  "life_long_learning.log"
+  "/users/boehlke/AMMOD/Research/literature/life_long_learning.log"
+  "/users/boehlke/AMMOD/Research/literature/life_long_learning.pdf"

+ 38 - 0
literature_notes/life_long_learning.fls

@@ -0,0 +1,38 @@
+PWD /users/boehlke/AMMOD/Research/literature
+INPUT /etc/texmf/web2c/texmf.cnf
+INPUT /usr/share/texmf/web2c/texmf.cnf
+INPUT /usr/share/texlive/texmf-dist/web2c/texmf.cnf
+INPUT /var/lib/texmf/web2c/pdftex/pdflatex.fmt
+INPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.tex
+OUTPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.log
+INPUT /usr/share/texlive/texmf-dist/tex/latex/base/article.cls
+INPUT /usr/share/texlive/texmf-dist/tex/latex/base/article.cls
+INPUT /usr/share/texlive/texmf-dist/tex/latex/base/size12.clo
+INPUT /usr/share/texlive/texmf-dist/tex/latex/base/size12.clo
+INPUT /usr/share/texlive/texmf-dist/fonts/map/fontname/texfonts.map
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmr12.tfm
+INPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.aux
+INPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.aux
+OUTPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.aux
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmr17.tfm
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmr12.tfm
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmmi12.tfm
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmsy10.tfm
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmr17.tfm
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmbx12.tfm
+INPUT /usr/share/texlive/texmf-dist/tex/latex/base/omscmr.fd
+INPUT /usr/share/texlive/texmf-dist/tex/latex/base/omscmr.fd
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmsy10.tfm
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmbx12.tfm
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmti12.tfm
+OUTPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.pdf
+INPUT /var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map
+INPUT /usr/share/texlive/texmf-dist/fonts/tfm/public/cm/cmbx12.tfm
+INPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.bbl
+INPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.bbl
+INPUT /users/boehlke/AMMOD/Research/literature/life_long_learning.aux
+INPUT /usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmbx12.pfb
+INPUT /usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr12.pfb
+INPUT /usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmr17.pfb
+INPUT /usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb
+INPUT /usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti12.pfb

BIN
literature_notes/life_long_learning.pdf


BIN
literature_notes/life_long_learning.synctex.gz


+ 198 - 0
literature_notes/life_long_learning.tex

@@ -0,0 +1,198 @@
+\title{Literature Notes: Continual Learning}
+\author{Julia Boehlke}
+\date{\today}
+
+\documentclass[12pt]{article}
+
+\begin{document}
+\maketitle
+
+
+\section{Requirements of CL Literature}
+\begin{itemize}
+\itemsep0em 
+  \item Avoid forgetting (*)
+  \item Fixed memory and compute
+  \item Enable forward transfer
+  \item Enable backward transfer (*)
+  \item Do not store examples
+\end{itemize}
+
+
+\section{Survey Papers / Meta Papers}
+\paragraph{GDumb: A Simple Approach that Questions Our Progress in Continual Learning} \cite{Prabhu2020GDumbAS}
+\begin{itemize}
+\itemsep0em 
+\item GDumb = Greedy Sampler and Dumb Learner (class-balanced fixed memory buffer, retrained from scratch using the samples in the buffer; see the sketch after this list)
+\item{Simplifying Assumptions in CL}
+\begin{enumerate}
+         \item Disjoint task formulation: for a particular duration in time, the data stream provides samples specific to one task. Sometimes this assumption also entails that there is only \emph{one} specific time at which data for a specific task is streamed. This means there is no backward transfer. 
+         \item Task-incremental (TI-CL): along with the disjoint-task assumption, the task information (or id) is also passed during training and inference (multi-head). In class-incremental continual learning (CI-CL) no such task information is given. 
+         \item Online CL: the learner is restricted to using each sample only once to update parameters (unless it is stored in the buffer). In offline CL there is unrestricted access to the entire current dataset for training over multiple epochs. 
+\end{enumerate}
+\item Online CL is preferable in situations where the data stream emits samples quickly.
+\item Found that GDumb outperforms most methods by a large margin
+\item Table 1 gives a great overview/categorization of methods and assumptions
+\item None of the reviewed papers seem to match our assumptions/requirements exactly: non-disjoint, class-incremental, offline. 
+\end{itemize}
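+
+A minimal sketch of the greedy class-balanced sampler in Python (my own illustration; class and method names are mine, and the eviction rule is paraphrased from the paper, not taken from its code):
+\begin{verbatim}
+import random
+from collections import defaultdict
+
+class GreedyBalancedBuffer:
+    """Greedy, class-balanced storage in the spirit of GDumb."""
+    def __init__(self, capacity):
+        self.capacity = capacity
+        self.per_class = defaultdict(list)
+
+    def add(self, x, y):
+        size = sum(len(v) for v in self.per_class.values())
+        if size < self.capacity:
+            self.per_class[y].append(x)   # greedily fill free space
+            return
+        # Buffer full: only accept if class y is under-represented,
+        # evicting a random sample from the currently largest class.
+        largest = max(self.per_class, key=lambda c: len(self.per_class[c]))
+        if len(self.per_class[y]) < len(self.per_class[largest]):
+            self.per_class[largest].pop(
+                random.randrange(len(self.per_class[largest])))
+            self.per_class[y].append(x)
+\end{verbatim}
+The ``dumb learner'' is then simply retrained from scratch on the buffer contents whenever an evaluation is requested.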
+
+\paragraph{CVPR 2020 Continual Learning in Computer Vision Competition: Approaches,
+Results, Current Challenges and Future Directions} \cite{Lomonaco2020CVPR2C}
+\begin{itemize}
+\itemsep0em 
+\item CVPR Continual Learning challenge on the CORe50 dataset including three different tracks: New Instances (8 batches of all classes, i.e., focus on backward transfer); Multi-Task New Classes (multi-head); New Instances and Classes (batches containing examples of a single class may contain previously seen or new classes, i.e., a disjoint setting focused on improving on seen classes with single-head classification)
+\item evaluated on a weighted sum of scores for accuracy, disk usage, RAM usage, and running time
+\item baselines include naive fine-tuning, rehearsal with growing memory (20 images of each batch stored), and AR1* with latent replay \cite{Pellegrini2020LatentRF} (described below)
+\item the winning team uses a replay method for NIC and divides the network outputs by the prior probability of each class to handle class imbalance (Buda et al. 2018); see the sketch after this list
+\item the top-4 solutions employ rehearsal-based techniques
+\item on the NI track, the UT\_LG team performs rehearsal training at the batch instead of the mini-batch level (for every epoch, one memory batch and the current new batch are concatenated) and introduces a review step (with a lower learning rate) before testing, where only memory data is used.
+\item code available for all submissions 
+\end{itemize}
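+
+The class-prior correction used by the winning team can be sketched as follows (my minimal reading of the trick they attribute to Buda et al. 2018; function and variable names are illustrative):
+\begin{verbatim}
+import torch
+
+def prior_corrected_predict(logits, class_counts):
+    # class_counts: 1-D tensor of per-class training counts.
+    # Divide the softmax outputs by the empirical class priors so that
+    # frequent classes are not systematically favoured.
+    priors = class_counts / class_counts.sum()
+    probs = torch.softmax(logits, dim=1)
+    return (probs / priors).argmax(dim=1)
+\end{verbatim}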
+
+
+\section{Papers}
+
+\subsection{Rehearsal-Based Methods}
+Rehearsal methods allow at least some data to be stored and used to \emph{rehearse} previously learned knowledge. This is also known as Experience Replay (ER). When no storage of data is possible, rehearsal is often performed using generated images \cite{shin2017continual}, where previously learned knowledge is stored indirectly.
+
+\paragraph{Online Continual Learning with Maximally Interfered Retrieval} (MIR) \cite{Aljundi2019OnlineCL}
+\begin{itemize}
+\itemsep0em 
+\item CI-CL, online, disjoint, rehearsal-based approach.
+\item Sampling criterion for the controlled selection (for rehearsal) of samples from the buffer whose predictions will be most negatively impacted by the foreseen parameter update. Their research question: which samples should be replayed from the previous history when new samples are received? (see the sketch after this list)
+\item most negatively impacted = the loss changes most when updating on the new data (estimated for a subset of the buffer data).
+\item also applicable to generative replay approaches 
+\item in a case with a relatively small total number of classes/tasks (MNIST SPLIT) their approach (ER+MIR) is significantly better (87.6\%) than random-sampling ER (82.1\%). In other scenarios, their approach outperforms by a smaller margin.
+\item (I don't really understand why they restrict themselves to a disjoint setting; this should also work in a non-disjoint situation.)
+\end{itemize}
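+
+My rough sketch of the MIR retrieval criterion in PyTorch (the virtual update is applied to a copy of the model; hyperparameter values are placeholders, not the paper's):
+\begin{verbatim}
+import copy
+import torch
+import torch.nn.functional as F
+
+def mir_retrieve(model, new_x, new_y, buf_x, buf_y,
+                 lr=0.1, n_cand=50, k=10):
+    # Draw candidates from the buffer and record their current loss.
+    idx = torch.randperm(len(buf_x))[:n_cand]
+    cx, cy = buf_x[idx], buf_y[idx]
+    with torch.no_grad():
+        loss_before = F.cross_entropy(model(cx), cy, reduction='none')
+    # Virtual SGD step on the incoming batch, applied to a copy.
+    virtual = copy.deepcopy(model)
+    loss_new = F.cross_entropy(virtual(new_x), new_y)
+    grads = torch.autograd.grad(loss_new, list(virtual.parameters()))
+    with torch.no_grad():
+        for p, g in zip(virtual.parameters(), grads):
+            p -= lr * g
+        loss_after = F.cross_entropy(virtual(cx), cy, reduction='none')
+    # Replay the samples whose loss would increase the most.
+    top = torch.topk(loss_after - loss_before, k).indices
+    return cx[top], cy[top]
+\end{verbatim}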
+
+\paragraph{Gradient based sample selection for online continual learning} \cite{Aljundi2019GradientBS}
+\begin{itemize}
+\itemsep0em 
+\item CI-CL, online, non-disjoint; expands the GEM approach to the situation where task boundaries are not available
+\item formulate the replay-buffer population problem as constrained minimization of the solid angle. They use a surrogate objective, which maximizes the diversity of samples using the parameter gradients of samples instead of feature representations
+\item indirectly address the issue of class imbalance 
+\item re-evaluate the replay buffer once a so-called \emph{recent} buffer is full
+\item also propose a cheap greedy alternative for sample selection with large buffers (removes the overhead of computing gradients for all samples when solving the constrained optimization). Idea: compute a score based on the maximal cosine similarity of the current sample's gradients with those of a randomly selected subset of the buffer. When a new sample arrives, compute its score, randomly select a candidate for replacement (with probability given by the normalized scores), and compare the two scores to decide (see the sketch after this list). For large buffers, the constrained optimization is replaced with a soft regularization equivalent to rehearsal. 
+\item experiments performed on low-resolution datasets such as MNIST and CIFAR10  
+\item compared with random, clustering-based, and reservoir buffer-population methods, their approaches show merit, especially the greedy approach using rehearsal instead of constrained optimization. 
+\item code available
+\end{itemize}
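+
+My attempt at the greedy variant in PyTorch (the scoring and replacement rule are paraphrased from the paper as I understand it; assumes a non-empty buffer, and all names are mine):
+\begin{verbatim}
+import random
+import torch
+import torch.nn.functional as F
+
+def grad_vec(model, x, y):
+    loss = F.cross_entropy(model(x.unsqueeze(0)), y.unsqueeze(0))
+    grads = torch.autograd.grad(loss, list(model.parameters()))
+    return torch.cat([g.flatten() for g in grads])
+
+def greedy_update(model, buffer, scores, x, y, n_ref=10):
+    # buffer: list of (x, y) pairs; scores: parallel list of floats.
+    # Score = max cosine similarity of the sample's gradient to a random
+    # reference subset of the buffer (lower score = more diverse sample).
+    ref = random.sample(range(len(buffer)), min(n_ref, len(buffer)))
+    g = grad_vec(model, x, y)
+    score = 1.0 + max(F.cosine_similarity(
+        g, grad_vec(model, *buffer[i]), dim=0).item() for i in ref)
+    # Pick an eviction candidate with probability proportional to its
+    # score and keep whichever of the two samples is more diverse.
+    i = random.choices(range(len(buffer)), weights=scores, k=1)[0]
+    if score < scores[i]:
+        buffer[i], scores[i] = (x, y), score
+\end{verbatim}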
+
+\paragraph{Random Sampling with a Reservoir} \cite{Vitter1985RandomSW}
+\begin{itemize}
+\itemsep0em
+\item A 1985 algorithm designed to uniformly sample from a stream of data when the total number of elements in the stream is unknown in advance. 
+\item This algorithm can be used to continuously update a fixed-size buffer with samples from a stream while ensuring that, once the stream ends, every sample had the same probability (buffer size divided by total stream length) of being in the buffer; see the sketch after this list. 
+\end{itemize}
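+
+For reference, Algorithm R in a few lines of Python (this is the standard textbook algorithm, so it should be faithful to the paper):
+\begin{verbatim}
+import random
+
+def reservoir_sample(stream, k):
+    """Uniform sample of k items from a stream of unknown length."""
+    buffer = []
+    for n, item in enumerate(stream):
+        if n < k:
+            buffer.append(item)          # fill the reservoir first
+        else:
+            j = random.randrange(n + 1)  # item survives w.p. k/(n+1)
+            if j < k:
+                buffer[j] = item
+    return buffer
+\end{verbatim}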
+
+\paragraph{More Is Better: An Analysis of Instance Quantity/Quality Trade-off in Rehearsal-based Continual Learning} \cite{Pelosin2021MoreIB}
+\begin{itemize}
+\itemsep0em
+\item evaluated in a class-incremental setting: CI-CL, disjoint
+\item state that rehearsal-based methods are 'emerging as the most effective methodology to tackle CL' and refer to \cite{Knoblauch2020OptimalCL} for a theoretical justification (optimal CL would require perfect memory)
+\item investigate several dimensionality reductions (deep encoders, variational autoencoders, random projections). They compare their methods to GDumb (Greedy sampler and Dumb learner), which does not use any clever selection strategy for the buffer or the training approach. 
+\item evaluated on final accuracy with several datasets (MNIST, CIFAR, ImageNet, Core50). Given a fixed memory size, different numbers of samples can be stored depending on the reduction parameters (peak performance is achieved when filling the memory with 8x8-pixel images; see the sketch after this list)
+\item experiments were only performed for the disjoint setting, i.e., where the data stream shows each task only once during training. 
+\item code available
+\end{itemize}
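+
+For illustration (my own numbers, assuming uncompressed RGB bytes): a $32\times32$ image costs $32\cdot32\cdot3 = 3072$ bytes while an $8\times8$ image costs $8\cdot8\cdot3 = 192$ bytes, so the same memory budget holds $16\times$ more samples at the lower resolution.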
+
+
+\paragraph{Latent Replay for Real-Time Continual Learning} \cite{Pellegrini2020LatentRF}
+\begin{itemize}
+\itemsep0em
+\item store representations from an intermediate layer of the network instead of images in input space to reduce the memory requirement (see the sketch after this list). To keep the stored representations valid, they propose slowed-down learning for the layers below the latent replay layer. 
+\item `a robot should be able to incrementally improve its object recognition capabilities while being exposed to new instances of both known and completely new classes (denoted as NIC setting - New Instances and Classes)'
+\item this paper aims at improving the overall accuracy of non-rehearsal-based methods such as AR1 and CWR \cite{Maltoni2019ContinuousLI} (described below)
+\end{itemize}
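+
+A minimal sketch of one training step with latent replay, under my reading of the idea (\texttt{lower} is the part of the network below the replay layer and \texttt{update\_upper} a stand-in for a gradient step on the layers above it only):
+
+\begin{verbatim}
+import random
+
+def latent_replay_step(lower, update_upper, batch, latent_buffer):
+    """New images pass through the slow/frozen lower layers once;
+    stored activations skip them entirely."""
+    new_latents = [(lower(x), y) for x, y in batch]
+    replay = random.sample(latent_buffer, min(32, len(latent_buffer)))
+    update_upper(new_latents + replay)  # train upper layers on both
+    latent_buffer.extend(new_latents)   # buffer population policy omitted
+\end{verbatim}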
+
+\subsection{Knowledge Distillation}
+This category is based on the distillation loss. Basically, the model's output for old samples becomes the new desired output when new data is available for updating. Especially in a multi-task/multi-head scenario, the logits of the heads for previously seen data should not change much when a new head is learned. The most famous, original introduction of the distillation loss to continual learning was made by \cite{li2017learning}, which does not enable any backward transfer of knowledge and requires task knowledge at inference. 
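+
+Written out (a sketch in the spirit of \cite{li2017learning}), with $\hat{z}$ the logits recorded from the old model, $z$ the current logits, and $T$ a temperature:
+\[
+\mathcal{L}_{\mathrm{dist}} = -\sum_{i} \hat{p}_i \log p_i,
+\qquad
+\hat{p}_i = \frac{e^{\hat{z}_i/T}}{\sum_j e^{\hat{z}_j/T}},
+\quad
+p_i = \frac{e^{z_i/T}}{\sum_j e^{z_j/T}}.
+\]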
+
+\paragraph{iCaRL: Incremental Classifier and Representation Learning} \cite{rebuffi2017icarl}
+\begin{itemize}
+\itemsep0em 
+\item CI-CL, offline, disjoint; assumes that samples from each task (a batch of classes) are only present at one point in time of the data stream. 
+\item assumes there is a fixed-size memory available to store exemplars from previous classes
+\item use a nearest-mean-of-exemplars classifier (on feature representations) for inference; the rule is written out after this list. At training time, the exemplar memory buffer and the model parameters are updated. When samples for a new class are available, a new training batch is constructed from the new and the stored data. The outputs of the current network for all stored images of previous classes are recorded, since they are needed for the distillation loss. The model is updated with the cross-entropy loss for samples from the new class, while it is encouraged to reproduce the previously recorded outputs (distillation loss) for the old samples.
+\item when new classes are introduced and weights are added to the network, some samples in the buffer are dropped to make room for samples from the new classes. The set of exemplars for each class is selected based on the current class mean of the feature vectors. 
+\item evaluated using the CIFAR-100 and ImageNet datasets, showing impressive results compared to previous methods for the disjoint task formulation
+\item (While I think the idea of using the distillation loss for previously stored samples could be applicable in a non-disjoint task formulation, the distillation loss is designed to preserve previously inferred knowledge in a model and to allow forward transfer. In our situation, backward transfer is one of the most important requirements, which the distillation loss is not designed for. I do not think it would be wise in our scenario to penalize model outputs changing for previously seen data, since that might be necessary to improve the classification boundaries.) 
+\end{itemize}
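+
+For reference, the nearest-mean-of-exemplars rule classifies a sample $x$ as
+\[
+y^* = \arg\min_{y} \left\| \varphi(x) - \mu_y \right\|,
+\qquad
+\mu_y = \frac{1}{|P_y|} \sum_{p \in P_y} \varphi(p),
+\]
+where $\varphi$ is the feature extractor and $P_y$ the stored exemplar set of class $y$.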
+
+\subsection{Regularization Approaches}
+The basic idea behind regularization-based approaches is to penalize a model for changing \emph{too much} when training on newly seen data, finding a sensible trade-off between plasticity and stability of the network over time. 
+Most influential in this category is the Elastic Weight Consolidation (EWC) approach proposed by \cite{kirkpatrick2017overcoming}. Each parameter's importance for the classification of previous tasks is estimated using the Fisher information (related to the curvature of the loss function). Updates to \emph{important} parameters are penalized proportionally in the loss function when new tasks are learned. This approach is designed for task-incremental learning and does not allow backward transfer of knowledge.
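+
+In the two-task case (after task $A$, now training on task $B$) the EWC loss takes the form
+\[
+\mathcal{L}(\theta) = \mathcal{L}_B(\theta) + \sum_i \frac{\lambda}{2}\, F_i \left(\theta_i - \theta_{A,i}^{*}\right)^2,
+\]
+where $F_i$ is the diagonal Fisher information of parameter $i$ estimated on task $A$, $\theta_A^{*}$ the optimum after task $A$, and $\lambda$ the stability/plasticity trade-off.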
+
+\paragraph{Riemannian Walk for Incremental Learning: Understanding Forgetting and Intransigence} 
+\cite{chaudhry2018riemannian}
+\begin{itemize}
+\itemsep0em 
+\item CI-CL, offline, disjoint
+\item RWalk is a generalization of EWC to the CI setting. They use a KL-divergence-based regularization over the conditional likelihood $p(y|x)$ and a parameter importance score based on the sensitivity of the loss to movement on the Riemannian manifold (induced by the Fisher information) to mitigate catastrophic forgetting. By accumulating parameter importance over the entire training trajectory, their approach allows class-incremental learning. 
+\item define task-wise measures for forgetting (difference between the maximum knowledge attained and the current knowledge; written out after this list) and intransigence, the inability of a network to learn new tasks (difference between a model trained on the entire data and the incrementally learned model trained up to the specific task).  
+\item they show that their approach has a much greater impact for small numbers of samples than when large datasets are available
+\item suggest entropy-based sampling for creating the buffer dataset of old examples: samples for which the output of the neural network has a higher entropy are more likely to be picked.
+\item (While this approach allows for single-head classification, it still heavily relies on the disjoint dataset assumption. The basic idea is still that specific parameters are more important for specific tasks and that updating them when training new tasks should be avoided or reduced. For our application goals, this regularized loss could be used for a brief duration of training when a new class is introduced. The first task would be defined as all previously known classes and the second task would consist of the one new class only. This could be used in a strategy to focus learning on the new class while mitigating forgetting of the previous classes.)
+\end{itemize}
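+
+With $a_{l,j}$ denoting the accuracy on task $j$ after training up to task $l$, their forgetting measure for task $j$ after task $k$ can be written as
+\[
+f_j^{k} = \max_{l \in \{1,\dots,k-1\}} a_{l,j} \;-\; a_{k,j}.
+\]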
+
+\paragraph{Gradient Episodic Memory for Continual Learning} (GEM) \cite{lopez2017gradient}
+\begin{itemize}
+\itemsep0em 
+\item TI-CL, online, rehearsal (+ regularization) based approach
+\item they introduce metrics for evaluating backward and forward transfer
+\item no assumptions on the number of tasks are made 
+\item use a memory buffer to constrain updates when training new tasks
+\item constraint: the gradient direction of each past task (estimated with the memory) has a positive dot product with the gradient from the current batch of the new task; written out after this list.
+\item disadvantage: slow optimization under the constraints, and the method is TASK-INCREMENTAL
+\end{itemize}
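+
+The constraint, for the current batch $B$ and the episodic memories $\mathcal{M}_k$ of all past tasks $k < t$:
+\[
+\langle g, g_k \rangle = \Bigl\langle \frac{\partial \mathcal{L}(f_\theta, B)}{\partial \theta},\, \frac{\partial \mathcal{L}(f_\theta, \mathcal{M}_k)}{\partial \theta} \Bigr\rangle \geq 0 \quad \forall\, k < t;
+\]
+if violated, $g$ is projected to the closest gradient $\tilde{g}$ (in $\ell_2$ norm) satisfying all constraints, which is solved as a quadratic program.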
+
+\paragraph{Continuous Learning in Single-Incremental-Task Scenarios} \cite{Maltoni2019ContinuousLI}
+\begin{itemize} 
+\itemsep0em
+\item CI-CL, disjoint; introduce CWR and AR1 for NC (new classes) learning, where each batch can contain new classes, but they argue this could be adapted for NIC (new instances and classes) learning.
+\item main idea (CWR): for the final layer, keep one set of consolidated weights used for inference, plus temporary weights that are reset to 0 for each batch and are used to update the subset of rows of the consolidated weight matrix relevant to the classes seen in the current batch (a sketch of the consolidation step follows this list) 
+\item while CWR uses fixed representations extracted from a model, AR1 allows end-to-end CL by letting the feature extractor be trained simultaneously, in a controlled manner, with a regularized loss. They use Synaptic Intelligence (a variant of EWC, \cite{zenke2017continual})   
+\item \cite{Lomonaco2020RehearsalFreeCL} expanded on this approach for the NIC task by updating the weights of an already seen class using a weighted sum of past and current weights in the consolidation step
+\item the results of this approach were further improved using the latent replay method \cite{Pellegrini2020LatentRF}
+\item \cite{Lomonaco2020RehearsalFreeCL} also provides a benchmark protocol for the Core50 dataset on GitHub for a NIC task
+\end{itemize}
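+
+A rough sketch of the consolidation step as I understand it (\texttt{cw}/\texttt{tw} are the consolidated and temporary rows of the output layer, \texttt{seen\_counts} a per-class counter; the exact weighting differs between the CWR variants):
+
+\begin{verbatim}
+import numpy as np
+
+def cwr_consolidate(cw, tw, classes_in_batch, seen_counts):
+    """Merge temporary output-layer rows into the consolidated ones,
+    weighting by how often each class was encountered before."""
+    for c in classes_in_batch:
+        w = seen_counts[c]
+        cw[c] = (cw[c] * w + tw[c]) / (w + 1)  # weighted running average
+        seen_counts[c] += 1
+    tw[:] = 0.0  # temporary weights are reset before the next batch
+    return cw
+\end{verbatim}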
+
+
+\subsection{Parameter Isolation}
+Generally, this family of approaches is again originally designed for task-incremental learning. The main idea is to generate a binary mask over the parameters for each task, indicating their importance for that task. Subsequent tasks are then learned using only the leftover parameters of the network, so the task identity is typically required at inference to select the correct mask. 
+
+\paragraph{Conditional Channel Gated Networks for Task-Aware Continual Learning} 
+\begin{itemize}
+\itemsep0em 
+\item CI-CL, offline, disjoint (assumes the stream produces samples for one task for a duration in time, but during inference no task information is provided)
+\item original parameter isolation methods are not designed for class-incremental learning. This paper tries to generalize the formulation to class-incremental scenarios using some rehearsal. 
+\item main idea: jointly predict the task and the class label.
+\item use a gating module for each convolutional layer, which decides which kernels in the layer should be applied (a binary decision) based on the input features; a simplified sketch follows this list. The gating module consists of a very shallow neural network trained with a sparsity objective such that the smallest possible number of kernels is applied. After the training of a task, the most important parameters are frozen, i.e., their gradients are zeroed out during updates for subsequent task learning.
+\item (I do not see the advantage of parameter isolation methods for class-incremental learning. This approach practically splits the network into subsets for each task.) 
+\end{itemize}
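+
+A deliberately simplified sketch of per-layer channel gating (my own toy version, using a hard threshold instead of the differentiable gating the paper would need for training; \texttt{W\_gate}/\texttt{b\_gate} parameterize the shallow gating network):
+
+\begin{verbatim}
+import numpy as np
+
+def channel_gate(features, W_gate, b_gate):
+    """features: (C, H, W) feature map. Returns the map with
+    unselected channels zeroed out by a binary, input-dependent gate."""
+    pooled = features.mean(axis=(1, 2))         # (C,) global average pool
+    logits = W_gate @ pooled + b_gate           # shallow gating network
+    mask = (logits > 0).astype(features.dtype)  # hard binary decision
+    return features * mask[:, None, None]
+\end{verbatim}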
+
+\subsection{Continual Learning with Focus on Imbalanced Data}
+
+\paragraph{On Handling Class Imbalance in Continual Learning based Network Intrusion Detection Systems}
+\begin{itemize}
+\item application domain: anomaly-based network intrusion detection
+\item class-incremental setting
+\item sample replay with class-balancing reservoir sampling (CBRS); a sketch follows this list
+\item compare with a data augmentation strategy used for inflating small classes in imbalanced datasets
+\item the Related Work section contains a great overview of strategies for handling class imbalance in standard, non-continual settings
+\end{itemize}
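+
+A simplified sketch of CBRS as I understand it (the original distinguishes ``full'' classes more carefully; \texttt{stream\_counts} tracks how many samples of each class have appeared in the stream so far):
+
+\begin{verbatim}
+import random
+from collections import Counter
+
+def cbrs_update(buffer, capacity, sample, label, stream_counts):
+    stream_counts[label] += 1
+    if len(buffer) < capacity:
+        buffer.append((sample, label))
+        return
+    counts = Counter(l for _, l in buffer)
+    largest = max(counts, key=counts.get)
+    if label != largest:
+        # minority class: evict a random sample of the largest class
+        idx = random.choice([i for i, (_, l) in enumerate(buffer)
+                             if l == largest])
+        buffer[idx] = (sample, label)
+    elif random.random() < counts[label] / stream_counts[label]:
+        # majority class: reservoir-style replacement within its own class
+        idx = random.choice([i for i, (_, l) in enumerate(buffer)
+                             if l == label])
+        buffer[idx] = (sample, label)
+\end{verbatim}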
+
+\bibliographystyle{abbrv}
+
+\bibliography{life_long_leaning}
+
+\end{document}
+

+ 0 - 0
literature_notes/moths.bib