소스 검색

Added NoisyOracleInteractor

Clemens-Alexander Brust 4년 전
부모
커밋
98f59ed7ee
4개의 파일이 변경됨: 188줄 추가, 6줄 삭제
  1. 7 4
      chillax/experiment_selfsupervised.py
  2. 179 0
      chillax/noisy_oracle.py
  3. 1 1
      chillax/version.py
  4. 1 1
      setup.py

+ 7 - 4
chillax/experiment_selfsupervised.py

@@ -1,7 +1,7 @@
 from chia import containers, instrumentation
-from chia.components import classifiers
+from chia.components import classifiers, interactors
 from chia import helpers
-from chillax import chillax_classifier, chillax_extrapolator
+from chillax import chillax_classifier, chillax_extrapolator, noisy_oracle
 
 import config as pcfg
 import argparse
@@ -10,7 +10,7 @@ import argparse
 def main(config_files):
     # Set up buffered observer
     buffered_observer = instrumentation.ObserverFactory.create({"name": "buffered"})
-    
+
     # Set some important environment variables and validate the GPU configuration
     helpers.setup_environment([buffered_observer])
 
@@ -23,11 +23,14 @@ def main(config_files):
     classifiers.ClassifierFactory.name_to_class_mapping.update(
         {"chillax": chillax_classifier.CHILLAXKerasHC}
     )
+    interactors.InteractorFactory.name_to_class_mapping.update(
+        {"noisy_oracle": noisy_oracle.NoisyOracleInteractor}
+    )
 
     obs = instrumentation.NamedObservable("Experiment")
 
     experiment_container = containers.ExperimentContainer(config, outer_observable=obs)
-    
+
     # Replay the buffer
     buffered_observer.replay_messages(obs)
 

+ 179 - 0
chillax/noisy_oracle.py

@@ -0,0 +1,179 @@
+from typing import Optional
+
+import networkx as nx
+import numpy as np
+
+from chia import instrumentation, knowledge
+from chia.components.interactors import interactor
+
+
+class NoisyOracleInteractor(
+    interactor.Interactor, instrumentation.Observable, instrumentation.Observer
+):
+    def __init__(
+        self,
+        kb,
+        noise_model,
+        inaccuracy=0.0,
+        relabel_fraction=None,
+        lambda_=None,
+        q=None,
+        filter_imprecise=False,
+        project_to_random_leaf=False,
+    ):
+        interactor.Interactor.__init__(self, kb=kb)
+        instrumentation.Observable.__init__(self)
+        instrumentation.Observer.__init__(self)
+
+        self.noise_model = noise_model
+        self.inaccuracy = inaccuracy
+
+        if self.noise_model == "Deng2014":
+            assert relabel_fraction is not None
+            self.relabel_fraction: float = relabel_fraction
+        elif self.noise_model == "Poisson":
+            assert lambda_ is not None
+            self.lambda_: float = lambda_
+        elif self.noise_model == "Geometric":
+            assert q is not None
+            self.q: float = q
+        elif self.noise_model == "Inaccuracy":
+            pass
+        else:
+            raise ValueError(f"Unknown noise model: {self.noise_model}")
+
+        self.filter_imprecise = filter_imprecise
+        self.project_to_random_leaf = project_to_random_leaf
+
+        self.is_updated = False
+        self.graph: Optional[nx.DiGraph] = None
+        self.root = None
+        self.leaf_nodes = None
+
+        self._kb.register(self)
+
+    def _apply_deng_noise(self, uid):
+        if np.random.binomial(1, self.relabel_fraction):
+            chosen_predecessor = next(
+                self.graph.predecessors(uid)
+            )  # TODO what to do if there is more than 1 parent?
+            return chosen_predecessor
+        else:
+            return uid
+
+    def _apply_geometric_noise(self, uid):
+        target = np.random.geometric(1 - self.q) - 1
+        return self._reduce_depth_to(uid, target)
+
+    def _apply_poisson_noise(self, uid):
+        target = np.random.poisson(self.lambda_)
+        return self._reduce_depth_to(uid, target)
+
+    def _reduce_depth_to(self, uid, depth_target):
+        path_to_label = nx.shortest_path(self.graph, self.root, uid)
+        final_depth = max(0, min(len(path_to_label) - 1, depth_target))
+        return path_to_label[final_depth]
+
+    def _project_to_random_leaf(self, uid):
+        if self.graph.out_degree(uid) == 0:  # noqa
+            return uid
+        else:
+            # List all descendants
+            all_descendants = nx.descendants(self.graph, uid)
+
+            # Use only leaves
+            valid_descendants = list(
+                filter(lambda n: self.graph.out_degree(n) == 0, all_descendants)  # noqa
+            )
+
+            return np.random.choice(valid_descendants)
+
+    def _maybe_update_graphs(self):
+        if not self.is_updated:
+            try:
+                self.graph = self._kb.get_hyponymy_relation_rgraph()
+                self.root = next(nx.topological_sort(self.graph))
+                self.leaf_nodes = list(
+                    filter(
+                        lambda n: self.graph.out_degree(n) == 0, self.graph.nodes
+                    )  # noqa
+                )
+                self.is_updated = True
+            except ValueError:
+                # No graph available yet
+                pass
+
+    def query_annotations_for(self, samples, gt_resource_id, ann_resource_id):
+        self._maybe_update_graphs()
+
+        # Add noise
+        noisy_samples = [
+            sample.add_resource(
+                self.__class__.__name__,
+                ann_resource_id,
+                self.apply_noise(sample.get_resource(gt_resource_id)),
+            )
+            for sample in samples
+        ]
+
+        # Count modified samples
+        modified_samples = sum(
+            [
+                1
+                if noisy_sample.get_resource(gt_resource_id)
+                != noisy_sample.get_resource(ann_resource_id)
+                else 0
+                for noisy_sample in noisy_samples
+            ]
+        )
+        self.log_debug(f"Modified {modified_samples} out of {len(samples)} samples.")
+
+        # Filter imprecise samples
+        precise_only_samples = [
+            sample
+            for sample in noisy_samples
+            if self.apply_filter(sample.get_resource(ann_resource_id))
+        ]
+        self.log_debug(
+            f"Filtered out {len(samples)-len(precise_only_samples)}"
+            + f" out of {len(samples)} samples."
+        )
+        return precise_only_samples
+
+    def apply_noise(self, uid):
+        # Apply inaccuracy
+        if np.random.uniform() <= self.inaccuracy:
+            assert uid in self.leaf_nodes
+            inaccurate_uid = np.random.choice(self.leaf_nodes)
+        else:
+            inaccurate_uid = uid
+
+        # Select noise model
+        if self.noise_model == "Deng2014":
+            noisy_uid = self._apply_deng_noise(inaccurate_uid)
+        elif self.noise_model == "Geometric":
+            noisy_uid = self._apply_geometric_noise(inaccurate_uid)
+        elif self.noise_model == "Poisson":
+            noisy_uid = self._apply_poisson_noise(inaccurate_uid)
+        elif self.noise_model == "Inaccuracy":
+            noisy_uid = inaccurate_uid
+        else:
+            raise ValueError(f"Unknown noise model {self.noise_model}")
+
+        # Project to random leaf
+        if self.project_to_random_leaf:
+            noisy_uid = self._project_to_random_leaf(noisy_uid)
+
+        return noisy_uid
+
+    def apply_filter(self, uid):
+        if self.filter_imprecise:
+            return self.graph.out_degree(uid) == 0  # noqa
+        else:
+            return True
+
+    def update(self, message: instrumentation.Message):
+        if isinstance(message, knowledge.RelationChangeMessage) or isinstance(
+            message, knowledge.ConceptChangeMessage
+        ):
+            self.is_updated = False

+ 1 - 1
chillax/version.py

@@ -1 +1 @@
-__version__ = "0.1a12"
+__version__ = "0.1a13"

+ 1 - 1
setup.py

@@ -17,7 +17,7 @@ setup(
     packages=find_packages(),
     python_requires=">=3.7",
     install_requires=[
-        "chia>=2.0rc17",
+        "chia>=2.0rc18",
     ],
     # metadata to display on PyPI
     author="Clemens-Alexander Brust",