@@ -14,8 +14,7 @@ class CHILLAXKerasHC(
         self,
         kb,
         l2=5e-5,
-        mlnp=True,
-        normalize_scores=True,
+        force_prediction_targets=True,
         raw_output=False,
         weighting="default",
         gain_compensation="simple",
@@ -26,13 +25,12 @@ class CHILLAXKerasHC(
         # Configuration
         self._l2_regularization_coefficient = l2

-        self._mlnp = mlnp
-        self._normalize_scores = normalize_scores
+        self._force_prediction_targets = force_prediction_targets

         self._raw_output = raw_output
-        if self._raw_output and (self._mlnp or self._normalize_scores):
+        if self._raw_output and self._force_prediction_targets:
             raise ValueError(
-                "Cannot use raw output and MLNP or normalization at the same time!"
+                "Cannot use raw output and forced prediction targets at the same time!"
             )

         self._weighting = weighting
@@ -46,6 +44,8 @@ class CHILLAXKerasHC(
         self.loss_weights = None
         self.update_embedding()

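+        # Optional label extrapolator, consumed by loss() via _extrapolate();
+        # None disables extrapolation.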
+        self.extrapolator = None
+
     def predict_embedded(self, feature_batch):
         return self.fc_layer(feature_batch)

@@ -67,55 +67,61 @@ class CHILLAXKerasHC(
         ]

     def _deembed_single(self, embedded_label):
-        conditional_probabilities = {
-            uid: embedded_label[i] for uid, i in self.uid_to_dimension.items()
-        }
+        conditional_probabilities = self._calculate_conditional_probabilities(
+            embedded_label
+        )

         if self._raw_output:
             # Directly output conditional probabilities
             return list(conditional_probabilities.items())
         else:
-            # Stage 1 calculates the unconditional probabilities
-            unconditional_probabilities = {}
-
-            for uid in self.topo_sorted_uids:
-                unconditional_probability = conditional_probabilities[uid]
-
-                no_parent_probability = 1.0
-                has_parents = False
-                for parent in self.graph.predecessors(uid):
-                    has_parents = True
-                    no_parent_probability *= 1.0 - unconditional_probabilities[parent]
-
-                if has_parents:
-                    unconditional_probability *= 1.0 - no_parent_probability
-
-                unconditional_probabilities[uid] = unconditional_probability
-
-            # Stage 2 calculates the joint probability of the synset and "no children"
-            joint_probabilities = {}
-            for uid in reversed(self.topo_sorted_uids):
-                joint_probability = unconditional_probabilities[uid]
-                no_child_probability = 1.0
-                for child in self.graph.successors(uid):
-                    no_child_probability *= 1.0 - unconditional_probabilities[child]
-
-                joint_probabilities[uid] = joint_probability * no_child_probability
+            unconditional_probabilities = self._calculate_unconditional_probabilities(
+                conditional_probabilities
+            )

-            tuples = joint_probabilities.items()
+            # Note: Stage 2 from IDK is missing here. This is on purpose.
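+            # (Stage 2 would have combined each synset with the probability of
+            # having no predicted children; the unconditional scores are ranked
+            # directly instead.)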
+            tuples = unconditional_probabilities.items()
             sorted_tuples = list(sorted(tuples, key=lambda tup: tup[1], reverse=True))

-            if self._mlnp:
+            # If requested, only output scores for the forced prediction targets
+            if self._force_prediction_targets:
                 for i, (uid, p) in enumerate(sorted_tuples):
                     if uid not in self.prediction_target_uids:
                         sorted_tuples[i] = (uid, 0.0)

-            if self._normalize_scores:
                 total_scores = sum([p for uid, p in sorted_tuples])
-                sorted_tuples = [(uid, p / total_scores) for uid, p in sorted_tuples]
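+                # Guard against division by zero: total_scores can be 0.0 when
+                # every score was zeroed out above.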
+                if total_scores > 0:
+                    sorted_tuples = [
+                        (uid, p / total_scores) for uid, p in sorted_tuples
+                    ]

             return list(sorted_tuples)

+    def _calculate_conditional_probabilities(self, embedded_label):
+        conditional_probabilities = {
+            uid: embedded_label[i] for uid, i in self.uid_to_dimension.items()
+        }
+        return conditional_probabilities
+
+    def _calculate_unconditional_probabilities(self, conditional_probabilities):
+        # Calculate the unconditional probabilities
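+        # For a concept s with parents p: P(s) = P(s | parent) *
+        # (1 - prod_p (1 - P(p))), i.e. the conditional score times the
+        # probability that at least one parent is present.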
+        unconditional_probabilities = {}
+        for uid in self.topo_sorted_uids:
+            unconditional_probability = conditional_probabilities[uid]
+
+            no_parent_probability = 1.0
+            has_parents = False
+            for parent in self.graph.predecessors(uid):
+                has_parents = True
+                no_parent_probability *= 1.0 - unconditional_probabilities[parent]
+
+            if has_parents:
+                unconditional_probability *= 1.0 - no_parent_probability
+
+            unconditional_probabilities[uid] = unconditional_probability
+
+        return unconditional_probabilities
+
     def update_embedding(self):
         current_concepts = self.kb.concepts()
         current_concept_count = len(current_concepts)
@@ -161,7 +167,10 @@ class CHILLAXKerasHC(
         }

         self.prediction_target_uids = {
-            concept.uid for concept in self.kb.concepts(flags={knowledge.ConceptFlagV2.PREDICTION_TARGET})
+            concept.uid
+            for concept in self.kb.concepts(
+                flags={knowledge.ConceptFlagV2.PREDICTION_TARGET}
+            )
         }

         if len(old_weights) == 2:
@@ -257,7 +266,9 @@ class CHILLAXKerasHC(

         for i, uid in enumerate(self.uid_to_dimension):
             descendants = set(nx.descendants(self.graph, uid)) | {uid}
-            reachable_leaf_nodes = descendants.intersection(self.prediction_target_uids)
+            reachable_leaf_nodes = descendants.intersection(
+                self.prediction_target_uids
+            )
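+            # Each concept's loss weight scales with the number of prediction
+            # targets reachable from it.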
             self.loss_weights[i] *= len(reachable_leaf_nodes)

         # Test if any leaf nodes are reachable
@@ -293,11 +304,22 @@ class CHILLAXKerasHC(

     def loss(self, feature_batch, ground_truth):
         if not self.is_updated:
-            raise RuntimeError("This classifier is not yet ready to compute a loss. "
-                               "Check if it has been notified of a hyponymy relation.")
+            raise RuntimeError(
+                "This classifier is not yet ready to compute a loss. "
+                "Check if it has been notified of a hyponymy relation."
+            )

-        loss_mask = np.zeros((len(ground_truth), len(self.uid_to_dimension)))
-        for i, label in enumerate(ground_truth):
+        # (1) Predict
+        prediction = self.predict_embedded(feature_batch)
+
+        # (2) Extrapolate ground truth
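+        # Imprecise labels may be replaced depending on the current prediction;
+        # this is a no-op while self.extrapolator is None.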
+        extrapolated_ground_truth = self._extrapolate(ground_truth, prediction)
+
+        # (3) Compute loss mask
+        loss_mask = np.zeros(
+            (len(extrapolated_ground_truth), len(self.uid_to_dimension))
+        )
+        for i, label in enumerate(extrapolated_ground_truth):
             # Loss mask
             loss_mask[i, self.uid_to_dimension[label]] = 1.0
@@ -307,7 +329,7 @@ class CHILLAXKerasHC(
                     loss_mask[i, self.uid_to_dimension[successor]] = 1.0
                     # This should also cover the node itself, but we do it anyway

-            if not self._mlnp:
+            if not self._force_prediction_targets:
                 # Learn direct successors in order to "stop"
                 # prediction at these nodes.
                 # If MLNP is active, then this can be ignored.
@@ -317,19 +339,20 @@ class CHILLAXKerasHC(
                 for successor in self.graph.successors(label):
                     loss_mask[i, self.uid_to_dimension[successor]] = 1.0

-        embedding = self.embed(ground_truth)
-        prediction = self.predict_embedded(feature_batch)
+        # (4) Embed ground truth
+        embedded_ground_truth = self.embed(extrapolated_ground_truth)

-        # Binary cross entropy loss function
+        # (5) Compute binary cross entropy loss function
         clipped_probs = tf.clip_by_value(prediction, 1e-7, (1.0 - 1e-7))
         the_loss = -(
-            embedding * tf.math.log(clipped_probs)
-            + (1.0 - embedding) * tf.math.log(1.0 - clipped_probs)
+            embedded_ground_truth * tf.math.log(clipped_probs)
+            + (1.0 - embedded_ground_truth) * tf.math.log(1.0 - clipped_probs)
         )

         sum_per_batch_element = tf.reduce_sum(
             the_loss * loss_mask * self.loss_weights, axis=1
         )
+
         return tf.reduce_mean(sum_per_batch_element)

     def observe(self, samples, gt_resource_id):
@@ -366,3 +389,44 @@ class CHILLAXKerasHC(
             (self.uid_to_dimension,) = pickle.load(target)

         self.update_embedding()
+
+    def _extrapolate(self, ground_truth, embedded_prediction):
+        # Only do anything if there is an extrapolator
+        if self.extrapolator is not None:
+            epn = embedded_prediction.numpy()
+            extrapolated_ground_truth = []
+            for i, ground_truth_element in enumerate(ground_truth):
+                # Get the raw scores
+                conditional_probabilities = self._calculate_conditional_probabilities(
+                    epn[i]
+                )
+
+                # If the extrapolator wants it, apply the ground truth to the prediction at the
+                # conditional probability level.
+                if self.extrapolator.apply_ground_truth:
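+                    # label_true: the label and all of its ancestors (certainly
+                    # present); everything else in known is certainly absent.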
+                    label_true = {ground_truth_element}
+                    known = {ground_truth_element}
+                    for ancestor in nx.ancestors(self.graph, ground_truth_element):
+                        label_true |= {ancestor}
+                        known |= {ancestor}
+                        for child in self.graph.successors(ancestor):
+                            known |= {child}
+
+                    for uid in known:
+                        conditional_probabilities[uid] = (
+                            1.0 if uid in label_true else 0.0
+                        )
+
+                # Calculate unconditionals and extrapolate
+                unconditional_probabilities = self._calculate_unconditional_probabilities(
+                    conditional_probabilities
+                )
+                extrapolated_ground_truth += [
+                    self.extrapolator.extrapolate(
+                        ground_truth_element, unconditional_probabilities
+                    )
+                ]
+
+            return extrapolated_ground_truth
+        else:
+            return ground_truth