diff --git a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs
index c0a3e0bf3..0249835c6 100644
--- a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs
+++ b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs
@@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Linq;
using SIL.Machine.Corpora;
+using SIL.Machine.Statistics;
namespace SIL.Machine.QualityEstimation
{
@@ -83,22 +84,6 @@ ScriptureBookScores bookScores
return ComputeSegmentUsability(segmentScores, chapterScores, bookScores);
}
- ///
- /// Calculates the geometric mean for a collection of values.
- ///
- ///
- /// The geometric mean.
- private static double GeometricMean(IList values)
- {
- // Geometric mean requires positive values
- if (values == null || !values.Any() || values.Any(x => x <= 0))
- return 0;
-
- // Compute the sum of the natural logarithms of all values,
- // and divide by the count of numbers and take the exponential
- return Math.Exp(values.Sum(Math.Log) / values.Count);
- }
-
private double CalculateUsableProbability(double chrF3)
{
double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count;
@@ -267,7 +252,7 @@ private List ComputeTextUsability(TextScores textScores)
{
textScores.AddScore(
textIdConfidences.Key,
- new Score(_slope, confidence: GeometricMean(textIdConfidences.Value), _intercept)
+ new Score(_slope, confidence: StatisticalMethods.GeometricMean(textIdConfidences.Value), _intercept)
);
}
@@ -325,7 +310,11 @@ out List bookAndChapterConfidences
chapterScores.AddScore(
bookAndChapterConfidences.Key.Book,
bookAndChapterConfidences.Key.Chapter,
- new Score(_slope, confidence: GeometricMean(bookAndChapterConfidences.Value), _intercept)
+ new Score(
+ _slope,
+ confidence: StatisticalMethods.GeometricMean(bookAndChapterConfidences.Value),
+ _intercept
+ )
);
}
@@ -334,7 +323,7 @@ out List bookAndChapterConfidences
{
bookScores.AddScore(
bookConfidences.Key,
- new Score(_slope, confidence: GeometricMean(bookConfidences.Value), _intercept)
+ new Score(_slope, confidence: StatisticalMethods.GeometricMean(bookConfidences.Value), _intercept)
);
}
diff --git a/src/SIL.Machine/Statistics/StatisticalMethods.cs b/src/SIL.Machine/Statistics/StatisticalMethods.cs
index 081818c91..33e1b69ee 100644
--- a/src/SIL.Machine/Statistics/StatisticalMethods.cs
+++ b/src/SIL.Machine/Statistics/StatisticalMethods.cs
@@ -43,5 +43,21 @@ public static double KullbackLeiblerDivergence(IEnumerable dist1, IEnume
{
return dist1.Zip(dist2, (p1, p2) => p1 == 0 || p2 == 0 ? 0 : Math.Log(p1 / p2, 2) * p1).Sum();
}
+
+ /// <summary>
+ /// Calculates the geometric mean for a collection of values.
+ /// </summary>
+ /// <param name="values">The values to average.</param>
+ /// <returns>The geometric mean, or 0 if <paramref name="values"/> is null, empty, or contains a non-positive value.</returns>
+ public static double GeometricMean(IList values)
+ {
+ // Geometric mean requires positive values
+ if (values == null || !values.Any() || values.Any(x => x <= 0))
+ return 0;
+
+ // Compute the sum of the natural logarithms of all values,
+ // and divide by the count of numbers and take the exponential
+ return Math.Exp(values.Sum(Math.Log) / values.Count);
+ }
}
}
diff --git a/src/SIL.Machine/Translation/HybridTranslationEngine.cs b/src/SIL.Machine/Translation/HybridTranslationEngine.cs
index 34af37ad2..f0e5a9033 100644
--- a/src/SIL.Machine/Translation/HybridTranslationEngine.cs
+++ b/src/SIL.Machine/Translation/HybridTranslationEngine.cs
@@ -3,6 +3,7 @@
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
+using SIL.Machine.Statistics;
using SIL.Machine.Tokenization;
using SIL.ObjectModel;
@@ -488,6 +489,7 @@ private TranslationResult Merge(TranslationResult interactiveResult, Translation
interactiveResult.SourceTokens,
mergedTargetSegment,
mergedConfidences,
+ StatisticalMethods.GeometricMean(mergedConfidences),
mergedSources,
alignment,
interactiveResult.Phrases
diff --git a/src/SIL.Machine/Translation/TransferEngine.cs b/src/SIL.Machine/Translation/TransferEngine.cs
index 56ac1d6eb..9551de0c7 100644
--- a/src/SIL.Machine/Translation/TransferEngine.cs
+++ b/src/SIL.Machine/Translation/TransferEngine.cs
@@ -5,6 +5,7 @@
using SIL.Machine.Annotations;
using SIL.Machine.Corpora;
using SIL.Machine.Morphology;
+using SIL.Machine.Statistics;
using SIL.Machine.Tokenization;
using SIL.ObjectModel;
@@ -186,6 +187,7 @@ public IReadOnlyList Translate(int n, IReadOnlyList s
segment,
targetTokens,
confidences,
+ StatisticalMethods.GeometricMean(confidences),
sources,
alignment,
new[] { new Phrase(Range.Create(0, normalizedSourceTokens.Count), targetWords.Count) }
diff --git a/src/SIL.Machine/Translation/TranslationExtensions.cs b/src/SIL.Machine/Translation/TranslationExtensions.cs
index a25e2c71a..dd9e07736 100644
--- a/src/SIL.Machine/Translation/TranslationExtensions.cs
+++ b/src/SIL.Machine/Translation/TranslationExtensions.cs
@@ -120,6 +120,7 @@ public static TranslationResult Truecase(
result.SourceTokens,
targetTokens,
result.Confidences,
+ result.SequenceConfidence,
result.Sources,
result.Alignment,
result.Phrases
diff --git a/src/SIL.Machine/Translation/TranslationResult.cs b/src/SIL.Machine/Translation/TranslationResult.cs
index 8fd257fe6..2e373dc5f 100644
--- a/src/SIL.Machine/Translation/TranslationResult.cs
+++ b/src/SIL.Machine/Translation/TranslationResult.cs
@@ -11,6 +11,7 @@ public TranslationResult(
IEnumerable sourceTokens,
IEnumerable targetTokens,
IEnumerable confidences,
+ double sequenceConfidence,
IEnumerable sources,
WordAlignmentMatrix alignment,
IEnumerable phrases
@@ -27,6 +28,7 @@ IEnumerable phrases
nameof(confidences)
);
}
+ SequenceConfidence = sequenceConfidence;
Sources = sources.ToArray();
if (Sources.Count != TargetTokens.Count)
{
@@ -58,6 +60,7 @@ IEnumerable phrases
public IReadOnlyList SourceTokens { get; }
public IReadOnlyList TargetTokens { get; }
public IReadOnlyList Confidences { get; }
+ public double SequenceConfidence { get; }
public IReadOnlyList Sources { get; }
public WordAlignmentMatrix Alignment { get; }
public IReadOnlyList Phrases { get; }
diff --git a/src/SIL.Machine/Translation/TranslationResultBuilder.cs b/src/SIL.Machine/Translation/TranslationResultBuilder.cs
index 461e20b98..d71ca616f 100644
--- a/src/SIL.Machine/Translation/TranslationResultBuilder.cs
+++ b/src/SIL.Machine/Translation/TranslationResultBuilder.cs
@@ -11,6 +11,7 @@ public class TranslationResultBuilder
private readonly List _confidences;
private readonly List _sources;
private readonly List _phrases;
+ private readonly double _sequenceConfidences;
public TranslationResultBuilder(IReadOnlyList sourceTokens)
{
@@ -19,6 +20,7 @@ public TranslationResultBuilder(IReadOnlyList sourceTokens)
_confidences = new List();
_sources = new List();
_phrases = new List();
+ _sequenceConfidences = -1.0;
}
public IReadOnlyList SourceTokens { get; }
@@ -29,6 +31,7 @@ public TranslationResultBuilder(IReadOnlyList sourceTokens)
public IReadOnlyList Confidences => _confidences;
public IReadOnlyList Sources => _sources;
public IReadOnlyList Phrases => _phrases;
+ public double SequenceConfidences => _sequenceConfidences;
public void AppendToken(string token, TranslationSources source, double confidence)
{
@@ -246,6 +249,7 @@ public TranslationResult ToResult(string translation = null)
SourceTokens,
_targetTokens,
_confidences,
+ _sequenceConfidences,
sources,
alignment,
phrases