diff --git a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs index c0a3e0bf3..0249835c6 100644 --- a/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs +++ b/src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Linq; using SIL.Machine.Corpora; +using SIL.Machine.Statistics; namespace SIL.Machine.QualityEstimation { @@ -83,22 +84,6 @@ ScriptureBookScores bookScores return ComputeSegmentUsability(segmentScores, chapterScores, bookScores); } - /// - /// Calculates the geometric mean for a collection of values. - /// - /// - /// The geometric mean. - private static double GeometricMean(IList values) - { - // Geometric mean requires positive values - if (values == null || !values.Any() || values.Any(x => x <= 0)) - return 0; - - // Compute the sum of the natural logarithms of all values, - // and divide by the count of numbers and take the exponential - return Math.Exp(values.Sum(Math.Log) / values.Count); - } - private double CalculateUsableProbability(double chrF3) { double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count; @@ -267,7 +252,7 @@ private List ComputeTextUsability(TextScores textScores) { textScores.AddScore( textIdConfidences.Key, - new Score(_slope, confidence: GeometricMean(textIdConfidences.Value), _intercept) + new Score(_slope, confidence: StatisticalMethods.GeometricMean(textIdConfidences.Value), _intercept) ); } @@ -325,7 +310,11 @@ out List bookAndChapterConfidences chapterScores.AddScore( bookAndChapterConfidences.Key.Book, bookAndChapterConfidences.Key.Chapter, - new Score(_slope, confidence: GeometricMean(bookAndChapterConfidences.Value), _intercept) + new Score( + _slope, + confidence: StatisticalMethods.GeometricMean(bookAndChapterConfidences.Value), + _intercept + ) ); } @@ -334,7 +323,7 @@ out List bookAndChapterConfidences { bookScores.AddScore( 
bookConfidences.Key, - new Score(_slope, confidence: GeometricMean(bookConfidences.Value), _intercept) + new Score(_slope, confidence: StatisticalMethods.GeometricMean(bookConfidences.Value), _intercept) ); } diff --git a/src/SIL.Machine/Statistics/StatisticalMethods.cs b/src/SIL.Machine/Statistics/StatisticalMethods.cs index 081818c91..33e1b69ee 100644 --- a/src/SIL.Machine/Statistics/StatisticalMethods.cs +++ b/src/SIL.Machine/Statistics/StatisticalMethods.cs @@ -43,5 +43,21 @@ public static double KullbackLeiblerDivergence(IEnumerable dist1, IEnume { return dist1.Zip(dist2, (p1, p2) => p1 == 0 || p2 == 0 ? 0 : Math.Log(p1 / p2, 2) * p1).Sum(); } + + /// <summary> + /// Calculates the geometric mean for a collection of values. + /// </summary> + /// <param name="values">The values to average; null, empty, or any value less than or equal to zero yields 0.</param> + /// <returns>The exponential of the mean natural logarithm of the values (the geometric mean), or 0 for the inputs noted above.</returns> + public static double GeometricMean(IList values) + { + // Geometric mean requires positive values; anything else is reported as 0 + if (values == null || !values.Any() || values.Any(x => x <= 0)) + return 0; + + // Compute the sum of the natural logarithms of all values, + // and divide by the count of numbers and take the exponential + return Math.Exp(values.Sum(Math.Log) / values.Count); + } } } diff --git a/src/SIL.Machine/Translation/HybridTranslationEngine.cs b/src/SIL.Machine/Translation/HybridTranslationEngine.cs index 34af37ad2..f0e5a9033 100644 --- a/src/SIL.Machine/Translation/HybridTranslationEngine.cs +++ b/src/SIL.Machine/Translation/HybridTranslationEngine.cs @@ -3,6 +3,7 @@ using System.Linq; using System.Threading; using System.Threading.Tasks; +using SIL.Machine.Statistics; using SIL.Machine.Tokenization; using SIL.ObjectModel; @@ -488,6 +489,7 @@ private TranslationResult Merge(TranslationResult interactiveResult, Translation interactiveResult.SourceTokens, mergedTargetSegment, mergedConfidences, + StatisticalMethods.GeometricMean(mergedConfidences), mergedSources, alignment, interactiveResult.Phrases diff --git a/src/SIL.Machine/Translation/TransferEngine.cs b/src/SIL.Machine/Translation/TransferEngine.cs index 
56ac1d6eb..9551de0c7 100644 --- a/src/SIL.Machine/Translation/TransferEngine.cs +++ b/src/SIL.Machine/Translation/TransferEngine.cs @@ -5,6 +5,7 @@ using SIL.Machine.Annotations; using SIL.Machine.Corpora; using SIL.Machine.Morphology; +using SIL.Machine.Statistics; using SIL.Machine.Tokenization; using SIL.ObjectModel; @@ -186,6 +187,7 @@ public IReadOnlyList Translate(int n, IReadOnlyList s segment, targetTokens, confidences, + StatisticalMethods.GeometricMean(confidences), sources, alignment, new[] { new Phrase(Range.Create(0, normalizedSourceTokens.Count), targetWords.Count) } diff --git a/src/SIL.Machine/Translation/TranslationExtensions.cs b/src/SIL.Machine/Translation/TranslationExtensions.cs index a25e2c71a..dd9e07736 100644 --- a/src/SIL.Machine/Translation/TranslationExtensions.cs +++ b/src/SIL.Machine/Translation/TranslationExtensions.cs @@ -120,6 +120,7 @@ public static TranslationResult Truecase( result.SourceTokens, targetTokens, result.Confidences, + result.SequenceConfidence, result.Sources, result.Alignment, result.Phrases diff --git a/src/SIL.Machine/Translation/TranslationResult.cs b/src/SIL.Machine/Translation/TranslationResult.cs index 8fd257fe6..2e373dc5f 100644 --- a/src/SIL.Machine/Translation/TranslationResult.cs +++ b/src/SIL.Machine/Translation/TranslationResult.cs @@ -11,6 +11,7 @@ public TranslationResult( IEnumerable sourceTokens, IEnumerable targetTokens, IEnumerable confidences, + double sequenceConfidence, IEnumerable sources, WordAlignmentMatrix alignment, IEnumerable phrases @@ -27,6 +28,7 @@ IEnumerable phrases nameof(confidences) ); } + SequenceConfidence = sequenceConfidence; Sources = sources.ToArray(); if (Sources.Count != TargetTokens.Count) { @@ -58,6 +60,7 @@ IEnumerable phrases public IReadOnlyList SourceTokens { get; } public IReadOnlyList TargetTokens { get; } public IReadOnlyList Confidences { get; } + public double SequenceConfidence { get; } public IReadOnlyList Sources { get; } public WordAlignmentMatrix Alignment { 
get; } public IReadOnlyList Phrases { get; } diff --git a/src/SIL.Machine/Translation/TranslationResultBuilder.cs b/src/SIL.Machine/Translation/TranslationResultBuilder.cs index 461e20b98..d71ca616f 100644 --- a/src/SIL.Machine/Translation/TranslationResultBuilder.cs +++ b/src/SIL.Machine/Translation/TranslationResultBuilder.cs @@ -11,6 +11,7 @@ public class TranslationResultBuilder private readonly List _confidences; private readonly List _sources; private readonly List _phrases; + private readonly double _sequenceConfidences; /* NOTE(review): readonly, so it can only be assigned during construction */ public TranslationResultBuilder(IReadOnlyList sourceTokens) { @@ -19,6 +20,7 @@ public TranslationResultBuilder(IReadOnlyList sourceTokens) _confidences = new List(); _sources = new List(); _phrases = new List(); + _sequenceConfidences = -1.0; /* presumably a "not computed" placeholder - TODO confirm */ } public IReadOnlyList SourceTokens { get; } @@ -29,6 +31,7 @@ public TranslationResultBuilder(IReadOnlyList sourceTokens) public IReadOnlyList Confidences => _confidences; public IReadOnlyList Sources => _sources; public IReadOnlyList Phrases => _phrases; + public double SequenceConfidences => _sequenceConfidences; /* NOTE(review): plural here, while TranslationResult names its property SequenceConfidence - confirm intended */ public void AppendToken(string token, TranslationSources source, double confidence) { @@ -246,6 +249,7 @@ public TranslationResult ToResult(string translation = null) SourceTokens, _targetTokens, _confidences, + _sequenceConfidences, /* NOTE(review): forwards the stored field (-1.0 from the constructor shown in this patch) as the result's sequenceConfidence - verify callers expect that */ sources, alignment, phrases