Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 8 additions & 19 deletions src/SIL.Machine/QualityEstimation/ChrF3QualityEstimator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Linq;
using SIL.Machine.Corpora;
using SIL.Machine.Statistics;

namespace SIL.Machine.QualityEstimation
{
Expand Down Expand Up @@ -83,22 +84,6 @@ ScriptureBookScores bookScores
return ComputeSegmentUsability(segmentScores, chapterScores, bookScores);
}

/// <summary>
/// Computes the geometric mean of a list of values as the exponential of the
/// arithmetic mean of their natural logarithms.
/// </summary>
/// <param name="values">The values to average. May be null or empty.</param>
/// <returns>
/// The geometric mean, or 0 when <paramref name="values"/> is null, is empty,
/// or contains any value less than or equal to zero.
/// </returns>
private static double GeometricMean(IList<double> values)
{
    // Nothing to average: report 0 instead of throwing.
    if (values == null || values.Count == 0)
        return 0;

    double logTotal = 0;
    foreach (double value in values)
    {
        // The geometric mean is only defined for positive inputs, so a single
        // non-positive value makes the whole result 0.
        if (value <= 0)
            return 0;

        logTotal += Math.Log(value);
    }

    // exp(mean(ln x)) is equivalent to the n-th root of the product of the values.
    return Math.Exp(logTotal / values.Count);
}

private double CalculateUsableProbability(double chrF3)
{
double usableWeight = Math.Exp(-Math.Pow(chrF3 - Usable.Mean, 2) / (2 * Usable.Variance)) * Usable.Count;
Expand Down Expand Up @@ -267,7 +252,7 @@ private List<TextUsability> ComputeTextUsability(TextScores textScores)
{
textScores.AddScore(
textIdConfidences.Key,
new Score(_slope, confidence: GeometricMean(textIdConfidences.Value), _intercept)
new Score(_slope, confidence: StatisticalMethods.GeometricMean(textIdConfidences.Value), _intercept)
);
}

Expand Down Expand Up @@ -325,7 +310,11 @@ out List<double> bookAndChapterConfidences
chapterScores.AddScore(
bookAndChapterConfidences.Key.Book,
bookAndChapterConfidences.Key.Chapter,
new Score(_slope, confidence: GeometricMean(bookAndChapterConfidences.Value), _intercept)
new Score(
_slope,
confidence: StatisticalMethods.GeometricMean(bookAndChapterConfidences.Value),
_intercept
)
);
}

Expand All @@ -334,7 +323,7 @@ out List<double> bookAndChapterConfidences
{
bookScores.AddScore(
bookConfidences.Key,
new Score(_slope, confidence: GeometricMean(bookConfidences.Value), _intercept)
new Score(_slope, confidence: StatisticalMethods.GeometricMean(bookConfidences.Value), _intercept)
);
}

Expand Down
16 changes: 16 additions & 0 deletions src/SIL.Machine/Statistics/StatisticalMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,21 @@ public static double KullbackLeiblerDivergence(IEnumerable<double> dist1, IEnume
{
return dist1.Zip(dist2, (p1, p2) => p1 == 0 || p2 == 0 ? 0 : Math.Log(p1 / p2, 2) * p1).Sum();
}

/// <summary>
/// Computes the geometric mean of a list of values as the exponential of the
/// arithmetic mean of their natural logarithms.
/// </summary>
/// <param name="values">The values to average. May be null or empty.</param>
/// <returns>
/// The geometric mean, or 0 when <paramref name="values"/> is null, is empty,
/// or contains any value less than or equal to zero.
/// </returns>
public static double GeometricMean(IList<double> values)
{
    // Nothing to average: report 0 instead of throwing.
    if (values == null || values.Count == 0)
        return 0;

    double logTotal = 0;
    foreach (double value in values)
    {
        // The geometric mean is only defined for positive inputs, so a single
        // non-positive value makes the whole result 0.
        if (value <= 0)
            return 0;

        logTotal += Math.Log(value);
    }

    // exp(mean(ln x)) is equivalent to the n-th root of the product of the values.
    return Math.Exp(logTotal / values.Count);
}
}
}
2 changes: 2 additions & 0 deletions src/SIL.Machine/Translation/HybridTranslationEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using SIL.Machine.Statistics;
using SIL.Machine.Tokenization;
using SIL.ObjectModel;

Expand Down Expand Up @@ -488,6 +489,7 @@ private TranslationResult Merge(TranslationResult interactiveResult, Translation
interactiveResult.SourceTokens,
mergedTargetSegment,
mergedConfidences,
StatisticalMethods.GeometricMean(mergedConfidences),
mergedSources,
alignment,
interactiveResult.Phrases
Expand Down
2 changes: 2 additions & 0 deletions src/SIL.Machine/Translation/TransferEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using SIL.Machine.Annotations;
using SIL.Machine.Corpora;
using SIL.Machine.Morphology;
using SIL.Machine.Statistics;
using SIL.Machine.Tokenization;
using SIL.ObjectModel;

Expand Down Expand Up @@ -186,6 +187,7 @@ public IReadOnlyList<TranslationResult> Translate(int n, IReadOnlyList<string> s
segment,
targetTokens,
confidences,
StatisticalMethods.GeometricMean(confidences),
sources,
alignment,
new[] { new Phrase(Range<int>.Create(0, normalizedSourceTokens.Count), targetWords.Count) }
Expand Down
1 change: 1 addition & 0 deletions src/SIL.Machine/Translation/TranslationExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ public static TranslationResult Truecase(
result.SourceTokens,
targetTokens,
result.Confidences,
result.SequenceConfidence,
result.Sources,
result.Alignment,
result.Phrases
Expand Down
3 changes: 3 additions & 0 deletions src/SIL.Machine/Translation/TranslationResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public TranslationResult(
IEnumerable<string> sourceTokens,
IEnumerable<string> targetTokens,
IEnumerable<double> confidences,
double sequenceConfidence,
IEnumerable<TranslationSources> sources,
WordAlignmentMatrix alignment,
IEnumerable<Phrase> phrases
Expand All @@ -27,6 +28,7 @@ IEnumerable<Phrase> phrases
nameof(confidences)
);
}
SequenceConfidence = sequenceConfidence;
Sources = sources.ToArray();
if (Sources.Count != TargetTokens.Count)
{
Expand Down Expand Up @@ -58,6 +60,7 @@ IEnumerable<Phrase> phrases
public IReadOnlyList<string> SourceTokens { get; }
public IReadOnlyList<string> TargetTokens { get; }
public IReadOnlyList<double> Confidences { get; }
public double SequenceConfidence { get; }
public IReadOnlyList<TranslationSources> Sources { get; }
public WordAlignmentMatrix Alignment { get; }
public IReadOnlyList<Phrase> Phrases { get; }
Expand Down
4 changes: 4 additions & 0 deletions src/SIL.Machine/Translation/TranslationResultBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public class TranslationResultBuilder
private readonly List<double> _confidences;
private readonly List<TranslationSources> _sources;
private readonly List<PhraseInfo> _phrases;
private readonly double _sequenceConfidences;

public TranslationResultBuilder(IReadOnlyList<string> sourceTokens)
{
Expand All @@ -19,6 +20,7 @@ public TranslationResultBuilder(IReadOnlyList<string> sourceTokens)
_confidences = new List<double>();
_sources = new List<TranslationSources>();
_phrases = new List<PhraseInfo>();
_sequenceConfidences = -1.0;
}

public IReadOnlyList<string> SourceTokens { get; }
Expand All @@ -29,6 +31,7 @@ public TranslationResultBuilder(IReadOnlyList<string> sourceTokens)
public IReadOnlyList<double> Confidences => _confidences;
public IReadOnlyList<TranslationSources> Sources => _sources;
public IReadOnlyList<PhraseInfo> Phrases => _phrases;
public double SequenceConfidences => _sequenceConfidences;

public void AppendToken(string token, TranslationSources source, double confidence)
{
Expand Down Expand Up @@ -246,6 +249,7 @@ public TranslationResult ToResult(string translation = null)
SourceTokens,
_targetTokens,
_confidences,
_sequenceConfidences,
sources,
alignment,
phrases
Expand Down
Loading