Skip to content

Commit

Permalink
Include already translated texts as additional context for translation (
Browse files Browse the repository at this point in the history
#154)

If not disabled we will add the already translated text of a page as
context to the translation if it's a re-translation where only a few
texts are changed, and will also add some surrounding context to the
translated text, or even the complete page if that's configured.
  • Loading branch information
stoerr authored Oct 4, 2024
2 parents 7740b62 + cb10aa4 commit 0969c8a
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package com.composum.ai.aem.core.impl.autotranslate;

import static com.composum.ai.backend.base.service.chat.impl.GPTTranslationServiceImpl.LASTID;
import static com.composum.ai.backend.base.service.chat.impl.GPTTranslationServiceImpl.MULTITRANSLATION_SEPARATOR_END;
import static com.composum.ai.backend.base.service.chat.impl.GPTTranslationServiceImpl.MULTITRANSLATION_SEPARATOR_START;
import static java.util.Objects.requireNonNull;
import static org.apache.commons.lang3.StringUtils.isNotBlank;

Expand Down Expand Up @@ -155,17 +158,17 @@ public Stats translateLiveCopy(@Nonnull Resource resource,
additionalInstructions.replaceAll(MARKER_DEBUG_ADDITIONAL_INSTRUCTIONS, ""));
}

// We also insert texts that are already translated since they might guide the translation process
configuration = maybeIncludeAlreadyTranslatedTextAsExample(propertiesToTranslate, autoTranslateCaConfig, configuration);

propertiesToTranslate = reducePropertiesToTranslate(propertiesToTranslate, autoTranslateCaConfig);
List<String> valuesToTranslate = propertiesToTranslate.stream()
.filter(p -> autoTranslateConfigService.includeAlreadyTranslatedValues() || !p.isAlreadyCorrectlyTranslated)
.map(PropertyToTranslate::getSourceValue)
.collect(Collectors.toList());

List<String> translatedValues =
translationService.fragmentedTranslation(valuesToTranslate, languageName, configuration,
Collections.singletonList(GPTResponseCheck.KEEP_HREF_TRANSLATION_CHECK));
translatedValues = remapPaths(translatedValues, relationship.getLiveCopy().getBlueprintPath(), relationship.getLiveCopy().getPath()
);
translatedValues = remapPaths(translatedValues, relationship.getLiveCopy().getBlueprintPath(), relationship.getLiveCopy().getPath());

Map<String, LiveRelationship> relationships = new HashMap<>();

Expand Down Expand Up @@ -229,6 +232,85 @@ public Stats translateLiveCopy(@Nonnull Resource resource,
return stats;
}

/**
 * Narrows the list of properties down to those that are handed to the translation.
 * <p>
 * A property is selected when it is not yet correctly translated, or unconditionally when the
 * full page is to be included (OSGI setting or context-aware setting as evaluated by
 * {@link #trueTristateCaConfig(boolean[])}). The selection is then widened by two neighbouring
 * entries on each side via {@link #expandSelection(boolean[], int)} to provide some context.
 *
 * @param propertiesToTranslate all candidate properties, in page order
 * @param autoTranslateCaConfig the context-aware configuration consulted for the full-page switch
 * @return a new list containing the selected properties in their original order
 */
protected List<PropertyToTranslate> reducePropertiesToTranslate(List<PropertyToTranslate> propertiesToTranslate, AutoTranslateCaConfig autoTranslateCaConfig) {
    final boolean wholePage = autoTranslateConfigService.includeFullPageInRetranslation()
            || trueTristateCaConfig(autoTranslateCaConfig.includeFullPageInRetranslation());

    // Mark the entries that are selected on their own merit.
    final boolean[] selected = new boolean[propertiesToTranslate.size()];
    int position = 0;
    for (PropertyToTranslate candidate : propertiesToTranslate) {
        selected[position] = wholePage || !candidate.isAlreadyCorrectlyTranslated;
        position++;
    }

    // Pull in the direct surroundings of every marked entry.
    expandSelection(selected, 2);

    // Copy the marked entries, keeping their order.
    final List<PropertyToTranslate> result = new ArrayList<>();
    position = 0;
    for (PropertyToTranslate candidate : propertiesToTranslate) {
        if (selected[position]) {
            result.add(candidate);
        }
        position++;
    }
    return result;
}

/**
 * Widens a selection so that up to {@code selectRange} entries before and after every already
 * selected entry are selected as well, to give the selected entries some context.
 * The array is modified in place.
 * <p>
 * The distance to the nearest selected entry is tracked with a countdown instead of index
 * arithmetic on sentinel values, so that negative or very large ranges cannot overflow
 * and accidentally select unrelated entries.
 *
 * @param includeIndizes the selection flags; entries set to true stay true, others may become true
 * @param selectRange    the number of neighbours to add on each side; values &lt;= 0 change nothing
 */
protected static void expandSelection(boolean[] includeIndizes, int selectRange) {
    if (selectRange <= 0) {
        return;
    }
    // Forward pass: select up to selectRange entries following a selected entry.
    int remaining = 0;
    for (int i = 0; i < includeIndizes.length; i++) {
        if (includeIndizes[i]) {
            remaining = selectRange;
        } else if (remaining > 0) {
            includeIndizes[i] = true;
            remaining--;
        }
    }
    // Backward pass: select up to selectRange entries preceding a selected entry.
    // Entries added by the forward pass are adjacent to their origin, so restarting the
    // countdown on them never reaches further left than the originally selected entry does.
    remaining = 0;
    for (int i = includeIndizes.length - 1; i >= 0; i--) {
        if (includeIndizes[i]) {
            remaining = selectRange;
        } else if (remaining > 0) {
            includeIndizes[i] = true;
            remaining--;
        }
    }
}

/**
 * Adds the already translated texts of the page as contextual example to the configuration,
 * if that is enabled by the OSGI configuration or the context-aware configuration.
 *
 * @param propertiesToTranslate the properties of the page; only the target values of those
 *                              that are already correctly translated are used
 * @param autoTranslateCaConfig the context-aware configuration consulted for the switch
 * @param configuration         the configuration to extend
 * @return the merged configuration, or the given configuration unchanged if the feature is
 * disabled or there is no already translated text
 */
protected GPTConfiguration maybeIncludeAlreadyTranslatedTextAsExample(
        List<PropertyToTranslate> propertiesToTranslate,
        AutoTranslateCaConfig autoTranslateCaConfig, GPTConfiguration configuration) {
    final boolean enabled =
            autoTranslateConfigService.includeExistingTranslationsInRetranslation() ||
                    trueTristateCaConfig(autoTranslateCaConfig.includeExistingTranslationsInRetranslation());

    final String existingTranslations = propertiesToTranslate.stream()
            .filter(p -> p.isAlreadyCorrectlyTranslated)
            .map(PropertyToTranslate::getTargetValue)
            .collect(Collectors.joining("\n"));

    if (!enabled || !StringUtils.isNotBlank(existingTranslations)) {
        return configuration;
    }

    // The example has to follow the final output format, otherwise it is confusing for the AI.
    final String separator = MULTITRANSLATION_SEPARATOR_START + LASTID + MULTITRANSLATION_SEPARATOR_END;
    final String instruction = "Retrieve the result of a previous translation of parts of the text. You don't need to translate this - this is just contextual information and you can draw on that for translation examples and context of the translation that is done later.";
    final GPTConfiguration exampleContext =
            GPTConfiguration.ofContext(instruction, separator + existingTranslations + separator);
    return configuration.merge(exampleContext);
}

/**
 * Is counted as true if there is a true value in the array.
 * <p>
 * The array is scanned explicitly: {@code Arrays.asList(value)} on a {@code boolean[]} would
 * create a list with the array itself as its single element, so a {@code contains(true)} on
 * that list could never match.
 *
 * @param value the flags from the configuration, possibly null or empty
 * @return true if and only if the array is not null and contains at least one true entry
 */
protected boolean trueTristateCaConfig(boolean[] value) {
    if (value == null) {
        return false;
    }
    for (boolean flag : value) {
        if (flag) {
            return true;
        }
    }
    return false;
}

/**
* Checks whether there are href="path" in the translatedValues where path is within blueprintPath
* and replaces those with the according path in the live copy.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,17 @@
/** Rules that add extra translation instructions when certain words or phrases occur in the page; empty by default. */
@Property(label = "Rules that give additional instructions for translation if certain words or phrases are present in the page.")
AutoTranslateRuleConfig[] rules() default {};

/**
 * Context-aware switch for giving the entire page to the AI when re-translating a page with
 * some changes, instead of only the changed texts.
 * NOTE(review): presumably modelled as an array so that "not configured" can be represented - confirm.
 */
@Property(label = "Include Full Page during Retranslation",
        description = "If true we do not only provide changed texts to the AI during re-translating a page with some changes, " +
                "but give the entire page to provide better context. That is a bit slower and a bit more expensive, but likely " +
                "improves the result. This overrides the default from OSGI configuration.")
boolean[] includeFullPageInRetranslation();

/**
 * Context-aware switch for providing the existing translations of a page to the AI as
 * additional context when re-translating a page with some changes. Defaults to true.
 * NOTE(review): presumably modelled as an array so that "not configured" can be represented - confirm.
 */
@Property(label = "Include Existing Translations in Retranslation",
        description = "If true, when retranslating a page with some changes we provide " +
                "the existing translations of that page to the AI as well as additional context with examples. " +
                "That is a bit slower and a bit more expensive, but likely improves the result. " +
                "This overrides the default from OSGI configuration.")
boolean[] includeExistingTranslationsInRetranslation() default true;

}
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,16 @@
// Whether the translator uses the 'high-intelligence model' (see OpenAI config); enabled by default.
description = "If true, the translator will use the 'high-intelligence model' (see OpenAI config) for translation. Default: true.")
boolean useHighIntelligenceModel() default true;

/**
 * Whether, on re-translation of a page with only a few modified texts, the source texts that do
 * not have to be translated are included as well to provide context. Defaults to true.
 */
@AttributeDefinition(name = "Include Already Translated Values",
description = "If a page is re-translated with only a few modified texts: " +
"If true we include the source texts that do not have to be translated, too, " +
"to provide better context to the translation; otherwise " +
"we only include the texts that have to be translated.")
boolean includeAlreadyTranslatedValues() default true;
/**
 * Whether the entire page is given to the AI when re-translating a page with some changes,
 * instead of only the changed texts. Defaults to true.
 */
@AttributeDefinition(name = "Include Full Page during Retranslation",
        description = "If true we do not only provide changed texts to the AI during re-translating a page with some changes, " +
                "but give the entire page to provide better context. That is a bit slower and a bit more expensive, but likely " +
                "improves the result.")
boolean includeFullPageInRetranslation() default true;

/**
 * Whether the existing translations of a page are provided to the AI as additional context
 * when re-translating a page with some changes. Defaults to true.
 */
@AttributeDefinition(name = "Include Existing Translations in Retranslation",
        description = "If true, when retranslating a page with some changes we provide " +
                "the existing translations of that page to the AI as well as additional context with examples. " +
                "That is a bit slower and a bit more expensive, but likely improves the result.")
boolean includeExistingTranslationsInRetranslation() default true;

}
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,17 @@ public interface AutoTranslateConfigService {
List<String> translateableAttributes(@Nullable Resource resource);

/**
 * If a page is re-translated with only a few modified texts:
 * If true we include the source texts that do not have to be translated, too,
 * to provide better context to the translation; otherwise
 * we only include the texts that have to be translated.
 */
boolean includeAlreadyTranslatedValues();

/**
 * If true, we do not only provide changed texts to the AI during re-translating a page with some changes,
 * but give the entire page to provide better context.
 * That is a bit slower and a bit more expensive, but likely improves the result.
 */
boolean includeFullPageInRetranslation();

/**
 * If true, when retranslating a page with some changes we provide the existing translations of that page
 * to the AI as well, as additional context with examples.
 * That is a bit slower and a bit more expensive, but likely improves the result.
 */
boolean includeExistingTranslationsInRetranslation();

}
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,13 @@ public List<String> translateableAttributes(@Nullable Resource resource) {
}

@Override
public boolean includeAlreadyTranslatedValues() {
return config == null || config.includeAlreadyTranslatedValues();
public boolean includeFullPageInRetranslation() {
return config == null || config.includeFullPageInRetranslation();
}

@Override
public boolean includeExistingTranslationsInRetranslation() {
    // Without a configuration object the feature counts as enabled.
    if (config == null) {
        return true;
    }
    return config.includeExistingTranslationsInRetranslation();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@


import static com.composum.ai.aem.core.impl.autotranslate.AutoPageTranslateServiceImpl.compileContentPattern;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
Expand All @@ -11,6 +12,7 @@
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -166,4 +168,47 @@ public void testRemapPaths() {
service.remapPaths((String) null, "/content/blueprint", "/content/livecopy"));
}

@Test
public void expandSelection_includesContextBeforeAndAfter() {
    // A single selected entry in the middle pulls in two neighbours on each side.
    final boolean[] expected = {true, true, true, true, true};
    final boolean[] selection = new boolean[]{false, false, true, false, false};
    AutoPageTranslateServiceImpl.expandSelection(selection, 2);
    assertArrayEquals(expected, selection);
}

@Test
public void expandSelection_noInitialSelection() {
    // Nothing selected means nothing to expand.
    final boolean[] expected = {false, false, false, false, false};
    final boolean[] selection = new boolean[]{false, false, false, false, false};
    AutoPageTranslateServiceImpl.expandSelection(selection, 2);
    assertArrayEquals(expected, selection);
}

@Test
public void expandSelection_singleSelectionAtStart() {
    // A selection at the left border can only grow to the right.
    final boolean[] expected = {true, true, true, false, false};
    final boolean[] selection = new boolean[]{true, false, false, false, false};
    AutoPageTranslateServiceImpl.expandSelection(selection, 2);
    assertArrayEquals(expected, selection);
}

@Test
public void expandSelection_singleSelectionAtEnd() {
    // A selection at the right border can only grow to the left.
    final boolean[] expected = {false, false, true, true, true};
    final boolean[] selection = new boolean[]{false, false, false, false, true};
    AutoPageTranslateServiceImpl.expandSelection(selection, 2);
    assertArrayEquals(expected, selection);
}

@Test
public void expandSelection_multipleSelections() {
    // Overlapping surroundings of two selected entries cover the whole array.
    final boolean[] expected = {true, true, true, true, true};
    final boolean[] selection = new boolean[]{false, true, false, true, false};
    AutoPageTranslateServiceImpl.expandSelection(selection, 2);
    assertArrayEquals(expected, selection);
}

@Test
public void expandSelection_long() {
    // Entries further than two positions away from any selected entry stay unselected.
    final boolean[] expected = {false, true, true, true, true, true, true, true, false};
    final boolean[] selection = new boolean[]{false, false, false, true, false, true, false, false, false};
    AutoPageTranslateServiceImpl.expandSelection(selection, 2);
    assertArrayEquals(expected, selection);
}


}
Loading

0 comments on commit 0969c8a

Please sign in to comment.