Skip to content

Commit

Permalink
ICU-22265 Update PersonNameFormatter and its associated classes so th…
Browse files Browse the repository at this point in the history
…at the behavior matches that of the

PersonNameFormatter in CLDR.  Added a new test that tests the ICU PersonNameFormatter against a comprehensive
set of test results from the CLDR PersonNameFormatter.
  • Loading branch information
richgillam committed Mar 8, 2023
1 parent 9e16711 commit e5854c8
Show file tree
Hide file tree
Showing 7 changed files with 506 additions and 67 deletions.
1 change: 1 addition & 0 deletions icu4j/build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@
<junit-fileset>
<fileset dir="${icu4j.core-tests.dir}/${bin.dir}">
<patternset refid="test-classes-patternset"/>
<exclude name="**/ExhaustivePersonNameFormatterTest*" />
</fileset>
</junit-fileset>
</icu-junit>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,7 @@
import static com.ibm.icu.util.UResourceBundle.ARRAY;
import static com.ibm.icu.util.UResourceBundle.STRING;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.*;

import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
Expand All @@ -32,6 +28,7 @@ public class PersonNameFormatterImpl {
private final String initialSequencePattern;
private final boolean capitalizeSurname;
private final String foreignSpaceReplacement;
private final String nativeSpaceReplacement;
private final boolean formatterLocaleUsesSpaces;
private final PersonNameFormatter.Length length;
private final PersonNameFormatter.Usage usage;
Expand All @@ -58,6 +55,7 @@ public PersonNameFormatterImpl(Locale locale,
this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence");
this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement");
this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage());
this.nativeSpaceReplacement = formatterLocaleUsesSpaces ? " " : "";

// asjust for combinations of parameters that don't make sense in practice
if (usage == PersonNameFormatter.Usage.MONOGRAM) {
Expand Down Expand Up @@ -113,6 +111,7 @@ public PersonNameFormatterImpl(Locale locale, String[] patterns) {
initialSequencePattern = "{0} {1}";
capitalizeSurname = false;
foreignSpaceReplacement = " ";
nativeSpaceReplacement = " ";
formatterLocaleUsesSpaces = true;

// then, set values for the fields we actually care about
Expand All @@ -121,33 +120,43 @@ public PersonNameFormatterImpl(Locale locale, String[] patterns) {

}

@Override
public String toString() {
return "PersonNameFormatter: " + displayOrder + "-" + length + "-" + usage + "-" + formality + ", " + locale;
}

public String formatToString(PersonName name) {
// TODO: Should probably return a FormattedPersonName object

// if the formatter is for a language that doesn't use spaces between words and the name is from a language
// that does, create a formatter for the NAME'S locale and use THAT to format the name
Locale nameLocale = getNameLocale(name);
boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage());
if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) {
PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length,
if (!nameScriptMatchesLocale(name, this.locale)) {
Locale nameLocale = getNameLocale(name);
PersonNameFormatterImpl nameLocaleFormatter = new PersonNameFormatterImpl(nameLocale, this.length,
this.usage, this.formality, this.displayOrder, this.capitalizeSurname);
String result = nativeFormatter.formatToString(name);

// BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name
// with the foreignSpaceReplacement character
if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) {
result = result.replace(" ", this.foreignSpaceReplacement);
}
return result;
return nameLocaleFormatter.formatToString(name);
}

// if we get down to here, we're just doing normal formatting-- if we have both given-first and surname-first
// rules, choose which one to use based on the name's locale and preferred field order
String result = null;
Locale nameLocale = getNameLocale(name);

// choose the GN-first or SN-first pattern based on the name itself and use that to format it
if (snFirstPatterns == null || nameIsGnFirst(name)) {
return getBestPattern(gnFirstPatterns, name).format(name);
result = getBestPattern(gnFirstPatterns, name).format(name);
} else {
return getBestPattern(snFirstPatterns, name).format(name);
result = getBestPattern(snFirstPatterns, name).format(name);
}

// if either of the space-replacement characters is something other than a space,
// check to see if the name locale's language matches the formatter locale's language.
// If they match, replace all spaces with the native space-replacement character,
// and if they don't, replace all spaces with the foreign space-replacement character
if (!nativeSpaceReplacement.equals(" ") || !foreignSpaceReplacement.equals(" ")) {
if (localesMatch(nameLocale, this.locale)) {
result = result.replace(" ", nativeSpaceReplacement);
} else {
result = result.replace(" ", foreignSpaceReplacement);
}
}
return result;
}

public Locale getLocale() {
Expand Down Expand Up @@ -175,7 +184,7 @@ public boolean shouldCapitalizeSurname() {
return capitalizeSurname;
}

private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "th", "yue", "km", "lo"));
private final Set<String> LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "yue", "km", "lo", "my"));

/**
* Returns the value of the resource, as a string array.
Expand Down Expand Up @@ -297,15 +306,20 @@ private Locale getNameLocale(PersonName name) {
}

/**
* Returns true if the script of `s` is one of the default scripts for `locale`.
* This function only checks the script of the first character whose script isn't "common,"
* so it probably won't work right on mixed-script strings.
* Returns true if the characters in the name match one of the scripts for the specified locale.
*/
private boolean scriptMatchesLocale(String s, Locale locale) {
int[] localeScripts = UScript.getCode(locale);
private boolean nameScriptMatchesLocale(PersonName name, Locale formatterLocale) {
// Rather than exhaustively checking all the fields in the name, we just check the given-name
// and surname fields, giving preference to the script of the surname if they're different
// (we concatenate them into one string for simplicity). The "name script" is the script
// of the first character we find whose script isn't "common". If that script is one
// of the scripts used by the specified locale, we have a match.
String nameText = name.getFieldValue(PersonName.NameField.GIVEN, Collections.emptySet())
+ name.getFieldValue(PersonName.NameField.SURNAME, Collections.emptySet());
int[] localeScripts = UScript.getCode(formatterLocale);
int stringScript = UScript.COMMON;
for (int i = 0; stringScript == UScript.COMMON && i < s.length(); i++) {
char c = s.charAt(i);
for (int i = 0; stringScript == UScript.COMMON && i < nameText.length(); i++) {
char c = nameText.charAt(i);
stringScript = UScript.getScript(c);
}

Expand All @@ -316,4 +330,24 @@ private boolean scriptMatchesLocale(String s, Locale locale) {
}
return false;
}

/**
* Returns true if the two locales should be considered equivalent for space-replacement purposes.
*/
private boolean localesMatch(Locale nameLocale, Locale formatterLocale) {
String nameLanguage = nameLocale.getLanguage();
String formatterLanguage = formatterLocale.getLanguage();

if (nameLanguage.equals(formatterLanguage)) {
return true;
}

// HACK to make Japanese and Chinese names use the native format and native space replacement
// (do we want to do something more general here?)
if ((nameLanguage.equals("ja") || nameLanguage.equals("zh")) && (formatterLanguage.equals("ja") || formatterLanguage.equals("zh"))) {
return true;
}

return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.personname;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.*;

import com.ibm.icu.text.PersonName;

Expand All @@ -27,6 +21,11 @@ public static PersonNamePattern[] makePatterns(String[] patternText, PersonNameF
return result;
}

@Override
public String toString() {
return patternText;
}

private PersonNamePattern(String patternText, PersonNameFormatterImpl formatterImpl) {
this.patternText = patternText;

Expand Down Expand Up @@ -88,6 +87,11 @@ public String format(PersonName name) {
StringBuilder textBefore = new StringBuilder();
StringBuilder textAfter = new StringBuilder();

// if the name doesn't have a surname field and the pattern doesn't have a given-name field,
// we actually format a modified version of the name object where the contents of the
// given-name field has been copied into the surname field
name = hackNameForEmptyFields(name);

// the logic below attempts to implement the following algorithm:
// - If one or more fields at the beginning of the name are empty, also skip all literal text
// from the beginning of the name up to the first populated field.
Expand Down Expand Up @@ -148,7 +152,7 @@ public int numPopulatedFields(PersonName name) {
public int numEmptyFields(PersonName name) {
int result = 0;
for (Element element : patternElements) {
result += element.isPopulated(name) ? 0 : 1;
result += (!element.isLiteral() && !element.isPopulated(name)) ? 1 : 0;
}
return result;
}
Expand All @@ -161,6 +165,11 @@ public int numEmptyFields(PersonName name) {
* @param s2 The literal text after the omitted field.
*/
private String coalesce(StringBuilder s1, StringBuilder s2) {
// if the contents of s2 occur at the end of s1, we just use s1
if (endsWith(s1, s2)) {
s2.setLength(0);
}

// get the range of non-whitespace characters at the beginning of s1
int p1 = 0;
while (p1 < s1.length() && !Character.isWhitespace(s1.charAt(p1))) {
Expand Down Expand Up @@ -191,6 +200,45 @@ private String coalesce(StringBuilder s1, StringBuilder s2) {
return result;
}

/**
* Returns true if s1 ends with s2.
*/
private boolean endsWith(StringBuilder s1, StringBuilder s2) {
int p1 = s1.length() - 1;
int p2 = s2.length() - 1;

while (p1 >= 0 && p2 >= 0 && s1.charAt(p1) == s2.charAt(p2)) {
--p1;
--p2;
}
return p2 < 0;
}

private PersonName hackNameForEmptyFields(PersonName originalName) {
// this is a hack to deal with mononyms (name objects that don't have both a given name and a surname)--
// if the name object has a given-name field but not a surname field and the pattern either doesn't
// have a given-name field or only has "{given-initial}", we return a PersonName object that will
// return the value of the given-name field when asked for the value of the surname field and that
// will return null when asked for the value of the given-name field (all other field values and
// properties of the underlying object are returned unchanged)
PersonName result = originalName;
if (originalName.getFieldValue(PersonName.NameField.SURNAME, Collections.emptySet()) == null) {
boolean patternHasNonInitialGivenName = false;
for (PersonNamePattern.Element element : patternElements) {
if (!element.isLiteral()
&& ((NameFieldImpl)element).fieldID == PersonName.NameField.GIVEN
&& !((NameFieldImpl)element).modifiers.containsKey(PersonName.FieldModifier.INITIAL)) {
patternHasNonInitialGivenName = true;
break;
}
}
if (!patternHasNonInitialGivenName) {
return new GivenToSurnamePersonName(originalName);
}
}
return result;
}

/**
* A single element in a NamePattern. This is either a name field or a range of literal text.
*/
Expand All @@ -210,6 +258,11 @@ public LiteralText(String text) {
this.text = text;
}

@Override
public String toString() {
return text;
}

public boolean isLiteral() {
return true;
}
Expand Down Expand Up @@ -250,6 +303,19 @@ public NameFieldImpl(String fieldNameAndModifiers, PersonNameFormatterImpl forma
}
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("{");
sb.append(fieldID);
for (PersonName.FieldModifier modifier : modifiers.keySet()) {
sb.append("-");
sb.append(modifier.toString());
}
sb.append("}");
return sb.toString();
}

public boolean isLiteral() {
return false;
}
Expand All @@ -266,10 +332,48 @@ public String format(PersonName name) {
}

public boolean isPopulated(PersonName name) {
// just check whether the unmodified field contains a value
Set<PersonName.FieldModifier> modifierIDs = new HashSet<>();
String fieldValue = name.getFieldValue(fieldID, modifierIDs);
return fieldValue != null && !fieldValue.isEmpty();
String result = this.format(name);
return result != null && ! result.isEmpty();
}
}

/**
* Internal class used when formatting a mononym (a PersonName object that only has
* a given-name field). If the name doesn't have a surname field and the pattern
* doesn't have a given-name field (or only has one that produces an initial), we
* use this class to behave as though the value supplied in the given-name field
* had instead been supplied in the surname field.
*/
private static class GivenToSurnamePersonName implements PersonName {
private PersonName underlyingPersonName;

public GivenToSurnamePersonName(PersonName underlyingPersonName) {
this.underlyingPersonName = underlyingPersonName;
}

@Override
public String toString() {
return "Inverted version os " + underlyingPersonName.toString();
}
@Override
public Locale getNameLocale() {
return underlyingPersonName.getNameLocale();
}

@Override
public PreferredOrder getPreferredOrder() {
return underlyingPersonName.getPreferredOrder();
}

@Override
public String getFieldValue(NameField identifier, Set<FieldModifier> modifiers) {
if (identifier == NameField.SURNAME) {
return underlyingPersonName.getFieldValue(NameField.GIVEN, modifiers);
} else if (identifier == NameField.GIVEN) {
return null;
} else {
return underlyingPersonName.getFieldValue(identifier, modifiers);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -324,4 +324,13 @@ private PersonNameFormatter(Locale locale, Length length, Usage usage, Formality
public PersonNameFormatter(Locale locale, String[] patterns) {
this.impl = new PersonNameFormatterImpl(locale, patterns);
}

/**
* @internal For debugging only!
* @deprecated This API is for debugging only.
*/
@Override
public String toString() {
return impl.toString();
}
}
Loading

0 comments on commit e5854c8

Please sign in to comment.