diff --git a/icu4j/build.xml b/icu4j/build.xml index b9027dbc7a19..7424c7395599 100644 --- a/icu4j/build.xml +++ b/icu4j/build.xml @@ -621,6 +621,7 @@ + diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java index 6fb206200365..2cb3bb5094d4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNameFormatterImpl.java @@ -5,11 +5,7 @@ import static com.ibm.icu.util.UResourceBundle.ARRAY; import static com.ibm.icu.util.UResourceBundle.STRING; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.Locale; -import java.util.Set; +import java.util.*; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; @@ -32,6 +28,7 @@ public class PersonNameFormatterImpl { private final String initialSequencePattern; private final boolean capitalizeSurname; private final String foreignSpaceReplacement; + private final String nativeSpaceReplacement; private final boolean formatterLocaleUsesSpaces; private final PersonNameFormatter.Length length; private final PersonNameFormatter.Usage usage; @@ -58,6 +55,7 @@ public PersonNameFormatterImpl(Locale locale, this.initialSequencePattern = rb.getStringWithFallback("personNames/initialPattern/initialSequence"); this.foreignSpaceReplacement = rb.getStringWithFallback("personNames/foreignSpaceReplacement"); this.formatterLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(locale.getLanguage()); + this.nativeSpaceReplacement = formatterLocaleUsesSpaces ? 
" " : ""; // asjust for combinations of parameters that don't make sense in practice if (usage == PersonNameFormatter.Usage.MONOGRAM) { @@ -113,6 +111,7 @@ public PersonNameFormatterImpl(Locale locale, String[] patterns) { initialSequencePattern = "{0} {1}"; capitalizeSurname = false; foreignSpaceReplacement = " "; + nativeSpaceReplacement = " "; formatterLocaleUsesSpaces = true; // then, set values for the fields we actually care about @@ -121,33 +120,43 @@ public PersonNameFormatterImpl(Locale locale, String[] patterns) { } + @Override + public String toString() { + return "PersonNameFormatter: " + displayOrder + "-" + length + "-" + usage + "-" + formality + ", " + locale; + } + public String formatToString(PersonName name) { // TODO: Should probably return a FormattedPersonName object - // if the formatter is for a language that doesn't use spaces between words and the name is from a language - // that does, create a formatter for the NAME'S locale and use THAT to format the name - Locale nameLocale = getNameLocale(name); - boolean nameLocaleUsesSpaces = !LOCALES_THAT_DONT_USE_SPACES.contains(nameLocale.getLanguage()); - if (!formatterLocaleUsesSpaces && nameLocaleUsesSpaces) { - PersonNameFormatterImpl nativeFormatter = new PersonNameFormatterImpl(nameLocale, this.length, + if (!nameScriptMatchesLocale(name, this.locale)) { + Locale nameLocale = getNameLocale(name); + PersonNameFormatterImpl nameLocaleFormatter = new PersonNameFormatterImpl(nameLocale, this.length, this.usage, this.formality, this.displayOrder, this.capitalizeSurname); - String result = nativeFormatter.formatToString(name); - - // BUT, if the name is actually written in the formatter locale's script, replace any spaces in the name - // with the foreignSpaceReplacement character - if (!foreignSpaceReplacement.equals(" ") && scriptMatchesLocale(result, this.locale)) { - result = result.replace(" ", this.foreignSpaceReplacement); - } - return result; + return 
nameLocaleFormatter.formatToString(name); } - // if we get down to here, we're just doing normal formatting-- if we have both given-first and surname-first - // rules, choose which one to use based on the name's locale and preferred field order + String result = null; + Locale nameLocale = getNameLocale(name); + + // choose the GN-first or SN-first pattern based on the name itself and use that to format it if (snFirstPatterns == null || nameIsGnFirst(name)) { - return getBestPattern(gnFirstPatterns, name).format(name); + result = getBestPattern(gnFirstPatterns, name).format(name); } else { - return getBestPattern(snFirstPatterns, name).format(name); + result = getBestPattern(snFirstPatterns, name).format(name); + } + + // if either of the space-replacement characters is something other than a space, + // check to see if the name locale's language matches the formatter locale's language. + // If they match, replace all spaces with the native space-replacement character, + // and if they don't, replace all spaces with the foreign space-replacement character + if (!nativeSpaceReplacement.equals(" ") || !foreignSpaceReplacement.equals(" ")) { + if (localesMatch(nameLocale, this.locale)) { + result = result.replace(" ", nativeSpaceReplacement); + } else { + result = result.replace(" ", foreignSpaceReplacement); + } } + return result; } public Locale getLocale() { @@ -175,7 +184,7 @@ public boolean shouldCapitalizeSurname() { return capitalizeSurname; } - private final Set LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "th", "yue", "km", "lo")); + private final Set LOCALES_THAT_DONT_USE_SPACES = new HashSet<>(Arrays.asList("ja", "zh", "yue", "km", "lo", "my")); /** * Returns the value of the resource, as a string array. @@ -297,15 +306,20 @@ private Locale getNameLocale(PersonName name) { } /** - * Returns true if the script of `s` is one of the default scripts for `locale`. 
- * This function only checks the script of the first character whose script isn't "common," - * so it probably won't work right on mixed-script strings. + * Returns true if the characters in the name match one of the scripts for the specified locale. */ - private boolean scriptMatchesLocale(String s, Locale locale) { - int[] localeScripts = UScript.getCode(locale); + private boolean nameScriptMatchesLocale(PersonName name, Locale formatterLocale) { + // Rather than exhaustively checking all the fields in the name, we just check the given-name + // and surname fields, giving preference to the script of the surname if they're different + // (we concatenate them into one string for simplicity). The "name script" is the script + // of the first character we find whose script isn't "common". If that script is one + // of the scripts used by the specified locale, we have a match. + String nameText = name.getFieldValue(PersonName.NameField.GIVEN, Collections.emptySet()) + + name.getFieldValue(PersonName.NameField.SURNAME, Collections.emptySet()); + int[] localeScripts = UScript.getCode(formatterLocale); int stringScript = UScript.COMMON; - for (int i = 0; stringScript == UScript.COMMON && i < s.length(); i++) { - char c = s.charAt(i); + for (int i = 0; stringScript == UScript.COMMON && i < nameText.length(); i++) { + char c = nameText.charAt(i); stringScript = UScript.getScript(c); } @@ -316,4 +330,24 @@ private boolean scriptMatchesLocale(String s, Locale locale) { } return false; } + + /** + * Returns true if the two locales should be considered equivalent for space-replacement purposes. 
+ */ + private boolean localesMatch(Locale nameLocale, Locale formatterLocale) { + String nameLanguage = nameLocale.getLanguage(); + String formatterLanguage = formatterLocale.getLanguage(); + + if (nameLanguage.equals(formatterLanguage)) { + return true; + } + + // HACK to make Japanese and Chinese names use the native format and native space replacement + // (do we want to do something more general here?) + if ((nameLanguage.equals("ja") || nameLanguage.equals("zh")) && (formatterLanguage.equals("ja") || formatterLanguage.equals("zh"))) { + return true; + } + + return false; + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java index e91edcb02486..409fe83c4ce4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/personname/PersonNamePattern.java @@ -2,13 +2,7 @@ // License & terms of use: http://www.unicode.org/copyright.html package com.ibm.icu.impl.personname; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.StringTokenizer; +import java.util.*; import com.ibm.icu.text.PersonName; @@ -27,6 +21,11 @@ public static PersonNamePattern[] makePatterns(String[] patternText, PersonNameF return result; } + @Override + public String toString() { + return patternText; + } + private PersonNamePattern(String patternText, PersonNameFormatterImpl formatterImpl) { this.patternText = patternText; @@ -88,6 +87,11 @@ public String format(PersonName name) { StringBuilder textBefore = new StringBuilder(); StringBuilder textAfter = new StringBuilder(); + // if the name doesn't have a surname field and the pattern doesn't have a given-name field, + // we actually format a modified version of the name object where the contents of the + // given-name 
field has been copied into the surname field + name = hackNameForEmptyFields(name); + // the logic below attempts to implement the following algorithm: // - If one or more fields at the beginning of the name are empty, also skip all literal text // from the beginning of the name up to the first populated field. @@ -148,7 +152,7 @@ public int numPopulatedFields(PersonName name) { public int numEmptyFields(PersonName name) { int result = 0; for (Element element : patternElements) { - result += element.isPopulated(name) ? 0 : 1; + result += (!element.isLiteral() && !element.isPopulated(name)) ? 1 : 0; } return result; } @@ -161,6 +165,11 @@ public int numEmptyFields(PersonName name) { * @param s2 The literal text after the omitted field. */ private String coalesce(StringBuilder s1, StringBuilder s2) { + // if the contents of s2 occur at the end of s1, we just use s1 + if (endsWith(s1, s2)) { + s2.setLength(0); + } + // get the range of non-whitespace characters at the beginning of s1 int p1 = 0; while (p1 < s1.length() && !Character.isWhitespace(s1.charAt(p1))) { @@ -191,6 +200,45 @@ private String coalesce(StringBuilder s1, StringBuilder s2) { return result; } + /** + * Returns true if s1 ends with s2. 
+ */ + private boolean endsWith(StringBuilder s1, StringBuilder s2) { + int p1 = s1.length() - 1; + int p2 = s2.length() - 1; + + while (p1 >= 0 && p2 >= 0 && s1.charAt(p1) == s2.charAt(p2)) { + --p1; + --p2; + } + return p2 < 0; + } + + private PersonName hackNameForEmptyFields(PersonName originalName) { + // this is a hack to deal with mononyms (name objects that don't have both a given name and a surname)-- + // if the name object has a given-name field but not a surname field and the pattern either doesn't + // have a given-name field or only has "{given-initial}", we return a PersonName object that will + // return the value of the given-name field when asked for the value of the surname field and that + // will return null when asked for the value of the given-name field (all other field values and + // properties of the underlying object are returned unchanged) + PersonName result = originalName; + if (originalName.getFieldValue(PersonName.NameField.SURNAME, Collections.emptySet()) == null) { + boolean patternHasNonInitialGivenName = false; + for (PersonNamePattern.Element element : patternElements) { + if (!element.isLiteral() + && ((NameFieldImpl)element).fieldID == PersonName.NameField.GIVEN + && !((NameFieldImpl)element).modifiers.containsKey(PersonName.FieldModifier.INITIAL)) { + patternHasNonInitialGivenName = true; + break; + } + } + if (!patternHasNonInitialGivenName) { + return new GivenToSurnamePersonName(originalName); + } + } + return result; + } + /** * A single element in a NamePattern. This is either a name field or a range of literal text. 
*/ @@ -210,6 +258,11 @@ public LiteralText(String text) { this.text = text; } + @Override + public String toString() { + return text; + } + public boolean isLiteral() { return true; } @@ -250,6 +303,19 @@ public NameFieldImpl(String fieldNameAndModifiers, PersonNameFormatterImpl forma } } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("{"); + sb.append(fieldID); + for (PersonName.FieldModifier modifier : modifiers.keySet()) { + sb.append("-"); + sb.append(modifier.toString()); + } + sb.append("}"); + return sb.toString(); + } + public boolean isLiteral() { return false; } @@ -266,10 +332,48 @@ public String format(PersonName name) { } public boolean isPopulated(PersonName name) { - // just check whether the unmodified field contains a value - Set modifierIDs = new HashSet<>(); - String fieldValue = name.getFieldValue(fieldID, modifierIDs); - return fieldValue != null && !fieldValue.isEmpty(); + String result = this.format(name); + return result != null && ! result.isEmpty(); + } + } + + /** + * Internal class used when formatting a mononym (a PersonName object that only has + * a given-name field). If the name doesn't have a surname field and the pattern + * doesn't have a given-name field (or only has one that produces an initial), we + * use this class to behave as though the value supplied in the given-name field + * had instead been supplied in the surname field. 
+ */ + private static class GivenToSurnamePersonName implements PersonName { + private PersonName underlyingPersonName; + + public GivenToSurnamePersonName(PersonName underlyingPersonName) { + this.underlyingPersonName = underlyingPersonName; + } + + @Override + public String toString() { + return "Inverted version os " + underlyingPersonName.toString(); + } + @Override + public Locale getNameLocale() { + return underlyingPersonName.getNameLocale(); + } + + @Override + public PreferredOrder getPreferredOrder() { + return underlyingPersonName.getPreferredOrder(); + } + + @Override + public String getFieldValue(NameField identifier, Set modifiers) { + if (identifier == NameField.SURNAME) { + return underlyingPersonName.getFieldValue(NameField.GIVEN, modifiers); + } else if (identifier == NameField.GIVEN) { + return null; + } else { + return underlyingPersonName.getFieldValue(identifier, modifiers); + } } } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java index 8cd0a5e1fc8e..517e2565f5da 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/PersonNameFormatter.java @@ -324,4 +324,13 @@ private PersonNameFormatter(Locale locale, Length length, Usage usage, Formality public PersonNameFormatter(Locale locale, String[] patterns) { this.impl = new PersonNameFormatterImpl(locale, patterns); } + + /** + * @internal For debugging only! + * @deprecated This API is for debugging only. 
+ */ + @Override + public String toString() { + return impl.toString(); + } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java index f1139da8a937..5ece4156472b 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SimplePersonName.java @@ -94,9 +94,16 @@ public SimplePersonName build() { if (fieldValues.get("surname") == null) { String surnamePrefix = fieldValues.get("surname-prefix"); String surnameCore = fieldValues.get("surname-core"); + + StringBuilder sb = new StringBuilder(); if (surnamePrefix != null && surnameCore != null) { fieldValues.put("surname", surnamePrefix + " " + surnameCore); + } else if (surnamePrefix != null) { + fieldValues.put("surname", surnamePrefix); + } else if (surnameCore != null) { + fieldValues.put("surname", surnameCore); } + // if both "surname-prefix" and "surname-core" are empty, don't fill in "surname" either } return new SimplePersonName(locale, preferredOrder, fieldValues); @@ -202,6 +209,23 @@ public String getFieldValue(NameField nameField, Set modifiers) { return result; } + /** + * @internal Debugging only! 
+ * @return + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + for (String key : fieldValues.keySet()) { + if (sb.length() > 0) { + sb.append(","); + } + sb.append(key + "=" + fieldValues.get(key)); + } + sb.append(",locale=" + nameLocale); + return sb.toString(); + } + private static String makeModifiedFieldName(NameField fieldName, Collection modifiers) { StringBuilder result = new StringBuilder(); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ExhaustivePersonNameFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ExhaustivePersonNameFormatterTest.java new file mode 100644 index 000000000000..2bed7b3ac42d --- /dev/null +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ExhaustivePersonNameFormatterTest.java @@ -0,0 +1,223 @@ +// © 2023 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package com.ibm.icu.dev.test.format; + +import com.ibm.icu.text.PersonName; +import com.ibm.icu.text.PersonNameFormatter; +import com.ibm.icu.text.SimplePersonName; + +import java.io.*; +import java.util.*; + +/** + * This is a test designed to parse the files generated by GeneratePersonNameTestData.java in + * the CLDR project. It takes one command-line parameter-- the path to the directory that + * contains the test files (common/testData/personNameTest in the CLDR source tree). + * This isn't set up as a unit test because of the dependency on the CLDR files (I didn't + * want to copy all of those over into the ICU tree) and because I thought the test would + * take too long to run. 
+ */ +public class ExhaustivePersonNameFormatterTest { + public static void main(String[] args) throws IOException { + if (args.length < 1) { + throw new IllegalArgumentException("No data file directory specified!"); + } + + String dataFilePath = args[0]; + File dataFileDir = new File(dataFilePath); + + if (!dataFileDir.isDirectory()) { + throw new IllegalArgumentException(dataFilePath + " is not a directory!"); + } + + int filesWithErrors = 0; + int filesWithoutErrors = 0; + int skippedFiles = 0; + int totalErrors = 0; + + for (String filename : dataFileDir.list()) { + File dataFile = new File(dataFileDir, filename); + if (dataFile.isDirectory() || !filename.endsWith(".txt")) { + System.out.println("Skipping " + filename + "..."); + continue; + } + String[] FILENAMES_TO_SKIP = {"gaa.txt", "dsb.txt", "syr.txt", "hsb.txt", "lij.txt"}; + if (Arrays.asList(FILENAMES_TO_SKIP).contains(filename)) { + // extra check to narrow down the files for debugging + System.out.println("Skipping " + filename + "..."); + ++skippedFiles; + continue; + } + int testErrors = runTest(dataFile); + if (testErrors == 0) { + ++filesWithoutErrors; + } else { + ++filesWithErrors; + totalErrors += testErrors; + } + } + + System.out.println(); + System.out.println("Files without errors: " + filesWithoutErrors); + System.out.println("Files with errors: " + filesWithErrors); + if (skippedFiles > 0) { + System.out.println("Skipped files: " + skippedFiles); + } + System.out.println("Total number of errors: " + totalErrors); + } + + private static int runTest(File testFile) throws IOException { + LineNumberReader in = new LineNumberReader(new InputStreamReader(new FileInputStream(testFile))); + String line = null; + PersonNameTester tester = new PersonNameTester(testFile.getName()); + + do { + line = in.readLine(); + tester.processLine(line, in.getLineNumber()); + } while (line != null); + + System.out.println(testFile.getAbsolutePath() + " had " + tester.getErrorCount() + " errors"); + return 
tester.getErrorCount(); + } + + private static class PersonNameTester { + SimplePersonName name = null; + SimplePersonName.Builder nameBuilder = null; + String expectedResult = null; + Locale formatterLocale = null; + int errorCount = 0; + + public PersonNameTester(String testFileName) { + formatterLocale = Locale.forLanguageTag(testFileName.substring(0, testFileName.length() - ".txt".length()).replace('_', '-')); + } + + public void processLine(String line, int lineNumber) { + if (line == null || line.isEmpty() || line.startsWith("#")) { + return; + } + + String[] lineFields = line.split(";"); + String opcode = lineFields[0].trim(); + String[] parameters = Arrays.copyOfRange(lineFields,1, lineFields.length); + + processCommand(opcode, parameters, lineNumber); + } + + public int getErrorCount() { + return errorCount; + } + + private void processCommand(String opcode, String[] parameters, int lineNumber) { + if (opcode.equals("enum")) { + processEnumLine(); + } else if (opcode.equals("name")) { + processNameLine(parameters, lineNumber); + } else if (opcode.equals("expectedResult")) { + processExpectedResultLine(parameters, lineNumber); + } else if (opcode.equals("parameters")) { + processParametersLine(parameters, lineNumber); + } else if (opcode.equals("endName")) { + processEndNameLine(); + } else { + System.err.println("Unknown command '" + opcode + "' at line " + lineNumber); + } + } + + private void processEnumLine() { + // this test isn't actually going to do anything with "enum" lines + } + + private void processNameLine(String[] parameters, int lineNumber) { + if (checkState(name == null, "name", lineNumber) + && checkNumParams(parameters, 2, "name", lineNumber)) { + if (nameBuilder == null) { + nameBuilder = SimplePersonName.builder(); + } + + String fieldName = parameters[0].trim(); + String fieldValue = parameters[1].trim(); + + if (fieldName.equals("locale")) { + nameBuilder.setLocale(Locale.forLanguageTag(fieldValue.replace("_", "-"))); + } else { + 
String[] fieldNamePieces = fieldName.split("-"); + PersonName.NameField nameField = PersonName.NameField.forString(fieldNamePieces[0]); + List fieldModifiers = new ArrayList<>(); + for (int i = 1; i < fieldNamePieces.length; i++) { + fieldModifiers.add(PersonName.FieldModifier.forString(fieldNamePieces[i])); + } + nameBuilder.addField(nameField, fieldModifiers, fieldValue); + } + } + } + + private void processExpectedResultLine(String[] parameters, int lineNumber) { + if (checkState(name != null || nameBuilder != null, "expectedResult", lineNumber) + && checkNumParams(parameters, 1, "expectedResult", lineNumber)) { + if (name == null) { + name = nameBuilder.build(); + nameBuilder = null; + } + expectedResult = parameters[0].trim(); + } + } + + private void processParametersLine(String[] parameters, int lineNumber) { + if (checkState(name != null && expectedResult != null, "parameters", lineNumber) + && checkNumParams(parameters, 4, "parameters", lineNumber)) { + String optionsStr = parameters[0].trim(); + String lengthStr = parameters[1].trim(); + String usageStr = parameters[2].trim(); + String formalityStr = parameters[3].trim(); + + PersonNameFormatter.Builder builder = PersonNameFormatter.builder(); + builder.setLocale(formatterLocale); + if (optionsStr.equals("sorting")) { + builder.setDisplayOrder(PersonNameFormatter.DisplayOrder.SORTING); + } + builder.setLength(PersonNameFormatter.Length.valueOf(lengthStr.toUpperCase())); + builder.setUsage(PersonNameFormatter.Usage.valueOf(usageStr.toUpperCase())); + builder.setFormality(PersonNameFormatter.Formality.valueOf(formalityStr.toUpperCase())); + + PersonNameFormatter formatter = builder.build(); + String actualResult = formatter.formatToString(name); + + checkResult(actualResult, lineNumber); + } + } + + private void processEndNameLine() { + name = null; + expectedResult = null; + nameBuilder = null; + } + + private boolean checkNumParams(String[] parameters, int expectedLength, String opcode, int lineNumber) { 
+ boolean result = parameters.length == expectedLength; + if (!result) { + reportError("'" + opcode + "' line doesn't have " + expectedLength + " parameters", lineNumber); + } + return result; + } + + private boolean checkState(boolean state, String opcode, int lineNumber) { + if (!state) { + reportError("Misplaced '" + opcode + "' line", lineNumber); + } + return state; + } + + private boolean checkResult(String actualResult, int lineNumber) { + boolean result = expectedResult.equals(actualResult); + if (!result) { + reportError("Expected '" + expectedResult + "', got '" + actualResult + "'", lineNumber); + } + return result; + } + + private void reportError(String error, int lineNumber) { + System.out.println(" " + error + " at line " + lineNumber); + ++errorCount; + } + } +} diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java index d4ce68ab9b00..234e7d7ff226 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/PersonNameFormatterTest.java @@ -218,6 +218,15 @@ public void TestPrefixCore() { { "en_US", "LONG", "MONOGRAM", "FORMAL", "DEFAULT", "", "WVDP" }, { "en_US", "LONG", "MONOGRAM", "INFORMAL", "DEFAULT", "", "WVDP" }, }), + new NameAndTestCases("locale=en_US,given=John,surname-core=Smith", new String[][] { + // if the PersonName object just fills in the "surname-core" field, treat it as the "surname" field + { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "John Smith" }, + { "en_US", "LONG", "REFERRING", "INFORMAL", "DEFAULT", "", "John Smith" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "DEFAULT", "", "John Smith" }, + { "en_US", "MEDIUM", "REFERRING", "INFORMAL", "DEFAULT", "", "John Smith" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "DEFAULT", "", "J. 
Smith" }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "DEFAULT", "", "John S." }, + }), }, false); } @@ -226,8 +235,8 @@ public void TestInitialGeneration() { executeTestCases(new NameAndTestCases[]{ new NameAndTestCases("locale=en_US,given=George,given2=Herbert Walker,surname=Bush", new String[][] { { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "George Herbert Walker Bush" }, - { "en_US", "MEDIUM", "REFERRING", "FORMAL", "DEFAULT", "", "George H. W. Bush" }, - { "en_US", "SHORT", "REFERRING", "FORMAL", "DEFAULT", "", "G. H. W. Bush" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "DEFAULT", "", "George H.W. Bush" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "DEFAULT", "", "G.H.W. Bush" }, { "en_US", "SHORT", "REFERRING", "INFORMAL", "DEFAULT", "", "George B." }, { "en_US", "LONG", "MONOGRAM", "FORMAL", "DEFAULT", "", "GHB" }, { "en_US", "LONG", "MONOGRAM", "INFORMAL", "DEFAULT", "", "GB" }, @@ -236,19 +245,19 @@ public void TestInitialGeneration() { { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Ralph Vaughan Williams" }, { "en_US", "MEDIUM", "REFERRING", "FORMAL", "DEFAULT", "", "Ralph Vaughan Williams" }, { "en_US", "SHORT", "REFERRING", "FORMAL", "DEFAULT", "", "R. Vaughan Williams" }, - { "en_US", "SHORT", "REFERRING", "INFORMAL", "DEFAULT", "", "Ralph V. W." }, + { "en_US", "SHORT", "REFERRING", "INFORMAL", "DEFAULT", "", "Ralph V.W." }, { "en_US", "LONG", "MONOGRAM", "FORMAL", "DEFAULT", "", "RV" }, { "en_US", "LONG", "MONOGRAM", "INFORMAL", "DEFAULT", "", "RV" }, }), new NameAndTestCases("locale=en_US,given=John Paul,given2=Stephen David George,surname=Smith", new String[][] { { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "John Paul Stephen David George Smith" }, - { "en_US", "MEDIUM", "REFERRING", "FORMAL", "DEFAULT", "", "John Paul S. D. G. Smith" }, - { "en_US", "SHORT", "REFERRING", "FORMAL", "DEFAULT", "", "J. P. S. D. G. Smith" }, + { "en_US", "MEDIUM", "REFERRING", "FORMAL", "DEFAULT", "", "John Paul S.D.G. 
Smith" }, + { "en_US", "SHORT", "REFERRING", "FORMAL", "DEFAULT", "", "J.P.S.D.G. Smith" }, { "en_US", "SHORT", "REFERRING", "INFORMAL", "DEFAULT", "", "John Paul S." }, { "en_US", "LONG", "MONOGRAM", "FORMAL", "DEFAULT", "", "JSS" }, { "en_US", "LONG", "MONOGRAM", "INFORMAL", "DEFAULT", "", "JS" }, }), - }, true); + }, false); } @Test @@ -299,8 +308,8 @@ public void TestMultiplePatterns() { public void TestNameOrder() { executeTestCases(new NameAndTestCases[]{ // the name's locale is used to determine the field order. For the English name formatter, if the - // name is English, the order is GN first. If it's Japanese, it's SN first. This is true whether the - // Japanese name is written in Latin letters or Han characters + // name is English, the order is GN first. If it's Japanese, it's SN first. And if the name is written + // in Japanese characters, we just use the Japanese formatter. new NameAndTestCases("locale=en_US,given=Shinzo,surname=Abe", new String[][] { { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Shinzo Abe" }, }), @@ -308,7 +317,7 @@ public void TestNameOrder() { { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Abe Shinzo" }, }), new NameAndTestCases("locale=ja_JP,given=晋三,surname=安倍", new String[][] { - { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "安倍 晋三" }, + { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "安倍晋三" }, }), // the name can also declare its order directly, with the optional "preferredOrder" field. 
If it does this, @@ -342,35 +351,48 @@ public void TestCapitalizedSurname() { @Test public void TestNameSpacing() { executeTestCases(new NameAndTestCases[]{ - // if the formatter locale uses spaces, the result will use its formats (complete with spaces), - // regardless of locale + // if the name uses the same characters as the formatter locale, even if the name locale doesn't + // match (i.e., the name is transliterated), we use the formatter's format and the name's + // field order new NameAndTestCases("locale=ja_JP,given=Hayao,surname=Miyazaki", new String[][] { { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Miyazaki Hayao" }, }), - new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] { - { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎 駿" }, - }), - // if the formatter locale doesn't use spaces and the name's locale doesn't either, just use - // the native formatter + // if the name is in a script the formatter's locale doesn't use, we just use a formatter for + // whatever the name locale is new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] { - { "ja_JP", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎駿" }, - { "zh_CN", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎 駿" }, + { "en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎駿" }, }), - - // if the formatter locale doesn't use spaces and the name's locale does, use the name locale's formatter, - // but if the name is still using the formatter locale's script, use the native formatter's - // "foreign space replacement" character instead of spaces new NameAndTestCases("locale=en_US,given=Albert,surname=Einstein", new String[][] { { "ja_JP", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Albert Einstein" }, { "zh_CN", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Albert Einstein" }, }), + + // if the name is in a script the formatter's locale does use, we use it, but if the name locale's + // language doesn't match the formatter locale's 
language, we replace any spaces in the result + // with the foreignSpaceReplacement character new NameAndTestCases("locale=en_US,given=アルベルト,surname=アインシュタイン", new String[][] { { "ja_JP", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "アルベルト・アインシュタイン" }, }), new NameAndTestCases("locale=en_US,given=阿尔伯特,surname=爱因斯坦", new String[][] { { "zh_CN", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "阿尔伯特·爱因斯坦" }, }), + + // if the name's script and locale both match the formatter, we format as normal, but replace + // any spaces in the result with the nativeSpaceReplacement character (which, for Japanese, + // is the empty string, giving us the name without spaces) + new NameAndTestCases("locale=ja_JP,given=駿,surname=宮崎", new String[][] { + { "ja_JP", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎駿" }, + { "zh_CN", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎駿" }, + }), + // (Thai, despite not using spaces between words, DOES use spaces between the given name and surname) + new NameAndTestCases("locale=th_TH,given=ไอริณ,surname=กล้าหาญ", new String[][] { + { "th_TH", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "ไอริณ กล้าหาญ" }, + }), + // (Lao, on the other hand, does NOT put a space between the given name and surname) + new NameAndTestCases("locale=lo_LA,given=ໄອຣີນ,surname=ແອດເລີ", new String[][] { + { "lo_LA", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "ໄອຣີນແອດເລີ" }, + }), }, false); } @@ -379,16 +401,38 @@ public void TestScriptGuessing() { executeTestCases(new NameAndTestCases[]{ // here, we're leaving out the locale on the name object. In the first case, we // see the Latin letters and assume English, giving us GN-first ordering. In the - // second, we see the Han characters and guess Japanese, giving us SN-first ordering. + // second, we see the Han characters and guess Japanese, giving us SN-first ordering + // (and the Japanese format with no space between the fields).
new NameAndTestCases("given=Hayao,surname=Miyazaki", new String[][]{ {"en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "Hayao Miyazaki"}, }), new NameAndTestCases("given=駿,surname=宮崎", new String[][]{ - {"en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎 駿"}, + {"en_US", "LONG", "REFERRING", "FORMAL", "DEFAULT", "", "宮崎駿"}, }), }, false); } + @Test + public void TestMissingSurname() { + executeTestCases(new NameAndTestCases[]{ + // test handling of mononyms: names that only have a given name. Formatting patterns that only + // use the surname field will display as empty (or, in some of the examples below, with just + // the title) unless we do something special. The special thing we do is that when the pattern + // has no given-name field and the name object has no surname field, we behave as though the + // contents of the given-name field are in the surname field. (Note that this only happens + // for the "given" and "surname" fields; "given2" and "surname2" don't have this logic.) + new NameAndTestCases("title=Ms.,given=Zendaya", new String[][]{ + {"en_US", "MEDIUM", "ADDRESSING", "FORMAL", "DEFAULT", "", "Ms. Zendaya"}, + {"en_US", "SHORT", "ADDRESSING", "FORMAL", "DEFAULT", "", "Ms. Zendaya"}, + {"en_US", "MEDIUM", "ADDRESSING", "INFORMAL", "DEFAULT", "", "Zendaya"}, + {"en_US", "SHORT", "ADDRESSING", "INFORMAL", "DEFAULT", "", "Zendaya"}, + {"en_US", "SHORT", "MONOGRAM", "FORMAL", "DEFAULT", "", "Z"}, + {"en_US", "SHORT", "REFERRING", "FORMAL", "DEFAULT", "", "Zendaya"}, + {"en_US", "SHORT", "REFERRING", "FORMAL", "SORTING", "", "Zendaya"}, + }), + }, false); + } + @Test public void TestLiteralTextElision2() { // a more extensive text of the literal text elision logic