Skip to content

Commit

Permalink
maint: avoid duplicated boolean properties and bad script extensions (#202)
Browse files Browse the repository at this point in the history

`ucptest` was misbehaving and showing the wrong properties and
finding the wrong characters.
  • Loading branch information
carenas committed Feb 3, 2023
1 parent 4678857 commit 9c905ce
Show file tree
Hide file tree
Showing 8 changed files with 634 additions and 782 deletions.
7 changes: 4 additions & 3 deletions maint/GenerateCommon.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ def getbpropslist():
if re.match(pat, bplast) != None:
break
else:
bplist.append(bplast)
if bplast not in bplist:
bplist.append(bplast)

file.close()

Expand Down Expand Up @@ -204,7 +205,7 @@ def collect_property_names():
if match_obj == None:
continue

if match_obj.group(2) in bool_properties:
if match_obj.group(2) != match_obj.group(1) and match_obj.group(2) in bool_properties:
if match_obj.group(3) == None:
abbreviations[match_obj.group(2)] = (match_obj.group(1),)
else:
Expand Down Expand Up @@ -294,7 +295,7 @@ def open_output(default):
try:
file = open(output_name, "w")
except IOError:
print ("** Couldn't open %s" % output_name)
print("** Couldn't open %s" % output_name)
sys.exit(1)

script_name = sys.argv[0]
Expand Down
2 changes: 1 addition & 1 deletion maint/GenerateTest26.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def to_string_char(ch_idx):
input_file = open(output_directory + "testinput26", "w")
output_file = open(output_directory + "testoutput26", "w")
except IOError:
print ("** Couldn't open output files")
print("** Couldn't open output files")
sys.exit(1)

write_both("# These tests are generated by maint/GenerateTest26.py, do not edit.\n\n")
Expand Down
2 changes: 1 addition & 1 deletion maint/GenerateUcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ def write_bitsets(list, item_size):
size = len(records) * record_size
stage1, stage2 = compress_table(table, block_size)
size += get_tables_size(stage1, stage2)
#print "/* block size %5d => %5d bytes */" % (block_size, size)
#print("/* block size {:3d} => {:5d} bytes */".format(block_size, size))
if size < min_size:
min_size = size
min_stage1, min_stage2 = stage1, stage2
Expand Down
2 changes: 1 addition & 1 deletion maint/GenerateUcpTables.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def stdnames(x):

for name in bool_properties:
utt_table.append((stdname(name), name, 'PT_BOOL'))
if name in abbreviations:
if name in abbreviations:
for abbrev in abbreviations[name]:
utt_table.append((stdname(abbrev), name, 'PT_BOOL'))

Expand Down
33 changes: 24 additions & 9 deletions maint/ucptest.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,7 @@ if (scriptx != 0)
if (bprops != 0)
{
const char *sep = "";
const uint32_t *p = PRIV(ucd_boolprop_sets) +
bprops * ucd_boolprop_sets_item_size;
const uint32_t *p = PRIV(ucd_boolprop_sets) + bprops;
printf(", [");
for (int i = 0; i < ucp_Bprop_Count; i++)
if (MAPBIT(p, i) != 0)
Expand Down Expand Up @@ -497,13 +496,13 @@ while (*s != 0)
if (strcmp(CS name, "script") == 0 ||
strcmp(CS name, "scriptx") == 0)
{
BOOL x = (name[6] == 'x');
BOOL scriptx_not = FALSE;
for (t = value; *t != 0; t++) *t = tolower(*t);

if (value[0] == '!')
{
if (name[6] == 'x') scriptx_not = TRUE;
else script_not = TRUE;
if (x) scriptx_not = TRUE; else script_not = TRUE;
offset = 1;
}

Expand All @@ -514,7 +513,21 @@ while (*s != 0)
PRIV(utt_names) + u->name_offset) == 0)
{
c = u->value;
if (name[6] == 'x')
if (x && !scriptx_not && u->type == PT_SC)
{
if (script < 0)
{
x = FALSE;
script = -1;
script_not = scriptx_not;
}
else if (!script_not)
{
printf("No characters found\n");
return;
}
}
if (x)
{
scriptx_list[scriptx_count++] = scriptx_not? (-c):c;
}
Expand Down Expand Up @@ -689,12 +702,15 @@ for (c = 0; c <= 0x10ffff; c++)
/* Positive requirement */
if (scriptx_list[i] >= 0)
{
if ((bits_scriptx[x] & (1u<<y)) != 0) found++;
if (scriptx_list[i] == UCD_SCRIPT(c) ||
((scriptx_list[i] < ucp_Unknown) &&
(bits_scriptx[x] & (1u<<y)) != 0)) found++;
}
/* Negative requirement */
else
{
if ((bits_scriptx[x] & (1u<<y)) == 0) found++;
if ((-(scriptx_list[i]) < ucp_Unknown) &&
(bits_scriptx[x] & (1u<<y)) == 0) found++;
}
}

Expand All @@ -703,8 +719,7 @@ for (c = 0; c <= 0x10ffff; c++)

if (bprop_count > 0)
{
const uint32_t *bits_bprop = PRIV(ucd_boolprop_sets) +
UCD_BPROPS(c) * ucd_boolprop_sets_item_size;
const uint32_t *bits_bprop = PRIV(ucd_boolprop_sets) + UCD_BPROPS(c);
unsigned int found = 0;

for (i = 0; i < bprop_count; i++)
Expand Down
1 change: 1 addition & 0 deletions maint/ucptestdata/testinput2
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
find script Han
find type Pe script Common scriptx Hangul
find script !latin scriptx sundanese
find type Sk
find type Pd
find gbreak LVT
Expand Down
652 changes: 326 additions & 326 deletions maint/ucptestdata/testoutput1

Large diffs are not rendered by default.

717 changes: 276 additions & 441 deletions maint/ucptestdata/testoutput2

Large diffs are not rendered by default.

0 comments on commit 9c905ce

Please sign in to comment.