From dc2491d2243aad6e0ad86b9888cc79458b475f25 Mon Sep 17 00:00:00 2001
From: Philip Withnall
Date: Mon, 21 Oct 2024 16:57:46 +0100
Subject: [PATCH] gen-unicode-tables.pl: Add more error checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We’re essentially trying to build a minimal perfect hash function, and
`vals` is the map which represents that function. If we redefine a
member of `vals`, the map is no longer a partial function — one input
value (a Unicode codepoint) has two output values (compose table
indices). So it’s bad if a member of `vals` gets redefined, and we want
to be notified if that happens.

As it happens, some of the new codepoints in Unicode 16.0 cause these
checks to fail. For example, U+16121 Gurung Khema Vowel Sign U
decomposes to U+1611E U+1611E. This causes `vals{U+1611E}` to be defined
to an index from the `first` map, and then redefined to an index from
the `second` map.

The following few commits will fix this, but let’s get the checks in
first.

Signed-off-by: Philip Withnall --- glib/gen-unicode-tables.pl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/glib/gen-unicode-tables.pl b/glib/gen-unicode-tables.pl index 18c1995d5..645ed67f1 100755 --- a/glib/gen-unicode-tables.pl +++ b/glib/gen-unicode-tables.pl @@ -1354,12 +1354,18 @@ sub output_composition_table printf OUT "#define COMPOSE_FIRST_SINGLE_START %d\n", $total; for $record (@first_singletons) { my $code = $record->[0]; + if (defined $vals{$code}) { + die "redefining $code as first-singleton"; + } $vals{$code} = $i++ + $total; $last = $code if $code > $last; } $total += @first_singletons; printf OUT "#define COMPOSE_SECOND_START %d\n", $total; for $code (keys %second) { + if (defined $vals{$code}) { + die "redefining $code as second"; + } $vals{$code} = $second{$code} + $total; $last = $code if $code > $last; } @@ -1368,6 +1374,9 @@ sub output_composition_table printf OUT "#define COMPOSE_SECOND_SINGLE_START %d\n\n", $total; for $record (@second_singletons) { my $code = $record->[0]; + if (defined $vals{$code}) { + die "redefining $code as second-singleton"; + } $vals{$code} = $i++ + $total; $last = $code if $code > $last; }