From e8f31c43dba4276c69f9a9b4a0db366eb10ed2ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?=
Date: Thu, 15 Sep 2022 00:32:57 +0200
Subject: [PATCH] tools/update-unicode-data.sh: Add a script to handle the
 unicode updates

We need to perform multiple actions on unicode updates, so let's handle
all the commands in a single script so that we don't have to remember
all the details each time.
---
 tools/update-unicode-data.sh | 51 ++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100755 tools/update-unicode-data.sh

diff --git a/tools/update-unicode-data.sh b/tools/update-unicode-data.sh
new file mode 100755
index 000000000..2d751db15
--- /dev/null
+++ b/tools/update-unicode-data.sh
@@ -0,0 +1,51 @@
+#!/usr/bin/env bash
+#
+# Regenerate the Unicode-derived code and test data in this git checkout from
+# a Unicode Character Database (UCD) directory.
+#
+# Usage: update-unicode-data.sh UCD-directory [version]
+#
+# When version is omitted, it is extracted from ReadMe*.txt in the UCD
+# directory.
+
+if [ ! -d "$1" ]; then
+    echo "Usage $(basename "$0") UCD-directory [version]" >&2
+    exit 1
+fi
+
+ucd=$(realpath "$1")
+version=$2
+# Top of the git checkout containing this script. Abort if git fails: an empty
+# path could otherwise make the later "cd" stay in the current directory.
+glib_dir=$(git -C "$(dirname "$0")" rev-parse --show-toplevel) || exit 1
+
+# shellcheck disable=SC2144 # we only want to match a file like this
+if ! [ -f "$ucd"/UnicodeData*.txt ] || ! [ -f "$ucd"/CaseFolding*.txt ]; then
+    echo "'$ucd' does not look like an Unicode Database directory" >&2
+    exit 1
+fi
+
+if [ -z "$version" ]; then
+    # Pull X.Y.Z out of "Version X.Y.Z of the Unicode Standard" in the ReadMe.
+    readme=("$ucd"/ReadMe*.txt)
+    version=$(sed -n "s,.*Version \([0-9.]\+\) of the Unicode Standard.*,\1,p" \
+        "${readme[@]}")
+
+    if [ -z "$version" ]; then
+        echo "Invalid version found" >&2
+        exit 1
+    fi
+fi
+
+cd "$glib_dir" || exit 1
+
+echo "Updating generated code to Unicode version $version"
+# From here on, trace every command and stop at the first failure.
+set -xe
+
+(cd glib && ./gen-unicode-tables.pl -both "$version" "$ucd")
+glib/tests/gen-casefold-txt.py "$version" \
+    "$ucd"/CaseFolding*.txt > glib/tests/casefold.txt
+glib/tests/gen-casemap-txt.py "$version" \
+    "$ucd"/UnicodeData*.txt \
+    "$ucd"/SpecialCasing*.txt > glib/tests/casemap.txt