glib-compile-schemas: get strict about whitespace

It's important to have strict rules for handling of whitespace in translated strings in GSettings schema files so that the tools extracting the messages will end up with the same messages as the runtime calling gettext().

The rules are designed to be simple and unambiguous yet cover most normal uses in a convenient way.

Those rules are as follows (with rationale):

 - for <default> tags, the text content has its leading and trailing whitespace stripped off, but internal whitespace is not modified in any way. This allows for slightly more flexible use of whitespace without causing that whitespace to appear in the strings for translation.

 - for <summary> and <description> tags, the content is split into paragraphs. Paragraphs are separated by two or more sequential newline characters. Each paragraph has its leading and trailing whitespace removed and all other whitespace is normalised to a single ASCII space character. Finally, the paragraphs are rejoined, inserting exactly two newlines between them. This allows for longer explanations (particularly in the description tag) using a natural format that, when normalised, will display nicely in toolkits.

This patch implements the rules for <default> tags. The schema compiler currently ignores <summary> and <description> tags.
2025-02-04 02:06:18 +01:00 · 2012-01-28 01:09:08 +01:00 · 2012-01-28 01:09:08 +01:00 · 9a7a98bf3b
commit 9a7a98bf3b
parent 37af8167b6
1 changed files with 63 additions and 13 deletions
--- a/gio/glib-compile-schemas.c
+++ b/gio/glib-compile-schemas.c
@ -36,6 +36,22 @@
 #include "gvdb/gvdb-builder.h"
 #include "strinfo.c"

+static void
+strip_string (GString *string)
+{
+  gint i;
+  /* Strip leading and trailing ASCII whitespace from @string in place;
+   * whitespace between non-whitespace characters is left untouched.
+   *
+   * Leading pass: count the whitespace characters at the front of the
+   * buffer (the empty loop body is intentional) and erase them in one
+   * go.  For an empty or all-whitespace string the scan relies on the
+   * terminating nul byte of string->str not being classed as
+   * whitespace, so that it stops at the end of the buffer.
+   */
+  for (i = 0; g_ascii_isspace (string->str[i]); i++);
+  g_string_erase (string, 0, i);
+  /* Trailing pass: only needed if something survived the leading pass.
+   * Walk backwards from the last character to the last non-whitespace
+   * one and cut the string off right after it.
+   */
+  if (string->len > 0)
+    {
+      /* len > 0, so there must be at least one non-whitespace character
+       * (the leading pass left one at index 0), which is what stops the
+       * backwards scan before it can run off the front of the buffer.
+       */
+      for (i = string->len - 1; g_ascii_isspace (string->str[i]); i--);
+      g_string_truncate (string, i + 1);
+    }
+}
+
 /* Handling of <enum> {{{1 */
 typedef struct
 {
@ -629,6 +645,23 @@ key_state_serialise (KeyState *state)
          /* translation */
          if (state->l10n)
            {
+              /* We are going to store the untranslated default for
+               * runtime translation according to the current locale.
+               * We need to strip leading and trailing whitespace from
+               * the string so that it's exactly the same as the one
+               * that ended up in the .po file for translation.
+               *
+               * We want to do this so that
+               *
+               *   <default l10n='messages'>
+               *     ['a', 'b', 'c']
+               *   </default>
+               *
+               * ends up in the .po file like "['a', 'b', 'c']",
+               * omitting the extra whitespace at the start and end.
+               */
+              strip_string (state->unparsed_default_value);
+
              if (state->l10n_context)
                {
                  gint len;
@ -1498,21 +1531,38 @@ text (GMarkupParseContext  *context,
      GError              **error)
 {
  ParseState *state = user_data;
-  gsize i;

-  for (i = 0; i < text_len; i++)
-    if (!g_ascii_isspace (text[i]))
-      {
-        if (state->string)
-          g_string_append_len (state->string, text, text_len);
+  if (state->string)
+    {
+      /* we are expecting a string, so store the text data.
+       *
+       * we store the data verbatim here and deal with whitespace
+       * later on.  there are two reasons for that:
+       *
+       *  1) whitespace is handled differently depending on the tag
+       *     type.
+       *
+       *  2) we could do leading whitespace removal by refusing to
+       *     insert it into state->string if it's at the start, but for
+       *     trailing whitespace, we have no idea if there is another
+       *     text() call coming or not.
+       */
+      g_string_append_len (state->string, text, text_len);
+    }
+  else
+    {
+      /* string is not expected: accept (and ignore) pure whitespace */
+      gsize i;

-        else
-          g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
-                       _("text may not appear inside <%s>"),
-                       g_markup_parse_context_get_element (context));
-
-        break;
-      }
+      for (i = 0; i < text_len; i++)
+        if (!g_ascii_isspace (text[i]))
+          {
+            g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_INVALID_CONTENT,
+                         _("text may not appear inside <%s>"),
+                         g_markup_parse_context_get_element (context));
+            break;
+          }
+    }
 }

 /* Write to GVDB {{{1 */