gstrfuncs: Add replacement for string-to-number functions

Very often when we want to convert a string to number, we assume that
the string contains only a number. We have g_ascii_strto* family of
functions to do the conversion but they are awkward to use - one has
to check if errno is zero, end_ptr is not NULL and *end_ptr points to
the terminating nul and then do the bounds checking. Many projects
need this kind of functionality, so it gets reimplemented all the
time.

This commit adds some replacement functions that convert a string to a
signed or unsigned number that also follows the usual way of error
reporting - returning FALSE on failure and filling an error output
parameter.
This commit is contained in:
Krzesimir Nowak 2017-04-27 12:53:51 +02:00
parent 58ecc57ca7
commit 4fe89b0437
4 changed files with 486 additions and 0 deletions

View File

@ -1475,6 +1475,15 @@ g_ascii_dtostr
g_ascii_formatd
g_strtod
<SUBSECTION>
GNumberParserError
G_NUMBER_PARSER_ERROR
g_ascii_string_to_signed
g_ascii_string_to_unsigned
<SUBSECTION Private>
g_number_parser_error_quark
<SUBSECTION>
g_strchug
g_strchomp

View File

@ -3165,3 +3165,213 @@ g_strv_contains (const gchar * const *strv,
return FALSE;
}
static gboolean
str_has_sign (const gchar *str)
{
return str[0] == '-' || str[0] == '+';
}
static gboolean
str_has_hex_prefix (const gchar *str)
{
return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
}
/**
* g_ascii_string_to_signed:
* @str: a string
* @base: base of a parsed number
* @min: a lower bound (inclusive)
* @max: an upper bound (inclusive)
* @out_num: (out) (optional): a return location for a number
* @error: a return location for #GError
*
* A convenience function for converting a string to a signed number.
*
* This function assumes that @str contains only a number of the given
* @base that is within inclusive bounds limited by @min and @max. If
* this is true, then the converted number is stored in @out_num. An
* empty string is not a valid input. A string with leading or
* trailing whitespace is also an invalid input.
*
* @base can be between 2 and 36 inclusive. Hexadecimal numbers must
* not be prefixed with "0x" or "0X". Such a problem does not exist
* for octal numbers, since they were usually prefixed with a zero
* which does not change the value of the parsed number.
*
* Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
* domain. If the input is invalid, the error code will be
* %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
* bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
*
* See g_ascii_strtoll() if you have more complex needs such as
* parsing a string which starts with a number, but then has other
* characters.
*
* Returns: %TRUE if @str was a number, otherwise %FALSE.
*
* Since: 2.54
*/
gboolean
g_ascii_string_to_signed (const gchar *str,
guint base,
gint64 min,
gint64 max,
gint64 *out_num,
GError **error)
{
gint64 number;
const gchar *end_ptr = NULL;
gint saved_errno = 0;
g_return_val_if_fail (str != NULL, FALSE);
g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
g_return_val_if_fail (min <= max, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
if (str[0] == '\0')
{
g_set_error_literal (error,
G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
_("Empty string is not a number"));
return FALSE;
}
errno = 0;
number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
saved_errno = errno;
if (/* We do not allow leading whitespace, but g_ascii_strtoll
* accepts it and just skips it, so we need to check for it
* ourselves.
*/
g_ascii_isspace (str[0]) ||
/* We don't support hexadecimal numbers prefixed with 0x or
* 0X.
*/
(base == 16 &&
(str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
(saved_errno != 0 && saved_errno != ERANGE) ||
end_ptr == NULL ||
*end_ptr != '\0')
{
g_set_error (error,
G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
_("“%s” is not a signed number"), str);
return FALSE;
}
if (saved_errno == ERANGE || number < min || number > max)
{
g_set_error (error,
G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
_("Number “%" G_GINT64_FORMAT "” is out of bounds"
" [%" G_GINT64_FORMAT ", %" G_GINT64_FORMAT "]"),
number, min, max);
return FALSE;
}
if (out_num != NULL)
*out_num = number;
return TRUE;
}
/**
* g_ascii_string_to_unsigned:
* @str: a string
* @base: base of a parsed number
* @min: a lower bound (inclusive)
* @max: an upper bound (inclusive)
* @out_num: (out) (optional): a return location for a number
* @error: a return location for #GError
*
* A convenience function for converting a string to an unsigned number.
*
* This function assumes that @str contains only a number of the given
* @base that is within inclusive bounds limited by @min and @max. If
* this is true, then the converted number is stored in @out_num. An
* empty string is not a valid input. A string with leading or
* trailing whitespace is also an invalid input.
*
* @base can be between 2 and 36 inclusive. Hexadecimal numbers must
* not be prefixed with "0x" or "0X". Such a problem does not exist
* for octal numbers, since they were usually prefixed with a zero
* which does not change the value of the parsed number.
*
* Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
* domain. If the input is invalid, the error code will be
* %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
* bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
*
* See g_ascii_strtoull() if you have more complex needs such as
* parsing a string which starts with a number, but then has other
* characters.
*
* Returns: %TRUE if @str was a number, otherwise %FALSE.
*
* Since: 2.54
*/
gboolean
g_ascii_string_to_unsigned (const gchar *str,
guint base,
guint64 min,
guint64 max,
guint64 *out_num,
GError **error)
{
guint64 number;
const gchar *end_ptr = NULL;
gint saved_errno = 0;
g_return_val_if_fail (str != NULL, FALSE);
g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
g_return_val_if_fail (min <= max, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
if (str[0] == '\0')
{
g_set_error_literal (error,
G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
_("Empty string is not a number"));
return FALSE;
}
errno = 0;
number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
saved_errno = errno;
if (/* We do not allow leading whitespace, but g_ascii_strtoull
* accepts it and just skips it, so we need to check for it
* ourselves.
*/
g_ascii_isspace (str[0]) ||
/* Unsigned number should have no sign.
*/
str_has_sign (str) ||
/* We don't support hexadecimal numbers prefixed with 0x or
* 0X.
*/
(base == 16 && str_has_hex_prefix (str)) ||
(saved_errno != 0 && saved_errno != ERANGE) ||
end_ptr == NULL ||
*end_ptr != '\0')
{
g_set_error (error,
G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
_("“%s” is not an unsigned number"), str);
return FALSE;
}
if (saved_errno == ERANGE || number < min || number > max)
{
g_set_error (error,
G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
_("Number “%" G_GUINT64_FORMAT "” is out of bounds"
" [%" G_GUINT64_FORMAT ", %" G_GUINT64_FORMAT "]"),
number, min, max);
return FALSE;
}
if (out_num != NULL)
*out_num = number;
return TRUE;
}
G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)

View File

@ -32,6 +32,7 @@
#include <stdarg.h>
#include <glib/gmacros.h>
#include <glib/gtypes.h>
#include <glib/gerror.h>
G_BEGIN_DECLS
@ -306,6 +307,52 @@ GLIB_AVAILABLE_IN_2_44
gboolean g_strv_contains (const gchar * const *strv,
const gchar *str);
/* Convenience ASCII string to number API */
/**
* GNumberParserError:
* @G_NUMBER_PARSER_ERROR_INVALID: String was not a valid number.
* @G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS: String was a number, but out of bounds.
*
* Error codes returned by functions converting a string to a number.
*
* Since: 2.54
*/
typedef enum
{
G_NUMBER_PARSER_ERROR_INVALID,
G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
} GNumberParserError;
/**
* G_NUMBER_PARSER_ERROR:
*
* Domain for errors returned by functions converting a string to a
* number.
*
* Since: 2.54
*/
#define G_NUMBER_PARSER_ERROR (g_number_parser_error_quark ())
GLIB_AVAILABLE_IN_2_54
GQuark g_number_parser_error_quark (void);
GLIB_AVAILABLE_IN_2_54
gboolean g_ascii_string_to_signed (const gchar *str,
guint base,
gint64 min,
gint64 max,
gint64 *out_num,
GError **error);
GLIB_AVAILABLE_IN_2_54
gboolean g_ascii_string_to_unsigned (const gchar *str,
guint base,
guint64 min,
guint64 max,
guint64 *out_num,
GError **error);
G_END_DECLS
#endif /* __G_STRFUNCS_H__ */

View File

@ -1501,6 +1501,224 @@ test_strv_contains (void)
g_assert_false (g_strv_contains (strv_empty, ""));
}
typedef enum
{
SIGNED,
UNSIGNED
} SignType;
typedef struct
{
const gchar *str;
SignType sign_type;
guint base;
gint min;
gint max;
gint expected;
gboolean should_fail;
GNumberParserError error_code;
} TestData;
const TestData test_data[] = {
/* typical cases for signed */
{ "0", SIGNED, 10, -2, 2, 0, FALSE, 0 },
{ "+0", SIGNED, 10, -2, 2, 0, FALSE, 0 },
{ "-0", SIGNED, 10, -2, 2, 0, FALSE, 0 },
{ "-2", SIGNED, 10, -2, 2, -2, FALSE, 0 },
{ "2", SIGNED, 10, -2, 2, 2, FALSE, 0 },
{ "+2", SIGNED, 10, -2, 2, 2, FALSE, 0 },
{ "3", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
{ "+3", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
{ "-3", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
/* typical cases for unsigned */
{ "-1", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "1", UNSIGNED, 10, 0, 2, 1, FALSE, 0 },
{ "+1", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "0", UNSIGNED, 10, 0, 2, 0, FALSE, 0 },
{ "+0", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "-0", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "2", UNSIGNED, 10, 0, 2, 2, FALSE, 0 },
{ "+2", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "3", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
{ "+3", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
/* min == max cases for signed */
{ "-2", SIGNED, 10, -2, -2, -2, FALSE, 0 },
{ "-1", SIGNED, 10, -2, -2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
{ "-3", SIGNED, 10, -2, -2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
/* min == max cases for unsigned */
{ "2", UNSIGNED, 10, 2, 2, 2, FALSE, 0 },
{ "3", UNSIGNED, 10, 2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
{ "1", UNSIGNED, 10, 2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS },
/* invalid inputs */
{ "", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "a", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "a", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "1a", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "1a", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "- 1", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
/* leading/trailing whitespace */
{ " 1", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ " 1", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "1 ", SIGNED, 10, -2, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "1 ", UNSIGNED, 10, 0, 2, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
/* hexadecimal numbers */
{ "a", SIGNED, 16, 0, 15, 10, FALSE, 0 },
{ "a", UNSIGNED, 16, 0, 15, 10, FALSE, 0 },
{ "0xa", SIGNED, 16, 0, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "0xa", UNSIGNED, 16, 0, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "-0xa", SIGNED, 16, -15, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "-0xa", UNSIGNED, 16, 0, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "+0xa", SIGNED, 16, 0, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "+0xa", UNSIGNED, 16, 0, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "- 0xa", SIGNED, 16, -15, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "- 0xa", UNSIGNED, 16, 0, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "+ 0xa", SIGNED, 16, -15, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
{ "+ 0xa", UNSIGNED, 16, 0, 15, 0, TRUE, G_NUMBER_PARSER_ERROR_INVALID },
};
static void
test_ascii_string_to_number_usual (void)
{
gsize idx;
for (idx = 0; idx < G_N_ELEMENTS (test_data); ++idx)
{
GError *error = NULL;
const TestData *data = &test_data[idx];
gboolean result;
gint value;
switch (data->sign_type)
{
case SIGNED:
{
gint64 value64 = 0;
result = g_ascii_string_to_signed (data->str,
data->base,
data->min,
data->max,
&value64,
&error);
value = value64;
g_assert_cmpint (value, ==, value64);
break;
}
case UNSIGNED:
{
guint64 value64 = 0;
result = g_ascii_string_to_unsigned (data->str,
data->base,
data->min,
data->max,
&value64,
&error);
value = value64;
g_assert_cmpint (value, ==, value64);
break;
}
default:
g_assert_not_reached ();
}
if (data->should_fail)
{
g_assert_false (result);
g_assert_error (error, G_NUMBER_PARSER_ERROR, data->error_code);
g_clear_error (&error);
}
else
{
g_assert_true (result);
g_assert_no_error (error);
g_assert_cmpint (value, ==, data->expected);
}
}
}
static void
test_ascii_string_to_number_pathological (void)
{
GError *error = NULL;
const gchar *crazy_high = "999999999999999999999999999999999999";
const gchar *crazy_low = "-999999999999999999999999999999999999";
const gchar *max_uint64 = "18446744073709551615";
const gchar *max_int64 = "9223372036854775807";
const gchar *min_int64 = "-9223372036854775808";
guint64 uvalue = 0;
gint64 svalue = 0;
g_assert_false (g_ascii_string_to_unsigned (crazy_high,
10,
0,
G_MAXUINT64,
NULL,
&error));
g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS);
g_clear_error (&error);
g_assert_false (g_ascii_string_to_unsigned (crazy_low,
10,
0,
G_MAXUINT64,
NULL,
&error));
// crazy_low is a signed number so it is not a valid unsigned number
g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID);
g_clear_error (&error);
g_assert_false (g_ascii_string_to_signed (crazy_high,
10,
G_MININT64,
G_MAXINT64,
NULL,
&error));
g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS);
g_clear_error (&error);
g_assert_false (g_ascii_string_to_signed (crazy_low,
10,
G_MININT64,
G_MAXINT64,
NULL,
&error));
g_assert_error (error, G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS);
g_clear_error (&error);
g_assert_true (g_ascii_string_to_unsigned (max_uint64,
10,
0,
G_MAXUINT64,
&uvalue,
&error));
g_assert_no_error (error);
g_assert_cmpint (uvalue, ==, G_MAXUINT64);
g_assert_true (g_ascii_string_to_signed (max_int64,
10,
G_MININT64,
G_MAXINT64,
&svalue,
&error));
g_assert_no_error (error);
g_assert_cmpint (svalue, ==, G_MAXINT64);
g_assert_true (g_ascii_string_to_signed (min_int64,
10,
G_MININT64,
G_MAXINT64,
&svalue,
&error));
g_assert_no_error (error);
g_assert_cmpint (svalue, ==, G_MININT64);
}
int
main (int argc,
char *argv[])
@ -1537,6 +1755,8 @@ main (int argc,
g_test_add_func ("/strfuncs/strup", test_strup);
g_test_add_func ("/strfuncs/transliteration", test_transliteration);
g_test_add_func ("/strfuncs/strv-contains", test_strv_contains);
g_test_add_func ("/strfuncs/ascii-string-to-num/usual", test_ascii_string_to_number_usual);
g_test_add_func ("/strfuncs/ascii-string-to-num/pathological", test_ascii_string_to_number_pathological);
return g_test_run();
}