mirror of
https://gitlab.gnome.org/GNOME/glib.git
synced 2024-11-14 21:36:13 +01:00
589 lines
16 KiB
Plaintext
589 lines
16 KiB
Plaintext
<!-- ##### SECTION Title ##### -->
|
|
Lexical Scanner
|
|
|
|
<!-- ##### SECTION Short_Description ##### -->
|
|
a general-purpose lexical scanner.
|
|
|
|
<!-- ##### SECTION Long_Description ##### -->
|
|
<para>
|
|
The #GScanner and its associated functions provide a general-purpose
|
|
lexical scanner.
|
|
</para>
|
|
<para>
|
|
FIXME: really needs an example and more detail, but I don't completely
|
|
understand it myself. Look at gtkrc.c for some code using the scanner.
|
|
</para>
|
|
|
|
<!-- ##### SECTION See_Also ##### -->
|
|
<para>
|
|
|
|
</para>
|
|
|
|
<!-- ##### SECTION Stability_Level ##### -->
|
|
|
|
|
|
<!-- ##### STRUCT GScanner ##### -->
|
|
<para>
|
|
The data structure representing a lexical scanner.
|
|
</para>
|
|
<para>
|
|
You should set <structfield>input_name</structfield> after creating
|
|
the scanner, since it is used by the default message handler when
|
|
displaying warnings and errors. If you are scanning a file, the file
|
|
name would be a good choice.
|
|
</para>
|
|
<para>
|
|
The <structfield>user_data</structfield> and
|
|
<structfield>max_parse_errors</structfield> fields are not used.
|
|
If you need to associate extra data with the scanner, you can place it here.
|
|
</para>
|
|
<para>
|
|
If you want to use your own message handler you can set the
|
|
<structfield>msg_handler</structfield> field. The type of the message
|
|
handler function is declared by #GScannerMsgFunc.
|
|
</para>
|
|
|
|
@user_data:
|
|
@max_parse_errors:
|
|
@parse_errors:
|
|
@input_name:
|
|
@qdata:
|
|
@config:
|
|
@token:
|
|
@value:
|
|
@line:
|
|
@position:
|
|
@next_token:
|
|
@next_value:
|
|
@next_line:
|
|
@next_position:
|
|
@symbol_table:
|
|
@input_fd:
|
|
@text:
|
|
@text_end:
|
|
@buffer:
|
|
@scope_id:
|
|
@msg_handler:
|
|
|
|
<!-- ##### FUNCTION g_scanner_new ##### -->
|
|
<para>
|
|
Creates a new #GScanner.
|
|
The @config_templ structure specifies the initial settings of the scanner,
|
|
which are copied into the #GScanner <structfield>config</structfield> field.
|
|
If you pass %NULL then the default settings are used.
|
|
</para>
|
|
|
|
@config_templ: the initial scanner settings.
|
|
@Returns: the new #GScanner.
|
|
|
|
|
|
<!-- ##### STRUCT GScannerConfig ##### -->
|
|
<para>
|
|
Specifies the #GScanner settings.
|
|
</para>
|
|
<para>
|
|
<structfield>cset_skip_characters</structfield> specifies which characters
|
|
should be skipped by the scanner (the default is the whitespace characters:
|
|
space, tab, carriage-return and line-feed).
|
|
</para>
|
|
<para>
|
|
<structfield>cset_identifier_first</structfield> specifies the characters
|
|
which can start identifiers (the default is #G_CSET_a_2_z, "_", and
|
|
#G_CSET_A_2_Z).
|
|
</para>
|
|
<para>
|
|
<structfield>cset_identifier_nth</structfield> specifies the characters
|
|
which can be used in identifiers, after the first character (the default
|
|
is #G_CSET_a_2_z, "_0123456789", #G_CSET_A_2_Z, #G_CSET_LATINS,
|
|
#G_CSET_LATINC).
|
|
</para>
|
|
<para>
|
|
<structfield>cpair_comment_single</structfield> specifies the characters
|
|
at the start and end of single-line comments. The default is "#\n" which
|
|
means that single-line comments start with a '#' and continue until a '\n'
|
|
(end of line).
|
|
</para>
|
|
<para>
|
|
<structfield>case_sensitive</structfield> specifies if symbols are
|
|
case sensitive (the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>skip_comment_multi</structfield> specifies if multi-line
|
|
comments are skipped and not returned as tokens (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>skip_comment_single</structfield> specifies if single-line
|
|
comments are skipped and not returned as tokens (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_comment_multi</structfield> specifies if multi-line
|
|
comments are recognized (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_identifier</structfield> specifies if identifiers
|
|
are recognized (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_identifier_1char</structfield> specifies if single-character
|
|
identifiers are recognized (the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_identifier_NULL</structfield> specifies if
|
|
<literal>NULL</literal> is reported as #G_TOKEN_IDENTIFIER_NULL
|
|
(the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_symbols</structfield> specifies if symbols are
|
|
recognized (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_binary</structfield> specifies if binary numbers
|
|
are recognized (the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_octal</structfield> specifies if octal numbers
|
|
are recognized (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_float</structfield> specifies if floating point numbers
|
|
are recognized (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_hex</structfield> specifies if hexadecimal numbers
|
|
are recognized (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_hex_dollar</structfield> specifies if '$' is recognized
|
|
as a prefix for hexadecimal numbers (the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_string_sq</structfield> specifies if strings can be
|
|
enclosed in single quotes (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>scan_string_dq</structfield> specifies if strings can be
|
|
enclosed in double quotes (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>numbers_2_int</structfield> specifies if binary, octal and
|
|
hexadecimal numbers are reported as #G_TOKEN_INT (the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>int_2_float</structfield> specifies if all numbers are
|
|
reported as #G_TOKEN_FLOAT (the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>identifier_2_string</structfield> specifies if identifiers
|
|
are reported as strings (the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>char_2_token</structfield> specifies if characters
|
|
are reported by setting <literal>token = ch</literal> or as #G_TOKEN_CHAR
|
|
(the default is %TRUE).
|
|
</para>
|
|
<para>
|
|
<structfield>symbol_2_token</structfield> specifies if symbols
|
|
are reported by setting <literal>token = v_symbol</literal> or as
|
|
#G_TOKEN_SYMBOL (the default is %FALSE).
|
|
</para>
|
|
<para>
|
|
<structfield>scope_0_fallback</structfield> specifies if a symbol
|
|
is searched for in the default scope in addition to the current scope
|
|
(the default is %FALSE).
|
|
</para>
|
|
|
|
@cset_skip_characters:
|
|
@cset_identifier_first:
|
|
@cset_identifier_nth:
|
|
@cpair_comment_single:
|
|
@case_sensitive:
|
|
@skip_comment_multi:
|
|
@skip_comment_single:
|
|
@scan_comment_multi:
|
|
@scan_identifier:
|
|
@scan_identifier_1char:
|
|
@scan_identifier_NULL:
|
|
@scan_symbols:
|
|
@scan_binary:
|
|
@scan_octal:
|
|
@scan_float:
|
|
@scan_hex:
|
|
@scan_hex_dollar:
|
|
@scan_string_sq:
|
|
@scan_string_dq:
|
|
@numbers_2_int:
|
|
@int_2_float:
|
|
@identifier_2_string:
|
|
@char_2_token:
|
|
@symbol_2_token:
|
|
@scope_0_fallback:
|
|
@store_int64:
|
|
@padding_dummy:
|
|
|
|
<!-- ##### FUNCTION g_scanner_input_file ##### -->
|
|
<para>
|
|
Prepares to scan a file.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@input_fd: a file descriptor.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_sync_file_offset ##### -->
|
|
<para>
|
|
Rewinds the file descriptor to the current buffer position and discards
|
|
the file read-ahead buffer. This is useful for third-party uses of
|
|
the scanner's file descriptor, which hook onto the current scanning
|
|
position.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_input_text ##### -->
|
|
<para>
|
|
Prepares to scan a text buffer.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@text: the text buffer to scan.
|
|
@text_len: the length of the text buffer.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_peek_next_token ##### -->
|
|
<para>
|
|
Gets the next token, without removing it from the input stream.
|
|
The token data is placed in the
|
|
<structfield>next_token</structfield>,
|
|
<structfield>next_value</structfield>,
|
|
<structfield>next_line</structfield>, and
|
|
<structfield>next_position</structfield> fields of the #GScanner structure.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Returns: the type of the token.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_get_next_token ##### -->
|
|
<para>
|
|
Gets the next token, removing it from the input stream.
|
|
The token data is placed in the
|
|
<structfield>token</structfield>,
|
|
<structfield>value</structfield>,
|
|
<structfield>line</structfield>, and
|
|
<structfield>position</structfield> fields of the #GScanner structure.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Returns: the type of the token.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_cur_line ##### -->
|
|
<para>
|
|
Gets the current line in the input stream (counting from 1).
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Returns: the current line.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_cur_position ##### -->
|
|
<para>
|
|
Gets the current position in the current line (counting from 0).
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Returns: the current position on the line.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_cur_token ##### -->
|
|
<para>
|
|
Gets the current token type.
|
|
This is simply the <structfield>token</structfield> field in the #GScanner
|
|
structure.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Returns: the current token type.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_cur_value ##### -->
|
|
<para>
|
|
Gets the current token value.
|
|
This is simply the <structfield>value</structfield> field in the #GScanner
|
|
structure.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Returns: the current token value.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_eof ##### -->
|
|
<para>
|
|
Returns %TRUE if the scanner has reached the end of the file or text buffer.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Returns: %TRUE if the scanner has reached the end of the file or text buffer.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_set_scope ##### -->
|
|
<para>
|
|
Sets the current scope.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@scope_id: the new scope id.
|
|
@Returns: the old scope id.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_scope_add_symbol ##### -->
|
|
<para>
|
|
Adds a symbol to the given scope.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@scope_id: the scope id.
|
|
@symbol: the symbol to add.
|
|
@value: the value of the symbol.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_scope_foreach_symbol ##### -->
|
|
<para>
|
|
Calls the given function for each of the symbol/value pairs in the
|
|
given scope of the #GScanner. The function is passed the symbol and
|
|
value of each pair, and the given @user_data parameter.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@scope_id: the scope id.
|
|
@func: the function to call for each symbol/value pair.
|
|
@user_data: user data to pass to the function.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_scope_lookup_symbol ##### -->
|
|
<para>
|
|
Looks up a symbol in a scope and returns its value. If the
|
|
symbol is not bound in the scope, %NULL is returned.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@scope_id: the scope id.
|
|
@symbol: the symbol to look up.
|
|
@Returns: the value of @symbol in the given scope, or %NULL
|
|
if @symbol is not bound in the given scope.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_scope_remove_symbol ##### -->
|
|
<para>
|
|
Removes a symbol from a scope.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@scope_id: the scope id.
|
|
@symbol: the symbol to remove.
|
|
|
|
|
|
<!-- ##### MACRO g_scanner_freeze_symbol_table ##### -->
|
|
<para>
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Deprecated:2.2: This macro does nothing.
|
|
|
|
|
|
<!-- ##### MACRO g_scanner_thaw_symbol_table ##### -->
|
|
<para>
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@Deprecated:2.2: This macro does nothing.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_lookup_symbol ##### -->
|
|
<para>
|
|
Looks up a symbol in the current scope and returns its value. If the
|
|
symbol is not bound in the current scope, %NULL is returned.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@symbol: the symbol to look up.
|
|
@Returns: the value of @symbol in the current scope, or %NULL
|
|
if @symbol is not bound in the current scope.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_warn ##### -->
|
|
<para>
|
|
Outputs a warning message, via the #GScanner message handler.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@format: the message format. See the <function>printf()</function>
|
|
documentation.
|
|
@Varargs: the parameters to insert into the format string.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_error ##### -->
|
|
<para>
|
|
Outputs an error message, via the #GScanner message handler.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@format: the message format. See the <function>printf()</function>
|
|
documentation.
|
|
@Varargs: the parameters to insert into the format string.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_unexp_token ##### -->
|
|
<para>
|
|
Outputs a message through the scanner's msg_handler, resulting from an
|
|
unexpected token in the input stream.
|
|
Note that you should not call g_scanner_peek_next_token() followed by
|
|
g_scanner_unexp_token() without an intermediate call to
|
|
g_scanner_get_next_token(), as g_scanner_unexp_token() evaluates the
|
|
scanner's current token (not the peeked token) to construct part
|
|
of the message.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@expected_token: the expected token.
|
|
@identifier_spec: a string describing how the scanner's user refers to
|
|
identifiers (%NULL defaults to "identifier").
|
|
This is used if @expected_token is #G_TOKEN_IDENTIFIER
|
|
or #G_TOKEN_IDENTIFIER_NULL.
|
|
@symbol_spec: a string describing how the scanner's user refers to
|
|
symbols (%NULL defaults to "symbol").
|
|
This is used if @expected_token is #G_TOKEN_SYMBOL or
|
|
any token value greater than #G_TOKEN_LAST.
|
|
@symbol_name: the name of the symbol, if the scanner's current token
|
|
is a symbol.
|
|
@message: a message string to output at the end of the warning/error, or %NULL.
|
|
@is_error: if %TRUE it is output as an error. If %FALSE it is output as a
|
|
warning.
|
|
|
|
|
|
<!-- ##### USER_FUNCTION GScannerMsgFunc ##### -->
|
|
<para>
|
|
Specifies the type of the message handler function.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@message: the message.
|
|
@error: %TRUE if the message signals an error, %FALSE if it
|
|
signals a warning.
|
|
|
|
|
|
<!-- ##### FUNCTION g_scanner_destroy ##### -->
|
|
<para>
|
|
Frees all memory used by the #GScanner.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
|
|
|
|
<!-- ##### ENUM GTokenType ##### -->
|
|
<para>
|
|
The possible types of token returned from each g_scanner_get_next_token() call.
|
|
</para>
|
|
|
|
@G_TOKEN_EOF: the end of the file.
|
|
@G_TOKEN_LEFT_PAREN: a '(' character.
|
|
@G_TOKEN_LEFT_CURLY: a '{' character.
|
|
@G_TOKEN_RIGHT_CURLY: a '}' character.
|
|
|
|
<!-- ##### UNION GTokenValue ##### -->
|
|
<para>
|
|
A union holding the value of the token.
|
|
</para>
|
|
|
|
|
|
<!-- ##### ENUM GErrorType ##### -->
|
|
<para>
|
|
The possible errors, used in the <structfield>v_error</structfield> field
|
|
of #GTokenValue, when the token is a #G_TOKEN_ERROR.
|
|
</para>
|
|
|
|
@G_ERR_UNKNOWN: unknown error.
|
|
@G_ERR_UNEXP_EOF: unexpected end of file.
|
|
@G_ERR_UNEXP_EOF_IN_STRING: unterminated string constant.
|
|
@G_ERR_UNEXP_EOF_IN_COMMENT: unterminated comment.
|
|
@G_ERR_NON_DIGIT_IN_CONST: non-digit character in a number.
|
|
@G_ERR_DIGIT_RADIX: digit beyond radix in a number.
|
|
@G_ERR_FLOAT_RADIX: non-decimal floating point number.
|
|
@G_ERR_FLOAT_MALFORMED: malformed floating point number.
|
|
|
|
<!-- ##### MACRO G_CSET_a_2_z ##### -->
|
|
<para>
|
|
The set of lowercase ASCII alphabet characters.
|
|
Used for specifying valid identifier characters in #GScannerConfig.
|
|
</para>
|
|
|
|
|
|
|
|
<!-- ##### MACRO G_CSET_A_2_Z ##### -->
|
|
<para>
|
|
The set of uppercase ASCII alphabet characters.
|
|
Used for specifying valid identifier characters in #GScannerConfig.
|
|
</para>
|
|
|
|
|
|
|
|
<!-- ##### MACRO G_CSET_DIGITS ##### -->
|
|
<para>
|
|
The set of digits.
|
|
Used for specifying valid identifier characters in #GScannerConfig.
|
|
</para>
|
|
|
|
|
|
|
|
<!-- ##### MACRO G_CSET_LATINC ##### -->
|
|
<para>
|
|
The set of uppercase ISO 8859-1 alphabet characters which are
|
|
not ASCII characters.
|
|
Used for specifying valid identifier characters in #GScannerConfig.
|
|
</para>
|
|
|
|
|
|
|
|
<!-- ##### MACRO G_CSET_LATINS ##### -->
|
|
<para>
|
|
The set of lowercase ISO 8859-1 alphabet characters which are
|
|
not ASCII characters.
|
|
Used for specifying valid identifier characters in #GScannerConfig.
|
|
</para>
|
|
|
|
|
|
|
|
<!-- ##### MACRO g_scanner_add_symbol ##### -->
|
|
<para>
|
|
Adds a symbol to the default scope.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@symbol: the symbol to add.
|
|
@value: the value of the symbol.
|
|
@Deprecated:2.2: Use g_scanner_scope_add_symbol() instead.
|
|
|
|
|
|
<!-- ##### MACRO g_scanner_remove_symbol ##### -->
|
|
<para>
|
|
Removes a symbol from the default scope.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@symbol: the symbol to remove.
|
|
@Deprecated:2.2: Use g_scanner_scope_remove_symbol() instead.
|
|
|
|
|
|
<!-- ##### MACRO g_scanner_foreach_symbol ##### -->
|
|
<para>
|
|
Calls a function for each symbol in the default scope.
|
|
</para>
|
|
|
|
@scanner: a #GScanner.
|
|
@func: the function to call with each symbol.
|
|
@data: data to pass to the function.
|
|
@Deprecated:2.2: Use g_scanner_scope_foreach_symbol() instead.
|
|
|
|
|