gio: Port GThreadedResolver to use res_nquery() to fix thread-safety

res_query() uses global state in the form of the struct __res_state
which contains the contents of resolv.conf (and other things). On Linux,
this state seems to be thread-local, so there is no problem. On OS X,
however, it is not, and hence multiple res_query() calls from parallel
threads will compete and return bogus results.

The fix for this is to use res_nquery(), introduced in BIND 8.2, which
takes an explicit state argument. This allows us to manually store the
state thread-locally. If res_nquery() isn’t available, we fall back to
res_query(). It should be available on OS X though. As a data point,
it’s available on Fedora 27.

There’s a slight complication in the fact that OS X requires the state
to be freed using res_ndestroy() rather than res_nclose(). Linux uses
res_nclose().

(See, for example, the NetBSD man page:
https://www.unix.com/man-page/netbsd/3/res_ninit/. The Linux one is
incomplete and not so useful:
http://man7.org/linux/man-pages/man3/resolver.3.html.)

The new code will call res_ninit() once per res_nquery() task. This is
not optimal, but no worse than before — since res_query() was being
called in a worker thread, on Linux, it would implicitly initialise the
thread-local struct __res_state when it was called. We’ve essentially
just made that explicit. In practical terms, this means a
stat("/etc/resolv.conf") call per res_nquery() task.

In future, we could improve this by using an explicit thread pool with
some manually-created worker threads, each of which initialises a struct
__res_state on spawning, and only updates it on receiving
the #GResolver::reload signal.

Signed-off-by: Philip Withnall <withnall@endlessm.com>

https://bugzilla.gnome.org/show_bug.cgi?id=792050
This commit is contained in:
Philip Withnall 2018-01-05 14:26:35 +00:00
parent 235f4958a9
commit 40be86bb0e
5 changed files with 153 additions and 0 deletions

View File

@ -346,6 +346,18 @@
/* Define to 1 if you have the 'res_init' function. */
#mesondefine HAVE_RES_INIT
/* Define to 1 if you have the 'res_nclose' function. */
#mesondefine HAVE_RES_NCLOSE
/* Define to 1 if you have the 'res_ndestroy' function. */
#mesondefine HAVE_RES_NDESTROY
/* Define to 1 if you have the 'res_ninit' function. */
#mesondefine HAVE_RES_NINIT
/* Define to 1 if you have the 'res_nquery' function. */
#mesondefine HAVE_RES_NQUERY
/* Define to 1 if you have the <sched.h> header file. */
#mesondefine HAVE_SCHED_H

View File

@ -356,6 +356,18 @@
/* Define to 1 if you have the 'res_init' function. */
/* #undef HAVE_RES_INIT */
/* Define to 1 if you have the 'res_nclose' function. */
/* #undef HAVE_RES_NCLOSE */
/* Define to 1 if you have the 'res_ndestroy' function. */
/* #undef HAVE_RES_NDESTROY */
/* Define to 1 if you have the 'res_ninit' function. */
/* #undef HAVE_RES_NINIT */
/* Define to 1 if you have the 'res_nquery' function. */
/* #undef HAVE_RES_NQUERY */
/* Define to 1 if you have the <sched.h> header file. */
/* #undef HAVE_SCHED_H */

View File

@ -1016,6 +1016,7 @@ AS_IF([test $glib_native_win32 = yes], [
[AC_MSG_ERROR(Could not find socket())]))
save_libs="$LIBS"
LIBS="$LIBS $NETWORK_LIBS"
AC_MSG_CHECKING([for res_init])
AC_TRY_LINK([#include <sys/types.h>
#include <netinet/in.h>
@ -1026,6 +1027,54 @@ AS_IF([test $glib_native_win32 = yes], [
],[AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_RES_INIT, 1, [Define to 1 if you have the 'res_init' function.])
],[AC_MSG_RESULT([no])])
AC_MSG_CHECKING([for res_nclose])
AC_TRY_LINK([#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
],[
struct __res_state res;
res_nclose(&res);
],[AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_RES_NCLOSE, 1, [Define to 1 if you have the 'res_nclose' function.])
],[AC_MSG_RESULT([no])])
AC_MSG_CHECKING([for res_ndestroy])
AC_TRY_LINK([#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
],[
struct __res_state res;
res_ndestroy(&res);
],[AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_RES_NDESTROY, 1, [Define to 1 if you have the 'res_ndestroy' function.])
],[AC_MSG_RESULT([no])])
AC_MSG_CHECKING([for res_ninit])
AC_TRY_LINK([#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
],[
struct __res_state res;
res_ninit(&res);
],[AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_RES_NINIT, 1, [Define to 1 if you have the 'res_ninit' function.])
],[AC_MSG_RESULT([no])])
AC_MSG_CHECKING([for res_nquery])
AC_TRY_LINK([#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
],[
struct __res_state res;
res_nquery(&res, "test", 0, 0, (void *)0, 0);
],[AC_MSG_RESULT([yes])
AC_DEFINE(HAVE_RES_NQUERY, 1, [Define to 1 if you have the 'res_nquery' function.])
],[AC_MSG_RESULT([no])])
LIBS="$save_libs"
])
AC_SUBST(NETWORK_LIBS)

View File

@ -824,12 +824,36 @@ do_lookup_records (GTask *task,
GByteArray *answer;
gint rrtype;
#ifdef HAVE_RES_NQUERY
/* Load the resolver state. This is done once per worker thread, and the
* #GResolver::reload signal is ignored (since we always reload). This could
* be improved by having an explicit worker thread pool, with each thread
* containing some state which is initialised at thread creation time and
* updated in response to #GResolver::reload.
*
* What we have currently is not particularly worse than using res_query() in
* worker threads, since it would transparently call res_init() for each new
* worker thread. (Although the workers would get reused by the
* #GThreadPool.) */
struct __res_state res;
if (res_ninit (&res) != 0)
{
g_task_return_new_error (task, G_RESOLVER_ERROR, G_RESOLVER_ERROR_INTERNAL,
_("Error resolving “%s”"), lrd->rrname);
return;
}
#endif
rrtype = g_resolver_record_type_to_rrtype (lrd->record_type);
answer = g_byte_array_new ();
for (;;)
{
g_byte_array_set_size (answer, len * 2);
#if defined(HAVE_RES_NQUERY)
len = res_nquery (&res, lrd->rrname, C_IN, rrtype, answer->data, answer->len);
#else
len = res_query (lrd->rrname, C_IN, rrtype, answer->data, answer->len);
#endif
/* If answer fit in the buffer then we're done */
if (len < 0 || len < (gint)answer->len)
@ -845,6 +869,14 @@ do_lookup_records (GTask *task,
records = g_resolver_records_from_res_query (lrd->rrname, rrtype, answer->data, len, herr, &error);
g_byte_array_free (answer, TRUE);
#if defined(HAVE_RES_NDESTROY)
res_ndestroy (&res);
#elif defined(HAVE_RES_NCLOSE)
res_nclose (&res);
#elif defined(HAVE_RES_NINIT)
#error "Your platform has res_ninit() but not res_nclose() or res_ndestroy(). Please file a bug at https://bugzilla.gnome.org/enter_bug.cgi?product=glib"
#endif
#else
DNS_STATUS status;

View File

@ -87,6 +87,54 @@ if host_system != 'windows'
glib_conf.set('HAVE_RES_INIT', 1)
endif
# res_nclose()
if cc.links('''#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
int main (int argc, char ** argv) {
struct __res_state res;
return res_nclose(&res);
}''', args : network_args, name : 'res_nclose()')
glib_conf.set('HAVE_RES_NCLOSE', 1)
endif
# res_ndestroy()
if cc.links('''#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
int main (int argc, char ** argv) {
struct __res_state res;
return res_ndestroy(&res);
}''', args : network_args, name : 'res_ndestroy()')
glib_conf.set('HAVE_RES_NDESTROY', 1)
endif
# res_ninit()
if cc.links('''#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
int main (int argc, char ** argv) {
struct __res_state res;
return res_ninit(&res);
}''', args : network_args, name : 'res_ninit()')
glib_conf.set('HAVE_RES_NINIT', 1)
endif
# res_nquery()
if cc.links('''#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <resolv.h>
int main (int argc, char ** argv) {
struct __res_state res;
return res_nquery(&res, "test", 0, 0, (void *)0, 0);
}''', args : network_args, name : 'res_nquery()')
glib_conf.set('HAVE_RES_NQUERY', 1)
endif
if cc.compiles('''#include <netinet/in.h>
struct ip_mreqn foo;''',
name : 'struct ip_mreqn')