From b998d78f17088d891b822344ebdc8b5266e7a5e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Thu, 19 Jan 2017 16:56:43 +0100 Subject: [PATCH] Add code for libidn2 (IDNA 2008 + TR46 non-transitional) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set HAVE_LIBIDN2 instead of HAVE_LIBIDN for IDNA2008/TR46. With IDNA2003, german sharp s will be stranslated into ss. Thus straße.de and strasse.de will result in the same whois lookup. TR46 preprocessing also takes care for a few other things, like decomposed Unicode characters. --- Makefile | 5 +++++ whois.c | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 2634a9a..79aa922 100644 --- a/Makefile +++ b/Makefile @@ -32,10 +32,15 @@ ifdef LOCALEDIR DEFS += -DLOCALEDIR=\"$(BASEDIR)$(prefix)/share/locale\" endif +ifdef HAVE_LIBIDN2 +whois_LDADD += -lidn2 +DEFS += -DHAVE_LIBIDN2 +else ifdef HAVE_LIBIDN whois_LDADD += -lidn DEFS += -DHAVE_LIBIDN endif +endif ifdef HAVE_ICONV whois_OBJECTS += simple_recode.o diff --git a/whois.c b/whois.c index 7c987ae..711a7d5 100644 --- a/whois.c +++ b/whois.c @@ -31,7 +31,9 @@ #ifdef HAVE_REGEXEC #include #endif -#ifdef HAVE_LIBIDN +#ifdef HAVE_LIBIDN2 +#include +#elif defined HAVE_LIBIDN #include #endif #ifdef HAVE_INET_PTON @@ -653,7 +655,7 @@ char *queryformat(const char *server, const char *flags, const char *query) simple_recode_input_charset = "utf-8"; /* then try UTF-8 */ #endif -#ifdef HAVE_LIBIDN +#if defined HAVE_LIBIDN || defined HAVE_LIBIDN2 # define DENIC_PARAM_ACE ",ace" #else # define DENIC_PARAM_ACE "" @@ -939,9 +941,8 @@ int openconn(const char *server, const char *port) hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_flags = AI_ADDRCONFIG; -#ifdef HAVE_LIBIDN - hints.ai_flags |= AI_IDN; -#endif + + server = normalize_domain(server); if ((err = getaddrinfo(server, port ? port : "nicname", &hints, &res)) != 0) { @@ -1145,7 +1146,7 @@ const char *is_new_gtld(const char *s) char *normalize_domain(const char *dom) { char *p, *ret; -#ifdef HAVE_LIBIDN +#if defined HAVE_LIBIDN || defined HAVE_LIBIDN2 char *domain_start = NULL; #endif @@ -1160,7 +1161,7 @@ char *normalize_domain(const char *dom) p--; } -#ifdef HAVE_LIBIDN +#if defined HAVE_LIBIDN || defined HAVE_LIBIDN2 /* find the start of the last word if there are spaces in the query */ for (p = ret; *p; p++) if (*p == ' ') @@ -1170,9 +1171,18 @@ char *normalize_domain(const char *dom) char *q, *r; int prefix_len; +#ifdef HAVE_LIBIDN2 +#if IDN2_VERSION_NUMBER >= 0x00140000 + if (idn2_lookup_ul(domain_start, &q, IDN2_NONTRANSITIONAL) != IDN2_OK) + return ret; +#else + if (idn2_lookup_ul(domain_start, &q, IDN2_NFC_INPUT) != IDN2_OK) + return ret; +#endif +#else if (idna_to_ascii_lz(domain_start, &q, 0) != IDNA_SUCCESS) return ret; - +#endif /* reassemble the original query in a new buffer */ prefix_len = domain_start - ret; r = malloc(prefix_len + strlen(q) + 1); @@ -1186,8 +1196,18 @@ char *normalize_domain(const char *dom) } else { char *q; +#ifdef HAVE_LIBIDN2 +#if IDN2_VERSION_NUMBER >= 0x00140000 + if (idn2_lookup_ul(ret, &q, IDN2_NONTRANSITIONAL) != IDN2_OK) + return ret; +#else + if (idn2_lookup_ul(ret, &q, IDN2_NFC_INPUT) != IDN2_OK) + return ret; +#endif +#else if (idna_to_ascii_lz(ret, &q, 0) != IDNA_SUCCESS) return ret; +#endif free(ret); return q;