diff -r -u bin/named/client.c-orig bin/named/client.c
--- bin/named/client.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/named/client.c 2004-01-01 00:00:00.000000000 +0000
@@ -994,6 +994,11 @@
}
if (result != ISC_R_SUCCESS)
goto done;
+ /*
+ * Stop after the question if TC was set for rate limiting.
+ */
+ if ((client->message->flags & DNS_MESSAGEFLAG_TC) != 0)
+ goto renderend;
result = dns_message_rendersection(client->message,
DNS_SECTION_ANSWER,
DNS_MESSAGERENDER_PARTIAL |
@@ -1134,6 +1139,49 @@
#endif
/*
+ * Try to rate limit error responses.
+ */
+ if (client->view != NULL && client->view->rrl != NULL) {
+ isc_boolean_t wouldlog;
+ char log_buf[DNS_RRL_LOG_BUF_LEN];
+ dns_rrl_result_t rrl_result;
+
+ INSIST(rcode != dns_rcode_noerror &&
+ rcode != dns_rcode_nxdomain);
+ wouldlog = (ns_g_server->log_queries &&
+ isc_log_wouldlog(ns_g_lctx, DNS_RRL_LOG_DROP));
+ rrl_result = dns_rrl(client->view, &client->peeraddr,
+ TCP_CLIENT(client),
+ dns_rdataclass_in, dns_rdatatype_none,
+ NULL, rcode, client->now,
+ wouldlog, log_buf, sizeof(log_buf));
+ if (rrl_result != DNS_RRL_RESULT_OK) {
+ /*
+ * Log dropped errors in the query category
+ * so that they are not lost in silence.
+ * Starts of rate-limited bursts are logged in
+ * NS_LOGCATEGORY_RRL.
+ */
+ if (wouldlog) {
+ ns_client_log(client, NS_LOGCATEGORY_QUERIES,
+ NS_LOGMODULE_CLIENT,
+ DNS_RRL_LOG_DROP,
+ "%s", log_buf);
+ }
+ /*
+ * Some error responses cannot be 'slipped',
+ * so don't try.
+ * This will be counted with dropped queries in the
+ * QryDropped counter.
+ */
+ if (!client->view->rrl->log_only) {
+ ns_client_next(client, DNS_R_DROP);
+ return;
+ }
+ }
+ }
+
+ /*
* Message may be an in-progress reply that we had trouble
* with, in which case QR will be set. We need to clear QR before
* calling dns_message_reply() to avoid triggering an assertion.
diff -r -u bin/named/config.c-orig bin/named/config.c
--- bin/named/config.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/named/config.c 2004-01-01 00:00:00.000000000 +0000
@@ -227,6 +227,17 @@
notify no;\n\
allow-new-zones no;\n\
\n\
+ # Prevent use of this zone in DNS amplified reflection DoS attacks\n\
+ # Notice the size of the authors.bind response.\n\
+ rate-limit {\n\
+ responses-per-second 1;\n\
+ window 10;\n\
+ slip 0;\n\
+ IPv4-prefix-length 16;\n\
+ IPv6-prefix-length 32;\n\
+ min-table-size 10;\n\
+ };\n\
+\n\
zone \"version.bind\" chaos {\n\
type master;\n\
database \"_builtin version\";\n\
diff -r -u bin/named/include/named/query.h-orig bin/named/include/named/query.h
--- bin/named/include/named/query.h-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/named/include/named/query.h 2004-01-01 00:00:00.000000000 +0000
@@ -85,6 +85,7 @@
#define NS_QUERYATTR_CACHEACLOK 0x2000
#define NS_QUERYATTR_DNS64 0x4000
#define NS_QUERYATTR_DNS64EXCLUDE 0x8000
+#define NS_QUERYATTR_RRL_CHECKED 0x10000
isc_result_t
diff -r -u bin/named/include/named/server.h-orig bin/named/include/named/server.h
--- bin/named/include/named/server.h-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/named/include/named/server.h 2004-01-01 00:00:00.000000000 +0000
@@ -165,7 +165,10 @@
dns_nsstatscounter_updatefail = 34,
dns_nsstatscounter_updatebadprereq = 35,
- dns_nsstatscounter_max = 36
+ dns_nsstatscounter_ratedropped = 36,
+ dns_nsstatscounter_rateslipped = 37,
+
+ dns_nsstatscounter_max = 38
};
void
diff -r -u bin/named/query.c-orig bin/named/query.c
--- bin/named/query.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/named/query.c 2004-01-01 00:00:00.000000000 +0000
@@ -5746,6 +5746,104 @@
resume:
CTRACE("query_find: resume");
+ /*
+ * Rate limit these responses to this client.
+ */
+ if (client->view->rrl != NULL &&
+ fname != NULL && dns_name_isabsolute(fname) &&
+ (client->query.attributes & NS_QUERYATTR_RRL_CHECKED) == 0) {
+ dns_rdataset_t nc_rdataset;
+ dns_rcode_t rcode;
+ isc_boolean_t wouldlog;
+ char log_buf[DNS_RRL_LOG_BUF_LEN];
+ isc_result_t nc_result;
+ dns_rrl_result_t rrl_result;
+
+ client->query.attributes |= NS_QUERYATTR_RRL_CHECKED;
+
+ wouldlog = isc_log_wouldlog(ns_g_lctx, DNS_RRL_LOG_DROP);
+ tname = fname;
+ if (result == DNS_R_NXDOMAIN) {
+ /*
+ * Use the database origin name to rate limit NXDOMAIN
+ */
+ if (db != NULL)
+ tname = dns_db_origin(db);
+ rcode = dns_rcode_nxdomain;
+ } else if (result == DNS_R_NCACHENXDOMAIN &&
+ rdataset != NULL &&
+ dns_rdataset_isassociated(rdataset) &&
+ (rdataset->attributes &
+ DNS_RDATASETATTR_NEGATIVE) != 0) {
+ /*
+ * Try to use owner name in the negative cache SOA.
+ */
+ dns_fixedname_init(&fixed);
+ dns_rdataset_init(&nc_rdataset);
+ for (nc_result = dns_rdataset_first(rdataset);
+ nc_result == ISC_R_SUCCESS;
+ nc_result = dns_rdataset_next(rdataset)) {
+ dns_ncache_current(rdataset,
+ dns_fixedname_name(&fixed),
+ &nc_rdataset);
+ if (nc_rdataset.type == dns_rdatatype_soa) {
+ dns_rdataset_disassociate(&nc_rdataset);
+ tname = dns_fixedname_name(&fixed);
+ break;
+ }
+ dns_rdataset_disassociate(&nc_rdataset);
+ }
+ rcode = dns_rcode_nxdomain;
+ } else {
+ rcode = dns_rcode_noerror;
+ }
+ rrl_result = dns_rrl(client->view, &client->peeraddr,
+ ISC_TF((client->attributes
+ & NS_CLIENTATTR_TCP) != 0),
+ client->message->rdclass, qtype, tname,
+ rcode, client->now,
+ wouldlog, log_buf, sizeof(log_buf));
+ if (rrl_result != DNS_RRL_RESULT_OK) {
+ /*
+ * Log dropped or slipped responses in the query
+ * category so that requests are not silently lost.
+ * Starts of rate-limited bursts are logged in
+ * DNS_LOGCATEGORY_RRL.
+ *
+ * Dropped responses are counted with dropped queries
+ * in QryDropped while slipped responses are counted
+ * with other truncated responses in RespTruncated.
+ */
+ if (wouldlog && ns_g_server->log_queries) {
+ ns_client_log(client, NS_LOGCATEGORY_QUERIES,
+ NS_LOGMODULE_CLIENT,
+ DNS_RRL_LOG_DROP,
+ "%s", log_buf);
+ }
+ if (!client->view->rrl->log_only) {
+ if (rrl_result == DNS_RRL_RESULT_DROP) {
+ /*
+ * These will also be counted in
+ * dns_nsstatscounter_dropped
+ */
+ inc_stats(client,
+ dns_nsstatscounter_ratedropped);
+ QUERY_ERROR(DNS_R_DROP);
+ } else {
+ /*
+ * These will also be counted in
+ * dns_nsstatscounter_truncatedresp
+ */
+ inc_stats(client,
+ dns_nsstatscounter_rateslipped);
+ client->message->flags |=
+ DNS_MESSAGEFLAG_TC;
+ }
+ goto cleanup;
+ }
+ }
+ }
+
if (!ISC_LIST_EMPTY(client->view->rpz_zones) &&
(RECURSIONOK(client) || !client->view->rpz_recursive_only) &&
rpz_ck_dnssec(client, result, rdataset, sigrdataset) &&
@@ -7168,12 +7266,14 @@
}
if (eresult != ISC_R_SUCCESS &&
- (!PARTIALANSWER(client) || WANTRECURSION(client))) {
+ (!PARTIALANSWER(client) || WANTRECURSION(client)
+ || eresult == DNS_R_DROP)) {
if (eresult == DNS_R_DUPLICATE || eresult == DNS_R_DROP) {
/*
* This was a duplicate query that we are
- * recursing on. Don't send a response now.
- * The original query will still cause a response.
+ * recursing on or the result of rate limiting.
+ * Don't send a response now for a duplicate query,
+ * because the original will still cause a response.
*/
query_next(client, eresult);
} else {
diff -r -u bin/named/server.c-orig bin/named/server.c
--- bin/named/server.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/named/server.c 2004-01-01 00:00:00.000000000 +0000
@@ -1561,6 +1561,201 @@
return (result);
}
+/*
+ * Unless 'cond' holds, log 'pat'/'val' against 'obj', set the caller's
+ * 'result' to ISC_R_RANGE, and jump to the caller's 'cleanup' label.
+ */
+#define CHECK_RRL(obj, cond, pat, val)					\
+	do {								\
+		if (!(cond)) {						\
+			cfg_obj_log(obj, ns_g_lctx, ISC_LOG_ERROR, pat, val); \
+			result = ISC_R_RANGE;				\
+			goto cleanup;					\
+		}							\
+	} while (0)
+
+/*
+ * Set up rate limiting for 'view' from the "rate-limit" clause 'map';
+ * 'config' is passed to cfg_acl_fromconfig() for "exempt-clients".  A bad
+ * setting is logged and dns_rrl_view_destroy(view) is called before failing.
+ */
+static isc_result_t
+configure_rrl(dns_view_t *view, const cfg_obj_t *config, const cfg_obj_t *map) {
+	const cfg_obj_t *obj;
+	dns_rrl_t *rrl;
+	isc_result_t result;
+	int min_entries, i, j;
+
+	/*
+	 * Most DNS servers have few clients, but intentionally open
+	 * recursive and authoritative servers often have many.
+	 * So start with a small number of entries unless told otherwise
+	 * to reduce cold-start costs.
+	 */
+	min_entries = 1000;
+	obj = NULL;
+	result = cfg_map_get(map, "min-table-size", &obj);
+	if (result == ISC_R_SUCCESS) {
+		min_entries = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, min_entries > 1,
+			  "invalid '{min-table-size %d;}'", min_entries);
+	}
+	result = dns_rrl_init(&rrl, view, min_entries);
+	if (result != ISC_R_SUCCESS)
+		return (result);
+
+	i = ISC_MAX(10000, min_entries);
+	obj = NULL;
+	result = cfg_map_get(map, "max-table-size", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= min_entries,
+			  "invalid '{max-table-size %d;}'", i);
+	}
+	rrl->max_entries = i;
+
+	/* Off by default; do not reuse the table size left in 'i'. */
+	i = 0;
+	obj = NULL;
+	result = cfg_map_get(map, "responses-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 0 && i <= DNS_RRL_MAX_RATE,
+			  "invalid '{responses-per-second %d;}'", i);
+	}
+	rrl->responses_per_second = i;
+	rrl->scaled_responses_per_second = rrl->responses_per_second;
+
+	/* Default error rate is the response rate, so off by default. */
+	i = rrl->responses_per_second;
+	obj = NULL;
+	result = cfg_map_get(map, "errors-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 0 && i <= DNS_RRL_MAX_RATE,
+			  "invalid '{errors-per-second %d;}'", i);
+	}
+	rrl->errors_per_second = i;
+	rrl->scaled_errors_per_second = rrl->errors_per_second;
+	/* Default NXDOMAIN rate is the response rate, so off by default. */
+	i = rrl->responses_per_second;
+	obj = NULL;
+	result = cfg_map_get(map, "nxdomains-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 0 && i <= DNS_RRL_MAX_RATE,
+			  "invalid '{nxdomains-per-second %d;}'", i);
+	}
+	rrl->nxdomains_per_second = i;
+	rrl->scaled_nxdomains_per_second = rrl->nxdomains_per_second;
+
+	/* The all-per-second rate is off by default. */
+	i = 0;
+	obj = NULL;
+	result = cfg_map_get(map, "all-per-second", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 0 && i <= DNS_RRL_MAX_RATE,
+			  "invalid '{all-per-second %d;}'", i);
+		CHECK_RRL(obj, i == 0 || (i >= rrl->responses_per_second*4 &&
+					  i >= rrl->errors_per_second*4 &&
+					  i >= rrl->nxdomains_per_second*4),
+			  "'{all-per-second %d;}' must be at least 4 times"
+			  " responses-per-second, errors-per-second,"
+			  " and nxdomains-per-second", i);
+	}
+	rrl->all_per_second = i;
+	rrl->scaled_all_per_second = rrl->all_per_second;
+
+	i = 2;
+	obj = NULL;
+	result = cfg_map_get(map, "slip", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 0 && i <= DNS_RRL_MAX_SLIP,
+			  "invalid '{slip %d;}'", i);
+	}
+	rrl->slip = i;
+	rrl->scaled_slip = rrl->slip;
+
+	i = 15;
+	obj = NULL;
+	result = cfg_map_get(map, "window", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 1 && i <= DNS_RRL_MAX_WINDOW,
+			  "invalid '{window %d;}'", i);
+	}
+	rrl->window = i;
+
+	i = 0;
+	obj = NULL;
+	result = cfg_map_get(map, "qps-scale", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 1, "invalid '{qps-scale %d;}'", i);
+	}
+	rrl->qps_scale = i;
+	rrl->qps = 1.0;
+
+	i = 24;
+	obj = NULL;
+	result = cfg_map_get(map, "IPv4-prefix-length", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 8 && i <= 32,
+			  "invalid '{IPv4-prefix-length %d;}'", i);
+	}
+	rrl->ipv4_prefixlen = i;
+	if (i == 32)
+		rrl->ipv4_mask = 0xffffffff;
+	else
+		rrl->ipv4_mask = htonl(0xffffffff << (32-i));
+
+	i = 56;
+	obj = NULL;
+	result = cfg_map_get(map, "IPv6-prefix-length", &obj);
+	if (result == ISC_R_SUCCESS) {
+		i = cfg_obj_asuint32(obj);
+		CHECK_RRL(obj, i >= 16 && i <= 128,
+			  "invalid '{IPv6-prefix-length %d;}'", i);
+	}
+	rrl->ipv6_prefixlen = i;
+	memset(rrl->ipv6_mask, 0xff, sizeof(rrl->ipv6_mask));
+	for (j = 0; j < 4; ++j) {
+		if (i == 0) {
+			rrl->ipv6_mask[j] = 0;
+		} else if (i < 32) {
+			rrl->ipv6_mask[j] = htonl(0xffffffff << (32-i));
+			i = 0;
+		} else {
+			rrl->ipv6_mask[j] = 0xffffffff;
+			i -= 32;
+		}
+	}
+
+	obj = NULL;
+	result = cfg_map_get(map, "exempt-clients", &obj);
+	if (result == ISC_R_SUCCESS) {
+		result = cfg_acl_fromconfig(obj, config, ns_g_lctx,
+					    ns_g_aclconfctx, ns_g_mctx,
+					    0, &rrl->exempt);
+		CHECK_RRL(obj, result == ISC_R_SUCCESS,
+			  "invalid %s", "address_match_list");
+	}
+
+	obj = NULL;
+	result = cfg_map_get(map, "log-only", &obj);
+	rrl->log_only = ISC_TF(result == ISC_R_SUCCESS &&
+			       cfg_obj_asboolean(obj));
+
+	return (ISC_R_SUCCESS);
+
+ cleanup:
+	dns_rrl_view_destroy(view);
+	return (result);
+}
+
/*
* Configure 'view' according to 'vconfig', taking defaults from 'config'
* where values are missing in 'vconfig'.
@@ -2925,6 +3120,14 @@
}
}
+ obj = NULL;
+ result = ns_config_get(maps, "rate-limit", &obj);
+ if (result == ISC_R_SUCCESS) {
+ result = configure_rrl(view, config, obj);
+ if (result != ISC_R_SUCCESS)
+ goto cleanup;
+ }
+
result = ISC_R_SUCCESS;
cleanup:
diff -r -u bin/named/statschannel.c-orig bin/named/statschannel.c
--- bin/named/statschannel.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/named/statschannel.c 2004-01-01 00:00:00.000000000 +0000
@@ -202,6 +202,10 @@
SET_NSSTATDESC(updatebadprereq,
"updates rejected due to prerequisite failure",
"UpdateBadPrereq");
+ SET_NSSTATDESC(ratedropped, "responses dropped for rate limits",
+ "RateDropped");
+ SET_NSSTATDESC(rateslipped, "responses truncated for rate limits",
+ "RateSlipped");
INSIST(i == dns_nsstatscounter_max);
/* Initialize resolver statistics */
diff -r -u bin/tests/system/README-orig bin/tests/system/README
--- bin/tests/system/README-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/README 2004-01-01 00:00:00.000000000 +0000
@@ -17,6 +17,7 @@
nsupdate/ Dynamic update and IXFR tests
resolver/ Regression tests for resolver bugs that have been fixed
(not a complete resolver test suite)
+ rrl/ query rate limiting
rpz/ Tests of response policy zone (RPZ) rewriting
stub/ Tests of stub zone functionality
unknown/ Unknown type and class tests
diff -r -u bin/tests/system/conf.sh.in-orig bin/tests/system/conf.sh.in
--- bin/tests/system/conf.sh.in-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/conf.sh.in 2004-01-01 00:00:00.000000000 +0000
@@ -58,7 +58,7 @@
@CHECKDS@ checknames checkzone database dlv dlvauto dlz dlzexternal
dname dns64 dnssec ecdsa forward glue gost ixfr inline limits
logfileconfig lwresd masterfile masterformat metadata notify
- nsupdate pending pkcs11 redirect resolver rndc rpz rrsetorder
+ nsupdate pending pkcs11 redirect resolver rndc rpz rrl rrsetorder
rsabigexponent sortlist smartsign staticstub stub tkey tsig
tsiggss unknown upforwd verify views xfer xferquota zonechecks"
diff -r -u bin/tests/system/rrl/clean.sh-orig bin/tests/system/rrl/clean.sh
--- bin/tests/system/rrl/clean.sh-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/clean.sh 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,22 @@
+# Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# $Id$
+
+
+# Clean up after rrl tests.
+
+rm -f dig.out*
+rm -f */named.memstats */named.run ns*/log* */named.rpz */session.key
+rm -f ns3/bl*.db */*.jnl */*.core */*.pid
diff -r -u bin/tests/system/rrl/ns1/named.conf-orig bin/tests/system/rrl/ns1/named.conf
--- bin/tests/system/rrl/ns1/named.conf-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns1/named.conf 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+controls { /* empty */ };
+
+options {
+ query-source address 10.53.0.1;
+ notify-source 10.53.0.1;
+ transfer-source 10.53.0.1;
+ port 5300;
+ session-keyfile "session.key";
+ pid-file "named.pid";
+ listen-on { 10.53.0.1; };
+ listen-on-v6 { none; };
+ notify no;
+};
+
+zone "." {type master; file "root.db";};
diff -r -u bin/tests/system/rrl/ns1/root.db-orig bin/tests/system/rrl/ns1/root.db
--- bin/tests/system/rrl/ns1/root.db-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns1/root.db 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,32 @@
+; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+$TTL 120
+@ SOA ns. hostmaster.ns. ( 1 3600 1200 604800 60 )
+@ NS ns.
+ns. A 10.53.0.1
+. A 10.53.0.1
+
+; limit responses from here
+tld2. NS ns.tld2.
+ns.tld2. A 10.53.0.2
+
+; limit recursion to here
+tld3. NS ns.tld3.
+ns.tld3. A 10.53.0.3
+
+; generate SERVFAIL
+tld4. NS ns.tld3.
diff -r -u bin/tests/system/rrl/ns2/hints-orig bin/tests/system/rrl/ns2/hints
--- bin/tests/system/rrl/ns2/hints-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns2/hints 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,19 @@
+; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+. 0 NS ns1.
+ns1. 0 A 10.53.0.1
diff -r -u bin/tests/system/rrl/ns2/named.conf-orig bin/tests/system/rrl/ns2/named.conf
--- bin/tests/system/rrl/ns2/named.conf-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns2/named.conf 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+controls { /* empty */ };
+
+options {
+ query-source address 10.53.0.2;
+ notify-source 10.53.0.2;
+ transfer-source 10.53.0.2;
+ port 5300;
+ session-keyfile "session.key";
+ pid-file "named.pid";
+ listen-on { 10.53.0.2; };
+ listen-on-v6 { none; };
+ notify no;
+
+ rate-limit {
+ responses-per-second 2;
+ all-per-second 70;
+ IPv4-prefix-length 24;
+ IPv6-prefix-length 64;
+ slip 3;
+ /* qps-scale 2; */
+ exempt-clients { 10.53.0.7; };
+ window 1;
+ max-table-size 100;
+ min-table-size 2;
+ };
+};
+
+/*
+ * These log settings have no effect unless "-g" is removed from ../../start.pl
+ */
+logging {
+ channel debug {
+ file "log-debug";
+ print-category yes; print-severity yes; severity debug 10;
+ };
+ channel queries {
+ file "log-queries";
+ print-category yes; print-severity yes; severity info;
+ };
+ category rate-limit { debug; queries; };
+ category queries { debug; queries; };
+};
+
+zone "." { type hint; file "hints"; };
+
+zone "tld2."{ type master; file "tld2.db"; };
diff -r -u bin/tests/system/rrl/ns2/tld2.db-orig bin/tests/system/rrl/ns2/tld2.db
--- bin/tests/system/rrl/ns2/tld2.db-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns2/tld2.db 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,43 @@
+; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+; rate limit response from this zone
+
+$TTL 120
+@ SOA tld2. hostmaster.ns.tld2. ( 1 3600 1200 604800 60 )
+ NS ns
+ NS .
+ns A 10.53.0.2
+
+a1 A 192.168.2.1
+
+*.a2 A 192.168.2.2
+
+; a3 is in tld3
+
+; a4 does not exist to give NXDOMAIN
+
+; a5 for TCP requests
+a5 A 192.168.2.5
+
+; a6 for whitelisted clients
+a6 A 192.168.2.6
+
+; a7 for SERVFAIL
+
+; a8 for all-per-second limit
+$GENERATE 101-180 all$.a8 A 192.168.2.8
diff -r -u bin/tests/system/rrl/ns3/hints-orig bin/tests/system/rrl/ns3/hints
--- bin/tests/system/rrl/ns3/hints-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns3/hints 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,19 @@
+; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+. 0 NS ns1.
+ns1. 0 A 10.53.0.1
diff -r -u bin/tests/system/rrl/ns3/named.conf-orig bin/tests/system/rrl/ns3/named.conf
--- bin/tests/system/rrl/ns3/named.conf-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns3/named.conf 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+controls { /* empty */ };
+
+options {
+ query-source address 10.53.0.3;
+ notify-source 10.53.0.3;
+ transfer-source 10.53.0.3;
+ port 5300;
+ session-keyfile "session.key";
+ pid-file "named.pid";
+ listen-on { 10.53.0.3; };
+ listen-on-v6 { none; };
+ notify no;
+};
+
+zone "." { type hint; file "hints"; };
+
+zone "tld3."{ type master; file "tld3.db"; };
diff -r -u bin/tests/system/rrl/ns3/tld3.db-orig bin/tests/system/rrl/ns3/tld3.db
--- bin/tests/system/rrl/ns3/tld3.db-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/ns3/tld3.db 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,26 @@
+; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+;
+; Permission to use, copy, modify, and/or distribute this software for any
+; purpose with or without fee is hereby granted, provided that the above
+; copyright notice and this permission notice appear in all copies.
+;
+; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+; PERFORMANCE OF THIS SOFTWARE.
+
+; $Id$
+
+
+; rate limit response from this zone
+
+$TTL 120
+@ SOA tld3. hostmaster.ns.tld3. ( 1 3600 1200 604800 60 )
+ NS ns
+ NS .
+ns A 10.53.0.3
+
+*.a3 A 192.168.3.3
diff -r -u bin/tests/system/rrl/setup.sh-orig bin/tests/system/rrl/setup.sh
--- bin/tests/system/rrl/setup.sh-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/setup.sh 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,22 @@
+#!/bin/sh
+#
+# Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# $Id$
+
+SYSTEMTESTTOP=..
+. $SYSTEMTESTTOP/conf.sh
+. ./clean.sh
+
diff -r -u bin/tests/system/rrl/tests.sh-orig bin/tests/system/rrl/tests.sh
--- bin/tests/system/rrl/tests.sh-orig 2004-01-01 00:00:00.000000000 +0000
+++ bin/tests/system/rrl/tests.sh 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,210 @@
+# Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+# PERFORMANCE OF THIS SOFTWARE.
+
+# $Id$
+
+# test response rate limiting
+
+SYSTEMTESTTOP=..
+. $SYSTEMTESTTOP/conf.sh
+
+#set -x
+
+ns1=10.53.0.1 # root, defining the others
+ns2=10.53.0.2 # test server
+ns3=10.53.0.3 # secondary test server
+ns7=10.53.0.7 # whitelisted client
+
+USAGE="$0: [-x]"
+while getopts "x" c; do
+ case $c in
+ x) set -x;;
+ *) echo "$USAGE" 1>&2; exit 1;;
+ esac
+done
+shift `expr $OPTIND - 1 || true`
+if test "$#" -ne 0; then
+ echo "$USAGE" 1>&2
+ exit 1
+fi
+# really quit on control-C
+trap 'exit 1' 1 2 15
+
+
+ret=0
+setret () {
+    # flag the whole run as failed, then say why
+    ret=1; echo "$*"
+}
+
+
+# Wait until soon after the start of a second to make results consistent.
+# The start of a second credits a rate limit.
+# This would be far easier in C or by assuming a modern version of perl.
+sec_start () {
+    # Nap about 0.05 seconds at a time until the output of `date` differs
+    # from what it was on entry.
+    START=`date`
+    until NOW=`date`; test "$START" != "$NOW"; do
+	# a failing $PERL must not end the wait
+	$PERL -e 'select(undef, undef, undef, 0.05)' || true
+    done
+    return 0
+}
+
+
+# $1=result file prefix  $2=domain name  $3...=extra dig options
+digcmd () {
+    OFILE=$1; DIG_DOM=$2; shift 2
+    ARGS="+noadd +noauth +nosearch +time=1 +tries=1 +ignore $* -p 5300 $DIG_DOM @$ns2"
+    #echo I:dig $ARGS 1>&2
+    START=`date +%y%m%d%H%M.%S`
+    # Reduce the output to one word; dig may separate fields with tabs.
+    RESULT=`$DIG $ARGS 2>&1 | tee $OFILE=TEMP				\
+	| sed -n -e 's/^[^;].*[[:space:]]\([^[:space:]]\{1,\}\)$/\1/p'	\
+		-e 's/;; flags.* tc .*/TC/p'				\
+		-e 's/;; .* status: NXDOMAIN.*/NXDOMAIN/p'		\
+		-e 's/;; .* status: SERVFAIL.*/SERVFAIL/p'		\
+		-e 's/;; connection timed out.*/drop/p'			\
+		-e 's/;; communications error to.*/drop/p'		\
+	| tr -d '\n'`
+    mv "$OFILE=TEMP" "$OFILE=$RESULT"	# the file name records the outcome
+    touch -t $START "$OFILE=$RESULT"	# stamp it with the query start time
+}
+
+
+# $1=number of tests $2=target domain $3=dig options
+CNT=1
+burst () {				# start $1 background queries for $2
+    BURST_LIMIT=$1; shift		# number of queries still to start
+    BURST_DOM_BASE="$1"; shift		# name pattern; may contain '$CNT'
+    while test "$BURST_LIMIT" -ge 1; do
+	if test $CNT -lt 10; then	# zero-pad CNT to three digits
+	    CNT="0$CNT"
+	fi
+	if test $CNT -lt 100; then
+	    CNT="0$CNT"
+	fi
+	eval BURST_DOM="$BURST_DOM_BASE"	# expand $CNT in the pattern
+	FILE="dig.out-$BURST_DOM-$CNT"
+	rm -f $FILE=*			# drop results left under this name
+	digcmd $FILE $BURST_DOM $* &	# in the background; ck_result waits
+	CNT=`expr $CNT + 1`		# expr also strips the zero padding
+	BURST_LIMIT=`expr "$BURST_LIMIT" - 1`
+    done
+}
+
+
+# Check the counts of result files.  $1=domain (may be a glob) $2=IP address
+# $3=# of $2 answers $4=# of TC $5=# of drops $6=# of NXDOMAIN $7=# of SERVFAIL
+ck_result() {
+    BAD=
+    wait				# for the background digcmd jobs
+    ADDRS=`ls dig.out-$1-*=$2 2>/dev/null | wc -l | tr -d ' '`
+    TC=`ls dig.out-$1-*=TC 2>/dev/null | wc -l | tr -d ' '`
+    DROP=`ls dig.out-$1-*=drop 2>/dev/null | wc -l | tr -d ' '`
+    NXDOMAIN=`ls dig.out-$1-*=NXDOMAIN 2>/dev/null | wc -l | tr -d ' '`
+    SERVFAIL=`ls dig.out-$1-*=SERVFAIL 2>/dev/null | wc -l | tr -d ' '`
+    if test $ADDRS -ne "$3"; then	# each mismatch is reported via setret
+	setret "I:$ADDRS instead of $3 $2 responses for $1"
+	BAD=yes
+    fi
+    if test $TC -ne "$4"; then
+	setret "I:$TC instead of $4 truncation responses for $1"
+	BAD=yes
+    fi
+    if test $DROP -ne "$5"; then
+	setret "I:$DROP instead of $5 dropped responses for $1"
+	BAD=yes
+    fi
+    if test $NXDOMAIN -ne "$6"; then
+	setret "I:$NXDOMAIN instead of $6 NXDOMAIN responses for $1"
+	BAD=yes
+    fi
+    if test $SERVFAIL -ne "$7"; then
+	setret "I:$SERVFAIL instead of $7 error responses for $1"
+	BAD=yes
+    fi
+    if test -z "$BAD"; then		# result files of a failed check are kept
+	rm -f dig.out-$1-*
+    fi
+}
+
+
+#########
+sec_start
+
+# basic rate limiting
+burst 3 a1.tld2
+# 1 second delay allows an additional response.
+sleep 1
+burst 21 a1.tld2
+# request 30 different qnames to try a wild card
+burst 30 'x$CNT.a2.tld2'
+
+# IP TC drop NXDOMAIN SERVFAIL
+# check for 24 results
+# including the 1 second delay
+ck_result a1.tld2 192.168.2.1 3 7 14 0 0
+
+# Check the wild card answers.
+# The parent name of the 30 requests is counted.
+ck_result 'x*.a2.tld2' 192.168.2.2 2 9 19 0 0
+
+
+#########
+sec_start
+
+burst 1 'y$CNT.a3.tld3'; wait; burst 20 'y$CNT.a3.tld3'
+burst 20 'z$CNT.a4.tld2'
+
+# Recursion.
+# The first answer is counted separately because it is counted against
+# the rate limit on recursing to the server for a3.tld3. The remaining 20
+# are counted as local responses from the cache.
+ck_result 'y*.a3.tld3' 192.168.3.3 3 6 12 0 0
+
+# NXDOMAIN responses are also limited based on the parent name.
+ck_result 'z*.a4.tld2' x 0 6 12 2 0
+
+
+#########
+sec_start
+
+burst 20 a5.tld2 +tcp
+burst 20 a6.tld2 -b $ns7
+burst 20 a7.tld4
+
+# TCP responses are not rate limited
+ck_result a5.tld2 192.168.2.5 20 0 0 0 0
+
+# whitelisted client is not rate limited
+ck_result a6.tld2 192.168.2.6 20 0 0 0 0
+
+# Errors such as SERVFAIL are rate limited. The numbers are confusing, because
+# other rate limiting can be triggered before SERVFAIL is reached.
+ck_result a7.tld4 192.168.2.1 0 5 13 0 2
+
+
+#########
+sec_start
+
+# all-per-second
+CNT=101
+burst 80 'all$CNT.a8.tld2'
+ck_result 'a*.a8.tld2' 192.168.2.8 70 3 7 0 0
+
+
+echo "I:exit status: $ret"
+exit $ret
diff -r -u doc/arm/Bv9ARM-book.xml-orig doc/arm/Bv9ARM-book.xml
--- doc/arm/Bv9ARM-book.xml-orig 2004-01-01 00:00:00.000000000 +0000
+++ doc/arm/Bv9ARM-book.xml 2004-01-01 00:00:00.000000000 +0000
@@ -4803,6 +4803,24 @@
+
+
+ rate-limit
+
+
+
+ The start, periodic, and final notices of rate limiting
+ of a stream of responses are logged at
+ info severity in this category.
+ Various internal performance data, such as expansions
+ of the table, are logged at debug level 1 and higher.
+ Rate limiting of individual requests
+ is logged in the queries category
+ and can be controlled with the
+ querylog option.
+
+
+
@@ -5334,6 +5352,21 @@
resolver-query-timeout number ;
deny-answer-addresses { address_match_list } except-from { namelist } ;
deny-answer-aliases { namelist } except-from { namelist } ;
+ rate-limit {
+ responses-per-second number ;
+ errors-per-second number ;
+ nxdomains-per-second number ;
+ all-per-second number ;
+ window number ;
+ log-only yes_or_no ;
+ qps-scale number ;
+ IPv4-prefix-length number ;
+ IPv6-prefix-length number ;
+ slip number ;
+ exempt-clients { address_match_list } ;
+ max-table-size number ;
+ min-table-size number ;
+ } ;
response-policy { zone_name
policy given | disabled | passthru | nxdomain | nodata | cname domain
recursive-only yes_or_no max-policy-ttl number ;
@@ -9737,6 +9770,204 @@
48.zz.2.2001.rpz-nsip CNAME .
+
+
+ Rate Limiting
+
+ Excessive essentially identical UDP responses
+ can be discarded by configuring a
+ rate-limit clause in an
+ options statement.
+ This mechanism keeps BIND 9 from being used
+ in amplifying reflection denial of service attacks
+ as well as partially protecting BIND 9 itself from
+ some denial of service attacks.
+ Very short truncated responses can be sent to provide
+ rate-limited responses to legitimate
+ clients within a range of attacked and forged IP addresses.
+ Legitimate clients react to a truncated response by retrying
+ with TCP.
+
+
+
+ Rate limiting works by setting
+ responses-per-second
+ to a number of repetitions per second for responses for a given name
+ and record type to a DNS client.
+
+
+
+ Responses-per-second is a limit on
+ identical responses instead of a limit on all responses or
+ even all responses to a single client.
+ 10 identical responses per second is a generous limit except perhaps
+ when many clients are using a single IP address via network
+ address translation (NAT).
+ The default limit of zero specifies an unbounded limit to turn off
+ rate-limiting in a view or to only rate-limit NXDOMAIN or other
+ errors.
+
+
+
+ The notion of "identical responses"
+ and "single DNS client" cannot be simplistic.
+ All responses to a CIDR block with prefix
+ length specified with IPv4-prefix-length
+ (default 24) or IPv6-prefix-length
+ (default 56) are assumed to come from a single DNS client.
+ Requests for a name that result in DNS NXDOMAIN
+ errors are considered identical.
+ This controls some attacks using random names, but
+ accommodates servers that expect many legitimate NXDOMAIN responses
+ such as anti-spam blacklists.
+ By default the limit on NXDOMAIN errors is the same as the
+ responses-per-second value,
+ but it can be set separately with
+ nxdomains-per-second.
+ All requests for all names or types that result in DNS errors
+ such as SERVFAIL and FORMERR (but not NXDOMAIN) are considered
+ identical.
+ This controls attacks using invalid requests or distant,
+ broken authoritative servers.
+ By default the limit on errors is the same as the
+ responses-per-second value,
+ but it can be set separately with
+ errors-per-second.
+
+
+
+ Rate limiting uses a "credit" or "token bucket" scheme.
+ Each identical response has a conceptual account
+ that is given responses-per-second,
+ errors-per-second, and
+ nxdomains-per-second credits every second.
+ A DNS request triggering some desired response debits
+ the account by one.
+ Responses are not sent while the account is negative.
+ The account cannot become more positive than
+ the per-second limit
+ or more negative than window
+ times the per-second limit.
+ A DNS client that sends requests that are not
+ answered can be penalized for up to window seconds
+ (default 15).
+
+
+
+ Responses generated from local wildcards are counted and limited
+ as if they were for the parent domain name.
+ This prevents flooding by requesting random.wild.example.com.
+ For similar reasons, NXDOMAIN responses are counted and rate
+ limited by their owner name, the nearest valid domain name to the
+ query name with an SOA record.
+
+
+
+ Many attacks using DNS involve UDP requests with forged source
+ addresses.
+ Rate limiting prevents the use of BIND 9 to flood a network
+ with responses to requests with forged source addresses,
+ but could let a third party block responses to legitimate requests.
+ There is a mechanism that can answer some legitimate
+ requests from a client whose address is being forged in a flood.
+ Setting slip to 2 (its default) causes every
+ other UDP request to be answered with a small response
+ claiming that the response would have been truncated.
+ The small size and relative infrequency of the response make
+ it unattractive for abuse of third parties.
+ slip must be between 0 and 10.
+ A value of 0 disables slipping: no truncated responses are sent in place of dropped ones.
+ Some error responses cannot be replaced with responses
+ with the TC flag, and so are instead
+ leaked at the slip rate.
+
+
+
+ When the approximate query per second rate exceeds
+ the qps-scale value,
+ the responses-per-second,
+ errors-per-second,
+ nxdomains-per-second,
+ and slip values are reduced by the
+ ratio of the current rate to the qps-scale value.
+ This feature can tighten defenses during attacks.
+ For example, with
+ qps-scale 250; responses-per-second 20; and
+ a total query rate of 1000 queries/second for all queries from
+ all DNS clients including via TCP,
+ then the effective responses/second limit changes to
+ (250/1000)*20 or 5.
+ The limits for IP addresses using TCP are not reduced.
+ Responses sent via TCP are not subject to rate limits
+ but are counted to approximate the query per second rate.
+
+
+
+ Communities of DNS clients can be given their own parameters or no
+ rate limiting by putting
+ rate-limit statements in view
+ statements instead of the global option
+ statement.
+ A rate-limit statement in a view replaces
+ instead of being merged with a rate-limit
+ statement among the main options.
+ DNS clients within a view can be exempted from rate limits
+ with the exempt-clients clause.
+
+
+
+ UDP responses of all kinds can be limited with the
+ all-per-second phrase.
+ This rate limiting is similar to the rate limiting offered by
+ firewalls. When performed in a DNS server it is inferior to
+ the other rate-limit forms, because it ignores
+ the contents of responses to a block of IP addresses.
+ The rate limiting provided by
+ responses-per-second,
+ errors-per-second, and
+ nxdomains-per-second on a DNS server
+ is often invisible to the victim of a DNS reflection attack.
+ Unless the forged requests of the attack are the same as the
+ legitimate requests of the victim, the victim's requests are
+ not affected.
+ An all-per-second limit must be
+ at least 4 times as large as the other limits,
+ because single DNS clients often send bursts of legitimate
+ requests.
+ For example, the receipt of a single mail message can prompt
+ requests from an SMTP server for NS, PTR, A, and AAAA records
+ as the incoming SMTP/TCP/IP connection is considered.
+ The SMTP server can need additional NS, A, AAAA, MX, TXT, and SPF
+ records as it considers the SMTP Mail From
+ command.
+
+
+
+ The maximum size of the table used to track requests and so
+ rate limit responses is set with max-table-size.
+ Each entry in the table is between 40 and 80 bytes.
+ The default of 10,000 is suitable for a server receiving
+ 5000 DNS requests/second.
+ 10,000 entries require about 1 megabyte.
+ To reduce cold start costs including those in growing the
+ table, min-table-size (default 1000)
+ can set the minimum table size.
+ Enable logging to monitor expansions of the table and inform
+ non-default choices for the initial and maximum table size.
+
+
+
+ Use log-only yes to test rate limiting parameters
+ without actually dropping any requests.
+
+
+
+ Responses dropped by rate limits are included in the
+ RateDropped and QryDropped
+ statistics.
+ Responses that are truncated by rate limits are included in
+ RateSlipped and RespTruncated.
+
@@ -14385,6 +14616,32 @@
+
+
+ RateDropped
+
+
+
+
+
+
+ Responses dropped by rate limits.
+
+
+
+
+
+ RateSlipped
+
+
+
+
+
+
+ Responses truncated by rate limits.
+
+
+
diff -r -u lib/dns/Makefile.in-orig lib/dns/Makefile.in
--- lib/dns/Makefile.in-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/Makefile.in 2004-01-01 00:00:00.000000000 +0000
@@ -66,8 +66,8 @@
portlist.@O@ private.@O@ \
rbt.@O@ rbtdb.@O@ rbtdb64.@O@ rcode.@O@ rdata.@O@ \
rdatalist.@O@ rdataset.@O@ rdatasetiter.@O@ rdataslab.@O@ \
- request.@O@ resolver.@O@ result.@O@ rootns.@O@ rpz.@O@ \
- rriterator.@O@ sdb.@O@ \
+ request.@O@ resolver.@O@ result.@O@ rootns.@O@ \
+ rpz.@O@ rrl.@O@ rriterator.@O@ sdb.@O@ \
sdlz.@O@ soa.@O@ ssu.@O@ ssu_external.@O@ \
stats.@O@ tcpmsg.@O@ time.@O@ timer.@O@ tkey.@O@ \
tsec.@O@ tsig.@O@ ttl.@O@ update.@O@ validator.@O@ \
@@ -93,7 +93,7 @@
name.c ncache.c nsec.c nsec3.c order.c peer.c portlist.c \
rbt.c rbtdb.c rbtdb64.c rcode.c rdata.c rdatalist.c \
rdataset.c rdatasetiter.c rdataslab.c request.c \
- resolver.c result.c rootns.c rpz.c rriterator.c \
+ resolver.c result.c rootns.c rpz.c rrl.c rriterator.c \
sdb.c sdlz.c soa.c ssu.c ssu_external.c \
stats.c tcpmsg.c time.c timer.c tkey.c \
tsec.c tsig.c ttl.c update.c validator.c \
diff -r -u lib/dns/include/dns/log.h-orig lib/dns/include/dns/log.h
--- lib/dns/include/dns/log.h-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/include/dns/log.h 2004-01-01 00:00:00.000000000 +0000
@@ -43,6 +43,7 @@
#define DNS_LOGCATEGORY_DELEGATION_ONLY (&dns_categories[10])
#define DNS_LOGCATEGORY_EDNS_DISABLED (&dns_categories[11])
#define DNS_LOGCATEGORY_RPZ (&dns_categories[12])
+#define DNS_LOGCATEGORY_RRL (&dns_categories[13])
/* Backwards compatibility. */
#define DNS_LOGCATEGORY_GENERAL ISC_LOGCATEGORY_GENERAL
diff -r -u lib/dns/include/dns/rrl.h-orig lib/dns/include/dns/rrl.h
--- lib/dns/include/dns/rrl.h-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/include/dns/rrl.h 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+#ifndef DNS_RRL_H
+#define DNS_RRL_H 1
+
+/*
+ * Rate limit DNS responses.
+ */
+
+#include
+
+#include
+#include
+#include
+
+ISC_LANG_BEGINDECLS
+
+
+/*
+ * Log level for memory allocation or other failures.
+ */
+#define DNS_RRL_LOG_FAIL ISC_LOG_WARNING
+/*
+ * Log level for dropped or slipped responses.
+ */
+#define DNS_RRL_LOG_DROP ISC_LOG_INFO
+/*
+ * Log level for major events in dropping or slipping.
+ */
+#define DNS_RRL_LOG_DEBUG1 ISC_LOG_DEBUG(3)
+/*
+ * Log level for limit computations.
+ */
+#define DNS_RRL_LOG_DEBUG2 ISC_LOG_DEBUG(4)
+/*
+ * Log level for less interesting events.
+ */
+#define DNS_RRL_LOG_DEBUG3 ISC_LOG_DEBUG(9)
+
+#define DNS_RRL_LOG_ERR_LEN 64
+#define DNS_RRL_LOG_BUF_LEN (sizeof("would continue limiting") + \
+ DNS_RRL_LOG_ERR_LEN + \
+ sizeof(" responses to ") + \
+ ISC_NETADDR_FORMATSIZE + \
+ sizeof("/128 for IN ") + \
+ DNS_RDATATYPE_FORMATSIZE + \
+ DNS_NAME_FORMATSIZE)
+
+
+typedef struct dns_rrl_hash dns_rrl_hash_t;
+
+/*
+ * Response types; the type selects which per-second limit applies.
+ */
+typedef enum {
+ DNS_RRL_RTYPE_FREE, /* value 0; presumably an unused entry -- TODO confirm */
+ DNS_RRL_RTYPE_QUERY, /* limited by responses_per_second */
+ DNS_RRL_RTYPE_NXDOMAIN, /* limited by nxdomains_per_second */
+ DNS_RRL_RTYPE_ERROR, /* limited by errors_per_second */
+ DNS_RRL_RTYPE_ALL, /* limited by all_per_second */
+ DNS_RRL_RTYPE_TCP, /* records a client that has used TCP */
+} dns_rrl_rtype_t;
+
+/*
+ * A rate limit bucket key: masked client address, hashed query name,
+ * query type and class, and response type.
+ * This should be small to limit the total size of the database.
+ */
+typedef struct dns_rrl_key dns_rrl_key_t;
+struct dns_rrl_key {
+ isc_uint32_t ip[4]; /* masked client address; IPv4 uses only ip[3] */
+ isc_uint32_t qname_hash; /* hash of the query name, not the name itself */
+ dns_rdatatype_t qtype;
+ dns_rrl_rtype_t rtype :4; /* 3 value bits + 1 spare in case enum bit-fields are signed */
+ unsigned int qclass :3; /* remapped class number (class+2), not a boolean */
+ isc_boolean_t ipv6 :1; /* set when ip[] holds an IPv6 address */
+};
+
+/*
+ * A rate-limit entry: one token-bucket account for one key.
+ * This should be small to limit the total size of the database.
+ * With gcc on ARM, the key should have __attribute((__packed__)) to
+ * avoid padding to a multiple of 8 bytes.
+ */
+typedef struct dns_rrl_entry dns_rrl_entry_t;
+typedef ISC_LIST(dns_rrl_entry_t) dns_rrl_bin_t;
+struct dns_rrl_entry {
+ ISC_LINK(dns_rrl_entry_t) lru; /* link in the dns_rrl_t lru list */
+ ISC_LINK(dns_rrl_entry_t) hlink; /* link in the hash chain of *bin */
+ dns_rrl_bin_t *bin; /* hash bin holding this entry, or NULL */
+ isc_stdtime_t last_used; /* timestamp of the last debit; 0 when recycled */
+ isc_int32_t responses; /* token balance; negative while limited */
+# define DNS_RRL_MAX_WINDOW 600
+# define DNS_RRL_MAX_RATE (ISC_INT32_MAX / DNS_RRL_MAX_WINDOW)
+ dns_rrl_key_t key;
+ unsigned int slip_cnt :4; /* reset to 0 when the balance is refilled */
+# define DNS_RRL_MAX_SLIP 10
+ unsigned int log_secs :10; /* seconds since the last log message, capped */
+# define DNS_RRL_MAX_LOG_SECS 600
+# define DNS_RRL_STOP_LOG_SECS 60
+ isc_boolean_t logged :1; /* log_end() is called before such an entry is recycled */
+ unsigned int log_qname :8; /* presumably an index into dns_rrl_t qnames[] -- TODO confirm */
+# define DNS_RRL_NUM_QNAMES 256
+};
+
+/*
+ * A hash table of rate-limit entries, allocated with room for 'length' bins.
+ */
+struct dns_rrl_hash {
+ isc_stdtime_t check_time; /* when the search length was last checked or the table was retired */
+ int length; /* number of bins; also the hash modulus */
+ dns_rrl_bin_t bins[1]; /* first of 'length' bins */
+};
+
+/*
+ * A block of rate-limit entries obtained with one allocation.
+ */
+typedef struct dns_rrl_block dns_rrl_block_t;
+struct dns_rrl_block {
+ ISC_LINK(dns_rrl_block_t) link; /* link in the dns_rrl_t blocks list */
+ int size; /* size of the whole allocation in bytes */
+ dns_rrl_entry_t entries[1]; /* first entry; the rest follow in the same allocation */
+};
+
+/*
+ * A buffer for the query name of a rate-limited response.
+ */
+typedef struct dns_rrl_qname_buf dns_rrl_qname_buf_t;
+struct dns_rrl_qname_buf {
+ ISC_LINK(dns_rrl_qname_buf_t) link; /* link in the dns_rrl_t qname_free list */
+ const dns_rrl_entry_t *e; /* presumably the entry using this buffer -- TODO confirm */
+ unsigned int index; /* presumably the slot in dns_rrl_t qnames[] -- TODO confirm */
+ dns_fixedname_t qname;
+};
+
+/*
+ * Per-view rate limit parameters, scaling state, and the entry database.
+ */
+typedef struct dns_rrl dns_rrl_t;
+struct dns_rrl {
+ isc_mutex_t lock;
+ isc_mem_t *mctx; /* memory context for entry blocks and hash tables */
+
+ isc_boolean_t log_only; /* when set, callers log but do not drop */
+ int responses_per_second; /* configured limits; 0 turns a limit off */
+ int errors_per_second;
+ int nxdomains_per_second;
+ int all_per_second;
+ int window; /* seconds; a balance cannot go below -window * rate */
+ int slip;
+ double qps_scale;
+ int max_entries; /* cap on num_entries; 0 means no cap */
+
+ dns_acl_t *exempt; /* presumably the exempt-clients ACL -- TODO confirm */
+
+ int num_entries; /* entries allocated so far */
+
+ int qps_responses;
+ isc_stdtime_t qps_time;
+ double qps;
+ int scaled_responses_per_second; /* most recent qps-scaled values of the limits */
+ int scaled_errors_per_second;
+ int scaled_nxdomains_per_second;
+ int scaled_all_per_second;
+ int scaled_slip;
+
+ isc_stdtime_t prune_time;
+
+ unsigned int probes; /* chain steps taken since the last check */
+ unsigned int searches; /* lookups since the last check */
+
+ ISC_LIST(dns_rrl_block_t) blocks; /* every allocated block of entries */
+ ISC_LIST(dns_rrl_entry_t) lru; /* most recently used entry first */
+
+ dns_rrl_hash_t *hash; /* current hash table */
+ dns_rrl_hash_t *old_hash; /* previous table while its entries migrate, else NULL */
+
+ int ipv4_prefixlen;
+ isc_uint32_t ipv4_mask; /* applied to IPv4 client addresses in keys */
+ int ipv6_prefixlen;
+ isc_uint32_t ipv6_mask[4]; /* applied to IPv6 client addresses in keys */
+
+ dns_rrl_entry_t *log_ended;
+ ISC_LIST(dns_rrl_qname_buf_t) qname_free;
+ int num_qnames;
+ dns_rrl_qname_buf_t *qnames[DNS_RRL_NUM_QNAMES];
+};
+
+typedef enum { /* verdict returned by dns_rrl() */
+ DNS_RRL_RESULT_OK, /* not rate limited */
+ DNS_RRL_RESULT_DROP, /* over the limit: drop the response */
+ DNS_RRL_RESULT_SLIP, /* over the limit: presumably answer with a truncated response -- TODO confirm */
+} dns_rrl_result_t;
+
+dns_rrl_result_t
+dns_rrl(dns_view_t *view,
+ const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
+ dns_rdataclass_t rdclass, dns_rdatatype_t qtype,
+ dns_name_t *qname, dns_rcode_t rcode, isc_stdtime_t now,
+ isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len);
+
+void
+dns_rrl_view_destroy(dns_view_t *view);
+
+isc_result_t
+dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries);
+
+ISC_LANG_ENDDECLS
+
+#endif /* DNS_RRL_H */
diff -r -u lib/dns/include/dns/view.h-orig lib/dns/include/dns/view.h
--- lib/dns/include/dns/view.h-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/include/dns/view.h 2004-01-01 00:00:00.000000000 +0000
@@ -73,6 +73,7 @@
#include
#include
+#include
#include
#include
#include
@@ -142,6 +143,7 @@
dns_rbt_t * answeracl_exclude;
dns_rbt_t * denyanswernames;
dns_rbt_t * answernames_exclude;
+ dns_rrl_t * rrl;
isc_boolean_t provideixfr;
isc_boolean_t requestnsid;
dns_ttl_t maxcachettl;
diff -r -u lib/dns/log.c-orig lib/dns/log.c
--- lib/dns/log.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/log.c 2004-01-01 00:00:00.000000000 +0000
@@ -45,6 +45,7 @@
{ "delegation-only", 0 },
{ "edns-disabled", 0 },
{ "rpz", 0 },
+ { "rate-limit", 0 },
{ NULL, 0 }
};
diff -r -u lib/dns/rrl.c-orig lib/dns/rrl.c
--- lib/dns/rrl.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/rrl.c 2004-01-01 00:00:00.000000000 +0000
@@ -0,0 +1,1242 @@
+/*
+ * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC")
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/* $Id$ */
+
+/*! \file */
+
+/*
+ * Rate limit DNS responses.
+ */
+
+/* #define ISC_LIST_CHECKINIT */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+static void
+log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e,
+ char *log_buf, unsigned int log_buf_len);
+
+
+/*
+ * Get a modulus for a hash function that is tolerably likely to be
+ * relatively prime to most inputs. Of course, we get a prime for initial
+ * values not larger than the square of the last prime. We often get a prime
+ * after that.
+ * This works well in practice for hash tables up to at least 100
+ * times the square of the last prime and better than a multiplicative hash.
+ */
+static int
+hash_divisor(unsigned int initial) {
+ static isc_uint16_t primes[] = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41,
+ 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97,
+#if 0
+ 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
+ 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
+ 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
+ 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
+ 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
+ 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
+ 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
+ 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
+ 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
+ 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
+ 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
+ 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009,
+#endif
+ };
+ int divisions, tries;
+ unsigned int result;
+ isc_uint16_t *pp, p;
+
+ result = initial;
+
+ if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) { /* small request: answer straight from the table */
+ pp = primes;
+ while (*pp < result) /* stops at the first listed prime >= result */
+ ++pp;
+ return (*pp);
+ }
+
+ if ((result & 1) == 0) /* only odd candidates are tried */
+ ++result;
+
+ divisions = 0;
+ tries = 1;
+ pp = primes;
+ do {
+ p = *pp++;
+ ++divisions;
+ if ((result % p) == 0) {
+ ++tries;
+ result += 2; /* next odd candidate */
+ pp = primes; /* restart trial division from the first prime */
+ }
+ } while (pp < &primes[sizeof(primes)/sizeof(primes[0])]);
+
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
+ "%d hash_divisor() divisions in %d tries"
+ " to get %d from %d",
+ divisions, tries, result, initial);
+
+ return (result); /* divisible by none of the listed primes */
+}
+
+/*
+ * Return the age in seconds of timestamp 'ts' relative to 'now'.
+ */
+static inline int
+delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
+ int delta;
+
+ delta = now - ts;
+ if (delta >= 0)
+ return (delta);
+
+ /*
+ * The timestamp is in the future. That future might result from
+ * re-ordered requests, because we use timestamps on requests
+ * instead of consulting a clock. Timestamps in the distant future are
+ * assumed to result from clock changes. When the clock changes to
+ * the past, make existing timestamps appear to be in the past.
+ */
+ if (delta < -5) /* more than 5 seconds ahead: assume a clock change */
+ return (now); /* report 'now' itself as the age */
+ return (0); /* slightly ahead: treat as no elapsed time */
+}
+
+static isc_result_t
+add_rrl_entries(dns_rrl_t *rrl, int new) { /* Allocate up to 'new' zeroed entries and append them to the LRU list. */
+ unsigned int bsize;
+ dns_rrl_block_t *b;
+ dns_rrl_entry_t *e;
+ double rate;
+ int i;
+
+ if (rrl->num_entries+new >= rrl->max_entries && rrl->max_entries != 0) { /* max_entries of 0 means no cap */
+ if (rrl->num_entries >= rrl->max_entries)
+ return (ISC_R_SUCCESS); /* already at the cap: add nothing, report success */
+ new = rrl->max_entries - rrl->num_entries; /* trim the request to the cap */
+ if (new <= 0)
+ return (ISC_R_NOMEMORY);
+ }
+
+ /*
+ * Try to log expansions so that the user can tune max-table-size
+ * and min-table-size.
+ */
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) &&
+ rrl->hash != NULL ) {
+ rate = rrl->probes;
+ if (rrl->searches != 0)
+ rate /= rrl->searches;
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
+ "increase from %d to %d RRL entries with"
+ " %d bins; average search length %.1f",
+ rrl->num_entries, rrl->num_entries+new,
+ rrl->hash->length, rate);
+ }
+
+ bsize = sizeof(dns_rrl_block_t) + (new-1)*sizeof(dns_rrl_entry_t); /* the block type already holds one entry */
+ b = isc_mem_get(rrl->mctx, bsize);
+ if (b == NULL) {
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
+ "isc_mem_get(%d) failed for RRL entries",
+ bsize);
+ return (ISC_R_NOMEMORY);
+ }
+ memset(b, 0, bsize);
+ b->size = bsize;
+
+ e = b->entries;
+ rrl->log_ended = e; /* NOTE(review): log_ended now points at the first new entry; its use is outside this view */
+ for (i = 0; i < new; ++i, ++e) {
+ ISC_LINK_INIT(e, hlink);
+ ISC_LIST_INITANDAPPEND(rrl->lru, e, lru); /* new entries go to the tail, where entries are recycled first */
+ }
+ rrl->num_entries += new;
+ ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);
+
+ return (ISC_R_SUCCESS);
+}
+
+static inline dns_rrl_bin_t *
+get_rrl_bin(dns_rrl_hash_t *hash, unsigned int hval) { /* Map a hash value to its bin in 'hash'. */
+ return (&hash->bins[hval % hash->length]); /* index is the hash value modulo the number of bins */
+}
+
+static void
+free_old_hash(dns_rrl_t *rrl) { /* Unlink every entry still in the old hash table, then free the table. */
+ dns_rrl_hash_t *old_hash;
+ dns_rrl_bin_t *old_bin;
+ dns_rrl_entry_t *e;
+
+ old_hash = rrl->old_hash; /* the caller ensures this is not NULL */
+ for (old_bin = &old_hash->bins[0];
+ old_bin < &old_hash->bins[old_hash->length];
+ ++old_bin) {
+ while ((e = ISC_LIST_HEAD(*old_bin)) != NULL) {
+ ISC_LIST_UNLINK(*e->bin, e, hlink);
+ e->bin = NULL; /* the entry stays on the LRU list but is in no hash chain */
+ }
+ }
+
+ isc_mem_put(rrl->mctx, old_hash,
+ sizeof(*old_hash)
+ + (old_hash->length-1)*sizeof(old_hash->bins[0])); /* same size formula as the allocation in expand_rrl_hash() */
+ rrl->old_hash = NULL;
+}
+
+static isc_result_t
+expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) { /* Install a larger, empty hash table and retire the current one to old_hash. */
+ dns_rrl_hash_t *hash;
+ int old_bins, new_bins, hsize;
+ double rate;
+
+ if (rrl->old_hash != NULL) /* only one old table is kept at a time */
+ free_old_hash(rrl);
+
+ /*
+ * Most searches fail and so go to the end of the chain.
+ * Use a small hash table load factor.
+ */
+ old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
+ new_bins = old_bins/8 + old_bins; /* grow by one eighth */
+ if (new_bins < rrl->num_entries) /* but use at least one bin per entry */
+ new_bins = rrl->num_entries;
+ new_bins = hash_divisor(new_bins);
+
+ hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]); /* the table type already holds one bin */
+ hash = isc_mem_get(rrl->mctx, hsize);
+ if (hash == NULL) {
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL,
+ "isc_mem_get(%d) failed for"
+ " RRL hash table",
+ hsize);
+ return (ISC_R_NOMEMORY);
+ }
+ memset(hash, 0, hsize);
+ hash->length = new_bins;
+
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1) && old_bins != 0) {
+ rate = rrl->probes;
+ if (rrl->searches != 0)
+ rate /= rrl->searches;
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST,
+ DNS_RRL_LOG_DEBUG1,
+ "increase from %d to %d RRL bins for"
+ " %d entries; average search length %.1f",
+ old_bins, new_bins, rrl->num_entries, rate);
+ }
+
+ rrl->old_hash = rrl->hash; /* existing entries stay chained in the old table for now */
+ if (rrl->old_hash != NULL)
+ rrl->old_hash->check_time = now; /* starts the clock for discarding the old table */
+ rrl->hash = hash;
+
+ return (ISC_R_SUCCESS);
+}
+
+static void
+rrl_entry_ref(dns_rrl_t *rrl, dns_rrl_entry_t *e, dns_rrl_bin_t *new_bin,
+ int probes, isc_stdtime_t now)
+{ /* Note a use of 'e': move it to the front of the LRU list and of 'new_bin', and update search statistics. */
+ /*
+ * Make the entry most recently used.
+ */
+ if (ISC_LIST_HEAD(rrl->lru) != e) {
+ if (e == rrl->log_ended) { /* keep log_ended off the entry that is about to move */
+ if (e->lru.next != NULL)
+ rrl->log_ended = e->lru.next;
+ else
+ rrl->log_ended = e->lru.prev;
+ }
+ ISC_LIST_UNLINK(rrl->lru, e, lru);
+ ISC_LIST_PREPEND(rrl->lru, e, lru);
+ }
+
+ /*
+ * Move the entry to the head of its hash chain.
+ */
+ if (ISC_LIST_HEAD(*new_bin) != e) {
+ if (e->bin != NULL) /* NULL for an entry that is in no chain */
+ ISC_LIST_UNLINK(*e->bin, e, hlink);
+ ISC_LIST_PREPEND(*new_bin, e, hlink);
+ e->bin = new_bin;
+ }
+
+ /*
+ * Expand the hash table if it is time and necessary.
+ * This will leave the newly referenced entry in a chain in the
+ * old hash table. It will migrate to the new hash table the next
+ * time it is used or be cut loose when the old hash table is destroyed.
+ */
+ rrl->probes += probes;
+ ++rrl->searches;
+ if (rrl->searches > 100 &&
+ delta_rrl_time(rrl->hash->check_time, now) >= 10) { /* check at most once per 10 seconds */
+ if (rrl->probes/rrl->searches > 2) /* integer average chain length above 2 */
+ expand_rrl_hash(rrl, now); /* result ignored; on failure the current table stays in use */
+ rrl->hash->check_time = now;
+ rrl->probes = 0;
+ rrl->searches = 0;
+ }
+}
+
+static inline isc_boolean_t
+rrl_key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) { /* Return ISC_TRUE when the two keys are identical. */
+ return (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0 ? ISC_TRUE : ISC_FALSE); /* bytewise compare; make_key() zeroes the whole key before filling it */
+}
+
+/*
+ * Construct the database key and return its hash value.
+ * Use a hash of the DNS query name to save space in the database.
+ * Collisions result in legitimate rate limiting responses for one
+ * query name also limiting responses for other names to the
+ * same client. This is rare and benign enough given the large
+ * space costs compared to keeping the entire name in the database
+ * entry or the time costs of dynamic allocation.
+ */
+static isc_uint32_t
+make_key(dns_rrl_t *rrl, dns_rrl_key_t *key,
+ const isc_sockaddr_t *client_addr,
+ dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
+ dns_rrl_rtype_t rtype)
+{
+ isc_uint32_t hval;
+ int i;
+
+ memset(key, 0, sizeof(*key)); /* zero padding too, since keys are compared with memcmp() */
+
+ key->rtype = rtype;
+ hval = rtype;
+ if (rtype == DNS_RRL_RTYPE_QUERY ||
+ rtype == DNS_RRL_RTYPE_NXDOMAIN) { /* other response types ignore class and type */
+ /*
+ * Map dns_rdataclass_reserved0 = 0 -> 2
+ * dns_rdataclass_in = 1 -> 3
+ * dns_rdataclass_chaos = 3 -> 5
+ * dns_rdataclass_hs = 4 -> 6
+ * dns_rdataclass_none = 254 -> 0
+ * dns_rdataclass_any = 255 -> 1
+ * and trust that there will never be significant changes.
+ */
+ key->qclass = qclass+2; /* truncated to the width of the bit-field */
+ key->qtype = qtype;
+ hval += qtype<<8;
+ }
+
+ if (qname != NULL && qname->labels != 0) {
+ /*
+ * Ignore the first label of wildcards.
+ */
+ if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
+ (i = dns_name_countlabels(qname)) > 1) {
+ dns_fixedname_t suffixf;
+ dns_name_t *suffix;
+
+ dns_fixedname_init(&suffixf);
+ suffix = dns_fixedname_name(&suffixf);
+ dns_name_split(qname, i-1, NULL, suffix); /* keep all labels but the first */
+ key->qname_hash = dns_name_hashbylabel(suffix,
+ ISC_FALSE);
+ } else {
+ key->qname_hash = dns_name_hashbylabel(qname,
+ ISC_FALSE);
+ }
+ hval += key->qname_hash;
+ }
+
+ switch (client_addr->type.sa.sa_family) { /* other address families leave ip[] zero */
+ case AF_INET:
+ key->ip[3] = (client_addr->type.sin.sin_addr.s_addr &
+ rrl->ipv4_mask); /* IPv4 uses only the last word */
+ hval = (hval>>31) + (hval<<1) + key->ip[3]; /* rotate left one bit, then add the address word */
+ break;
+ case AF_INET6:
+ key->ipv6 = ISC_TRUE;
+ memcpy(key->ip, &client_addr->type.sin6.sin6_addr,
+ sizeof(key->ip));
+ for (i = 0; i < 4; ++i) {
+ key->ip[i] &= rrl->ipv6_mask[i];
+ hval = (hval>>31) + (hval<<1) + key->ip[i];
+ }
+ break;
+ }
+
+ return (hval);
+}
+
+static inline int
+response_balance(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) { /* Return the balance of 'e' as it would be after 'age' seconds of credit; 'e' is not modified. */
+ int balance;
+
+ balance = e->responses;
+ if (balance < 0) /* only a negative balance is projected forward */
+ switch (e->key.rtype) {
+ case DNS_RRL_RTYPE_QUERY:
+ balance += age * rrl->responses_per_second;
+ break;
+ case DNS_RRL_RTYPE_NXDOMAIN:
+ balance += age * rrl->nxdomains_per_second;
+ break;
+ case DNS_RRL_RTYPE_ERROR:
+ balance += age * rrl->errors_per_second;
+ break;
+ case DNS_RRL_RTYPE_ALL:
+ balance += age * rrl->all_per_second;
+ break;
+ case DNS_RRL_RTYPE_TCP:
+ balance += age; /* one credit per second */
+ break;
+ default:
+ INSIST(0); /* no other response type is expected here */
+ }
+ return (balance);
+}
+
+/*
+ * Search for an entry for a response and optionally create it.
+ */
+static dns_rrl_entry_t *
+get_rrl_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
+ dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass,
+ dns_rrl_rtype_t rtype, isc_stdtime_t now, isc_boolean_t create,
+ char *log_buf, unsigned int log_buf_len)
+{
+ dns_rrl_key_t key;
+ isc_uint32_t hval;
+ dns_rrl_hash_t *hash, *old_hash;
+ dns_rrl_entry_t *e;
+ dns_rrl_bin_t *new_bin, *old_bin;
+ int probes, age;
+
+ hval = make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
+
+ /*
+ * Look for the entry in the current hash table.
+ */
+ hash = rrl->hash;
+ new_bin = get_rrl_bin(hash, hval);
+ for (e = ISC_LIST_HEAD(*new_bin), probes = 1;
+ e != NULL;
+ e = ISC_LIST_NEXT(e, hlink), ++probes) {
+ if (rrl_key_cmp(&e->key, &key)) {
+ rrl_entry_ref(rrl, e, new_bin, probes, now);
+ return (e);
+ }
+ }
+
+ /*
+ * Look in the old hash table if we did not find the entry.
+ */
+ old_hash = rrl->old_hash;
+ if (old_hash != NULL) {
+ old_bin = get_rrl_bin(old_hash, hval);
+ for (e = ISC_LIST_HEAD(*old_bin);
+ e != NULL;
+ e = ISC_LIST_NEXT(e, hlink)) {
+ if (rrl_key_cmp(&e->key, &key)) {
+ rrl_entry_ref(rrl, e, new_bin, probes, now); /* also moves the entry into the current table */
+ return (e);
+ }
+ }
+
+ /*
+ * Discard previous hash table when its entries are all old.
+ */
+ if (delta_rrl_time(old_hash->check_time, now) > rrl->window)
+ free_old_hash(rrl);
+ }
+
+ if (!create)
+ return (NULL);
+
+ /*
+ * The entry does not already exist, so create it.
+ * Unroll the first circuit of the loop to cover most cases.
+ * Immediately create a new entry if the oldest is fresh.
+ * Preserve penalized entries.
+ * Try to make more entries if none are idle.
+ * Steal the oldest entry if we cannot make more.
+ */
+ e = ISC_LIST_TAIL(rrl->lru); /* least recently used entry */
+ age = delta_rrl_time(e->last_used, now);
+ if (age <= rrl->window) {
+ for (;;) {
+ if (age <= 1) {
+ add_rrl_entries(rrl,
+ ISC_MIN((rrl->num_entries+1)/2,
+ 1000)); /* result ignored; on failure the old tail is reused */
+ e = ISC_LIST_TAIL(rrl->lru);
+ break;
+ }
+ if (response_balance(rrl, e, age) >= 0) /* not penalized, so safe to recycle */
+ break;
+
+ e = e->lru.prev; /* step toward more recently used entries */
+ if (e == NULL) {
+ add_rrl_entries(rrl,
+ ISC_MIN((rrl->num_entries+1)/2,
+ 1000));
+ e = ISC_LIST_TAIL(rrl->lru);
+ break;
+ }
+ age = delta_rrl_time(e->last_used, now);
+ }
+ }
+ if (e->logged) /* close out logging for the entry being recycled */
+ log_end(rrl, e, log_buf, log_buf_len);
+ e->key = key;
+ e->last_used = 0;
+ rrl_entry_ref(rrl, e, new_bin, probes, now);
+ return (e);
+}
+
+/*
+ * Debit the token bucket of entry 'e' for one response.
+ * Returns DNS_RRL_RESULT_OK when no rate is configured or the bucket is
+ * not overdrawn; otherwise DNS_RRL_RESULT_SLIP for every slip'th limited
+ * response and DNS_RRL_RESULT_DROP for the others.  A 'scale' below 1.0
+ * shrinks the rate and the slip; 'qps' is used only in debug log text.
+ */
+static inline dns_rrl_result_t
+debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
+ const isc_sockaddr_t *client_addr, isc_stdtime_t now,
+ char *log_buf, unsigned int log_buf_len)
+{
+ int rate, new_rate, *ratep, slip, new_slip, age, log_secs, min;
+ const char *rate_str;
+ dns_rrl_entry_t const *credit_e;
+
+ /*
+ * Pick the rate counter. Optionally adjust the rates by the estimated
+ * query/second rate.
+ */
+ switch (e->key.rtype) {
+ case DNS_RRL_RTYPE_QUERY:
+ rate = rrl->responses_per_second;
+ ratep = &rrl->scaled_responses_per_second;
+ break;
+ case DNS_RRL_RTYPE_NXDOMAIN:
+ rate = rrl->nxdomains_per_second;
+ ratep = &rrl->scaled_nxdomains_per_second;
+ break;
+ case DNS_RRL_RTYPE_ERROR:
+ rate = rrl->errors_per_second;
+ ratep = &rrl->scaled_errors_per_second;
+ break;
+ case DNS_RRL_RTYPE_ALL:
+ rate = rrl->all_per_second;
+ ratep = &rrl->scaled_all_per_second;
+ break;
+ default:
+ INSIST(0);
+ }
+ if (rate == 0)
+ return (DNS_RRL_RESULT_OK);
+
+ if (scale < 1.0) {
+ /*
+ * The limit for clients that have used TCP is not scaled.
+ */
+ credit_e = get_rrl_entry(rrl, client_addr, dns_rdatatype_none,
+ NULL, 0, DNS_RRL_RTYPE_TCP, now, ISC_FALSE,
+ log_buf, log_buf_len);
+ if (credit_e != NULL) {
+ age = delta_rrl_time(credit_e->last_used, now);
+ if (age < rrl->window)
+ scale = 1.0;
+ }
+ }
+ if (scale < 1.0) {
+ new_rate = (int)(rate * scale);
+ if (new_rate < 1)
+ new_rate = 1;
+ if (*ratep != new_rate) {
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
+ switch (e->key.rtype) {
+ case DNS_RRL_RTYPE_QUERY:
+ rate_str = "responses-per-second";
+ break;
+ case DNS_RRL_RTYPE_NXDOMAIN:
+ rate_str = "nxdomains-per-second";
+ break;
+ case DNS_RRL_RTYPE_ERROR:
+ rate_str = "errors-per-second";
+ break;
+ case DNS_RRL_RTYPE_ALL:
+ rate_str = "all-per-second";
+ break;
+ default:
+ INSIST(0);
+ }
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
+ "%d qps scaled %s by %.2f"
+ " from %d to %d",
+ (int)qps, rate_str, scale, rate, new_rate);
+ }
+ *ratep = new_rate;
+ }
+ /* Use the scaled rate even when the cached copy already matches. */
+ rate = new_rate;
+ }
+
+ min = -rrl->window * rate;
+
+ /*
+ * Treat time jumps into the past as no time.
+ * Treat entries older than the window as if they were just created
+ * Credit other entries.
+ */
+ age = delta_rrl_time(e->last_used, now);
+ if (age > 0) {
+ /*
+ * Credit tokens earned during elapsed time.
+ */
+ if (age > rrl->window) {
+ e->responses = rate;
+ e->slip_cnt = 0;
+ } else {
+ e->responses += rate*age;
+ if (e->responses > rate) {
+ e->responses = rate;
+ e->slip_cnt = 0;
+ }
+ }
+ /*
+ * Find the seconds since last log message without overflowing
+ * the small counter.  This counter should be reset when an
+ * entry is created (or recycled) and after at least one second
+ * without limiting.  It is not necessarily reset when some
+ * requests are answered provided other requests continue to be
+ * dropped or slipped, as when the rate is just at the limit.
+ */
+ if (e->logged) {
+ log_secs = e->log_secs;
+ log_secs += age;
+ if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0)
+ log_secs = DNS_RRL_MAX_LOG_SECS;
+ e->log_secs = log_secs;
+ }
+ }
+ e->last_used = now;
+
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST,
+ DNS_RRL_LOG_DEBUG3, "rrl age=%d responses=%d", age, e->responses);
+
+ /*
+ * Debit the entry for this response.
+ */
+ if (--e->responses >= 0)
+ return (DNS_RRL_RESULT_OK);
+
+ if (e->responses < min)
+ e->responses = min;
+
+ /*
+ * Drop this response unless it should leak.
+ */
+ slip = rrl->slip;
+ if (slip > 2 && scale < 1.0) {
+ new_slip = (int)(slip * scale);
+ if (new_slip < 2)
+ new_slip = 2;
+ if (rrl->scaled_slip != new_slip) {
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1))
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
+ "%d qps scaled slip"
+ " by %.2f from %d to %d",
+ (int)qps, scale, slip, new_slip);
+ rrl->scaled_slip = new_slip;
+ }
+ /* Limit with the scaled slip on every call, not only when it changes. */
+ slip = new_slip;
+ }
+ if (slip != 0 && ++e->slip_cnt >= slip) {
+ e->slip_cnt = 0;
+ return (DNS_RRL_RESULT_SLIP);
+ }
+
+ return (DNS_RRL_RESULT_DROP);
+}
+
+/* Return the qname buffer in slot e->log_qname, or NULL if it is not e's. */
+static inline dns_rrl_qname_buf_t *
+get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
+ dns_rrl_qname_buf_t *saved = rrl->qnames[e->log_qname];
+
+ if (saved != NULL && saved->e == e)
+ return (saved);
+ return (NULL);
+}
+
+/* Detach e's saved qname buffer, if it has one, and put it on the free list. */
+static inline void
+free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
+ dns_rrl_qname_buf_t *owned = get_qname(rrl, e);
+
+ if (owned == NULL)
+ return;
+ owned->e = NULL;
+ ISC_LIST_APPEND(rrl->qname_free, owned, link);
+}
+
+/* Append up to str_len bytes of str to lb, truncating to the space left. */
+static void
+add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len)
+{
+ isc_region_t region;
+
+ isc_buffer_availableregion(lb, &region);
+ if (region.length <= 0)
+ return;
+ if (str_len > region.length)
+ str_len = region.length;
+ memcpy(region.base, str, str_len);
+ isc_buffer_add(lb, str_len);
+}
+
+#define ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1) /* s must be a string literal or char array: its length is taken with sizeof */
+
+/* Build a log string in log_buf from, in order: the optional prefixes str1
+ * and str2, "drop "/"slip ", the response kind, "response(s) to ", the client
+ * block and, for QUERY/NXDOMAIN entries, qname, class, type and qname hash. */
+static void
+make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e,
+ const char *str1, const char *str2, isc_boolean_t plural,
+ dns_rrl_result_t rrl_result,
+ dns_name_t *qname, isc_boolean_t save_qname, dns_rcode_t rcode,
+ char *log_buf, unsigned int log_buf_len)
+{
+ isc_buffer_t lb;
+ dns_rrl_qname_buf_t *qbuf;
+ isc_netaddr_t cidr;
+ char strbuf[ISC_MAX(sizeof("/123"), sizeof(" (12345678)"))];
+ isc_result_t msg_result;
+
+ if (log_buf_len <= 1) { /* no room for anything but the terminator */
+ if (log_buf_len == 1)
+ log_buf[0] = '\0';
+ return;
+ }
+ isc_buffer_init(&lb, log_buf, log_buf_len-1); /* keep the last byte for '\0' */
+
+ if (str1 != NULL)
+ add_log_str(&lb, str1, strlen(str1));
+ if (str2 != NULL)
+ add_log_str(&lb, str2, strlen(str2));
+
+ switch (rrl_result) {
+ case DNS_RRL_RESULT_OK:
+ break;
+ case DNS_RRL_RESULT_DROP:
+ ADD_LOG_CSTR(&lb, "drop ");
+ break;
+ case DNS_RRL_RESULT_SLIP:
+ ADD_LOG_CSTR(&lb, "slip ");
+ break;
+ default:
+ INSIST(0);
+ break;
+ }
+
+ /* Name the kind of response; QUERY and ALL entries add nothing here. */
+ switch (e->key.rtype) {
+ case DNS_RRL_RTYPE_QUERY:
+ case DNS_RRL_RTYPE_ALL:
+ break;
+ case DNS_RRL_RTYPE_NXDOMAIN:
+ ADD_LOG_CSTR(&lb, "NXDOMAIN ");
+ break;
+ case DNS_RRL_RTYPE_ERROR:
+ if (rcode == dns_rcode_noerror) { /* e.g. log_end() passes noerror for every entry */
+ ADD_LOG_CSTR(&lb, "error ");
+ } else {
+ msg_result = dns_rcode_totext(rcode, &lb);
+ if (msg_result == ISC_R_SUCCESS) {
+ ADD_LOG_CSTR(&lb, " ");
+ } else {
+ ADD_LOG_CSTR(&lb, "UNKNOWN RCODE ");
+ }
+ }
+ break;
+ default:
+ INSIST(0);
+ }
+
+ if (plural)
+ ADD_LOG_CSTR(&lb, "responses to ");
+ else
+ ADD_LOG_CSTR(&lb, "response to ");
+
+ memset(&cidr, 0, sizeof(cidr)); /* rebuild an address from the key to print it as addr/prefixlen */
+ if (e->key.ipv6) {
+ snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
+ cidr.family = AF_INET6;
+ memcpy(&cidr.type.in6, e->key.ip, sizeof(cidr.type.in6));
+ } else {
+ snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
+ cidr.family = AF_INET;
+ cidr.type.in.s_addr = e->key.ip[3]; /* NOTE(review): presumably the key builder stores IPv4 in the last word; confirm */
+ }
+ msg_result = isc_netaddr_totext(&cidr, &lb);
+ if (msg_result != ISC_R_SUCCESS)
+ ADD_LOG_CSTR(&lb, "?");
+ add_log_str(&lb, strbuf, strlen(strbuf));
+
+ if (e->key.rtype == DNS_RRL_RTYPE_QUERY ||
+ e->key.rtype == DNS_RRL_RTYPE_NXDOMAIN) {
+ qbuf = get_qname(rrl, e);
+ if (save_qname && qbuf == NULL &&
+ qname != NULL && dns_name_isabsolute(qname)) {
+ /*
+ * Capture the qname for the "stop limiting" message,
+ * which log_end() builds without a qname of its own.
+ */
+ qbuf = ISC_LIST_TAIL(rrl->qname_free); /* reuse a free buffer before allocating */
+ if (qbuf != NULL) {
+ ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
+ } else if (rrl->num_qnames < DNS_RRL_NUM_QNAMES) {
+ qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
+ if (qbuf != NULL) {
+ memset(qbuf, 0, sizeof(*qbuf));
+ qbuf->index = rrl->num_qnames;
+ rrl->qnames[rrl->num_qnames++] = qbuf;
+ } else {
+ isc_log_write(dns_lctx,
+ DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST,
+ DNS_RRL_LOG_FAIL,
+ "isc_mem_get(%d)"
+ " failed for RRL qname",
+ (int)sizeof(*qbuf));
+ }
+ }
+ if (qbuf != NULL) {
+ e->log_qname = qbuf->index;
+ qbuf->e = e;
+ dns_fixedname_init(&qbuf->qname);
+ dns_name_copy(qname,
+ dns_fixedname_name(&qbuf->qname),
+ NULL);
+ }
+ }
+ if (qbuf != NULL) /* a saved qname takes precedence over the caller's */
+ qname = dns_fixedname_name(&qbuf->qname);
+ if (qname != NULL) {
+ ADD_LOG_CSTR(&lb, " for ");
+ dns_name_totext(qname, ISC_TRUE, &lb);
+ ADD_LOG_CSTR(&lb, " ");
+ } else {
+ ADD_LOG_CSTR(&lb, " for (?) ");
+ }
+ dns_rdataclass_totext(e->key.qclass-2, &lb); /* NOTE(review): the "-2" is unexplained here; confirm how the key stores qclass */
+ ADD_LOG_CSTR(&lb, " ");
+ dns_rdatatype_totext(e->key.qtype, &lb);
+ snprintf(strbuf, sizeof(strbuf), " (%08x)", e->key.qname_hash);
+ add_log_str(&lb, strbuf, strlen(strbuf));
+ }
+
+ /*
+ * We saved room for '\0' by giving the buffer log_buf_len-1 bytes.
+ */
+ log_buf[isc_buffer_usedlength(&lb)] = '\0';
+}
+
+/* Log "stop limiting" for an entry that logged a start, then release its qname. */
+static void
+log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e,
+ char *log_buf, unsigned int log_buf_len)
+{
+ const char *prefix = rrl->log_only ? "would " : NULL;
+ if (!e->logged)
+ return;
+ make_log_buf(rrl, e, prefix, "stop limiting ", ISC_TRUE,
+ DNS_RRL_RESULT_OK, NULL, ISC_FALSE,
+ dns_rcode_noerror, log_buf, log_buf_len);
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST,
+ DNS_RRL_LOG_DROP, "%s", log_buf);
+ free_qname(rrl, e);
+ e->logged = ISC_FALSE;
+}
+
+/*
+ * Log "stop limiting" for logged entries reached from rrl->log_ended via
+ * lru.prev that have been idle longer than the window and for at least
+ * DNS_RRL_STOP_LOG_SECS and whose response balance is no longer negative.
+ */
+static void
+prune_qnames(dns_rrl_t *rrl, isc_stdtime_t now,
+ char *log_buf, unsigned int log_buf_len)
+{
+ dns_rrl_entry_t *e, *e_prev;
+ isc_boolean_t move_ptr = ISC_TRUE;
+ int cnt = 8, age;
+
+ /* Keep log_ended unchanged if the walk below visits no entry. */
+ e_prev = rrl->log_ended;
+
+ for (e = rrl->log_ended; e != NULL; e = e->lru.prev) {
+ e_prev = e;
+ if (!e->logged)
+ continue;
+
+ age = delta_rrl_time(e->last_used, now);
+ if (age <= rrl->window) {
+ rrl->prune_time = now;
+ break;
+ }
+
+ if (age < DNS_RRL_STOP_LOG_SECS ||
+ response_balance(rrl, e, age) < 0) {
+ move_ptr = ISC_FALSE;
+ continue;
+ }
+
+ log_end(rrl, e, log_buf, log_buf_len);
+
+ /* Do not log many messages at once to avoid stalling real work. */
+ if (--cnt <= 0)
+ break;
+ }
+ if (e == NULL)
+ rrl->prune_time = now;
+ /* Resume here next time unless a logged entry had to be skipped. */
+ if (move_ptr)
+ rrl->log_ended = e_prev;
+}
+
+/*
+ * Main rate limit interface.
+ * Decide whether the response described by the arguments may be sent.
+ * Returns DNS_RRL_RESULT_OK, DNS_RRL_RESULT_DROP or DNS_RRL_RESULT_SLIP.
+ * When the result is not OK and 'wouldlog' is true, log_buf holds a
+ * message for the caller to log.  TCP responses are never limited.
+ */
+dns_rrl_result_t
+dns_rrl(dns_view_t *view,
+ const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
+ dns_rdataclass_t qclass, dns_rdatatype_t qtype,
+ dns_name_t *qname, dns_rcode_t rcode, isc_stdtime_t now,
+ isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len)
+{
+ dns_rrl_t *rrl;
+ dns_rrl_rtype_t rtype;
+ dns_rrl_entry_t *e;
+ isc_netaddr_t netclient;
+ int secs;
+ double qps, scale;
+ int exempt_match;
+ isc_result_t result;
+ dns_rrl_result_t rrl_result;
+
+ INSIST(log_buf != NULL && log_buf_len > 0);
+
+ rrl = view->rrl;
+ if (rrl->exempt != NULL) {
+ isc_netaddr_fromsockaddr(&netclient, client_addr);
+ result = dns_acl_match(&netclient, NULL, rrl->exempt,
+ &view->aclenv, &exempt_match, NULL);
+ if (result == ISC_R_SUCCESS && exempt_match > 0)
+ return (DNS_RRL_RESULT_OK);
+ }
+
+ LOCK(&rrl->lock);
+
+ /*
+ * Estimate total query per second rate when scaling by qps.
+ */
+ if (rrl->qps_scale == 0) {
+ qps = 0.0;
+ scale = 1.0;
+ } else {
+ ++rrl->qps_responses;
+ secs = delta_rrl_time(rrl->qps_time, now);
+ if (secs <= 0) {
+ qps = rrl->qps;
+ } else {
+ qps = (1.0*rrl->qps_responses) / secs;
+ if (secs >= rrl->window) {
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3))
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST,
+ DNS_RRL_LOG_DEBUG3,
+ "%d responses/%d seconds"
+ " = %d qps",
+ rrl->qps_responses, secs, (int)qps);
+ rrl->qps = qps;
+ rrl->qps_responses = 0;
+ rrl->qps_time = now;
+ } else if (qps < rrl->qps) {
+ qps = rrl->qps;
+ }
+ }
+ scale = rrl->qps_scale / qps;
+ }
+
+ if (rrl->prune_time != now)
+ prune_qnames(rrl, now, log_buf, log_buf_len);
+
+ /*
+ * Notice TCP responses when scaling limits by qps.
+ * Do not try to rate limit TCP responses.
+ */
+ if (is_tcp) {
+ if (scale < 1.0) {
+ e = get_rrl_entry(rrl, client_addr,
+ dns_rdatatype_none, NULL, 0,
+ DNS_RRL_RTYPE_TCP, now, ISC_TRUE,
+ log_buf, log_buf_len);
+ if (e != NULL) {
+ e->responses = -(rrl->window+1);
+ e->last_used = now;
+ }
+ }
+ UNLOCK(&rrl->lock);
+ return (DNS_RRL_RESULT_OK);
+ }
+
+ /*
+ * Find the right kind of entry, creating it if necessary.
+ * If that is impossible, then nothing more can be done
+ */
+ if (rcode == dns_rcode_noerror)
+ rtype = DNS_RRL_RTYPE_QUERY;
+ else if (rcode == dns_rcode_nxdomain)
+ rtype = DNS_RRL_RTYPE_NXDOMAIN;
+ else
+ rtype = DNS_RRL_RTYPE_ERROR;
+ e = get_rrl_entry(rrl, client_addr, qtype, qname, qclass, rtype,
+ now, ISC_TRUE, log_buf, log_buf_len);
+ if (e == NULL) {
+ UNLOCK(&rrl->lock);
+ return (DNS_RRL_RESULT_OK);
+ }
+
+ if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
+ /*
+ * Do not worry about speed or releasing the lock.
+ * This message appears before messages from debit_rrl_entry().
+ */
+ make_log_buf(rrl, e, "consider limiting ", NULL, ISC_FALSE,
+ DNS_RRL_RESULT_OK, qname, ISC_FALSE,
+ rcode, log_buf, log_buf_len);
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
+ "%s", log_buf);
+ }
+
+ rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
+ log_buf, log_buf_len);
+
+ if (rrl->all_per_second != 0) {
+ /*
+ * We must debit the all-per-second token bucket if we have
+ * an all-per-second limit for the IP address.
+ * The all-per-second limit determines the log message
+ * when both limits are hit.
+ */
+ dns_rrl_entry_t *e_all;
+ dns_rrl_result_t rrl_all_result;
+
+ e_all = get_rrl_entry(rrl, client_addr, dns_rdatatype_none,
+ NULL, 0, DNS_RRL_RTYPE_ALL, now, ISC_TRUE,
+ log_buf, log_buf_len);
+ if (e_all == NULL) {
+ UNLOCK(&rrl->lock);
+ return (DNS_RRL_RESULT_OK);
+ }
+ rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
+ client_addr, now,
+ log_buf, log_buf_len);
+ if (rrl_all_result != DNS_RRL_RESULT_OK) {
+ int level;
+
+ e = e_all;
+ if (rrl_result == DNS_RRL_RESULT_OK)
+ level = DNS_RRL_LOG_DEBUG2;
+ else
+ level = DNS_RRL_LOG_DEBUG1;
+ rrl_result = rrl_all_result;
+ if (isc_log_wouldlog(dns_lctx, level)) {
+ make_log_buf(rrl, e,
+ "prefer all-per-second limiting ",
+ NULL, ISC_TRUE, DNS_RRL_RESULT_OK,
+ NULL, ISC_FALSE, dns_rcode_noerror,
+ log_buf, log_buf_len);
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, level,
+ "%s", log_buf);
+ }
+ }
+ }
+
+ if (rrl_result == DNS_RRL_RESULT_OK) {
+ UNLOCK(&rrl->lock);
+ return (DNS_RRL_RESULT_OK);
+ }
+
+ /*
+ * Log occasionally in the rate-limit category.
+ */
+ if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
+ isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
+ make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
+ e->logged ? "continue limiting " : "limit ",
+ ISC_TRUE, DNS_RRL_RESULT_OK,
+ qname, ISC_TRUE, rcode, log_buf, log_buf_len);
+ e->logged = ISC_TRUE;
+ e->log_secs = 0;
+ /*
+ * Avoid holding the lock.
+ */
+ if (!wouldlog) {
+ UNLOCK(&rrl->lock);
+ e = NULL;
+ }
+ isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+ DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
+ "%s", log_buf);
+ }
+
+ /*
+ * Make a log message for the caller.
+ */
+ if (wouldlog)
+ make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
+ NULL, ISC_FALSE, rrl_result,
+ qname, ISC_FALSE, rcode, log_buf, log_buf_len);
+
+ if (e != NULL) {
+ /*
+ * Do not save the qname unless we might need it for
+ * the ending log message.
+ */
+ if (!e->logged)
+ free_qname(rrl, e);
+ UNLOCK(&rrl->lock);
+ }
+ return (rrl_result);
+}
+
+/* Detach the rate limiting state from 'view', if it has any, and free it. */
+void
+dns_rrl_view_destroy(dns_view_t *view) {
+ dns_rrl_t *rrl;
+ dns_rrl_block_t *b;
+ dns_rrl_hash_t *h;
+ int i;
+
+ rrl = view->rrl;
+ if (rrl == NULL)
+ return;
+ view->rrl = NULL;
+
+ /*
+ * Assume the caller takes care of locking the view and anything else.
+ * log_ended may still be NULL when dns_rrl_init() failed early.
+ */
+ do {
+ char log_buf[DNS_RRL_LOG_BUF_LEN];
+ if (rrl->log_ended != NULL)
+ prune_qnames(rrl, rrl->prune_time+DNS_RRL_MAX_WINDOW+1,
+ log_buf, sizeof(log_buf));
+ } while (rrl->log_ended != NULL && rrl->log_ended->lru.prev != NULL);
+
+ for (i = 0; i < DNS_RRL_NUM_QNAMES; ++i) {
+ if (rrl->qnames[i] == NULL)
+ break;
+ isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
+ }
+
+ if (rrl->exempt != NULL)
+ dns_acl_detach(&rrl->exempt);
+
+ DESTROYLOCK(&rrl->lock);
+
+ while (!ISC_LIST_EMPTY(rrl->blocks)) {
+ b = ISC_LIST_HEAD(rrl->blocks);
+ ISC_LIST_UNLINK(rrl->blocks, b, link);
+ isc_mem_put(rrl->mctx, b, b->size);
+ }
+
+ h = rrl->hash;
+ if (h != NULL)
+ isc_mem_put(rrl->mctx, h, sizeof(*h)+(h->length-1)*sizeof(h->bins[0]));
+
+ h = rrl->old_hash;
+ if (h != NULL)
+ isc_mem_put(rrl->mctx, h, sizeof(*h)+(h->length-1)*sizeof(h->bins[0]));
+
+ isc_mem_put(rrl->mctx, rrl, sizeof(*rrl));
+}
+
+/*
+ * Create the rate limiting state for 'view', starting with the entries
+ * added by add_rrl_entries(min_entries).  On success the state is stored
+ * in view->rrl and *rrlp; on failure *rrlp is NULL and the state is freed.
+ */
+isc_result_t
+dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
+ dns_rrl_t *new_rrl;
+ isc_result_t status;
+
+ *rrlp = NULL;
+ new_rrl = isc_mem_get(view->mctx, sizeof(*new_rrl));
+ if (new_rrl == NULL)
+ return (ISC_R_NOMEMORY);
+ memset(new_rrl, 0, sizeof(*new_rrl));
+ new_rrl->mctx = view->mctx;
+ status = isc_mutex_init(&new_rrl->lock);
+ if (status != ISC_R_SUCCESS) {
+ isc_mem_put(view->mctx, new_rrl, sizeof(*new_rrl));
+ return (status);
+ }
+
+ /* From here on dns_rrl_view_destroy() releases everything on failure. */
+ view->rrl = new_rrl;
+ status = add_rrl_entries(new_rrl, min_entries);
+ if (status == ISC_R_SUCCESS)
+ status = expand_rrl_hash(new_rrl, 0);
+ if (status != ISC_R_SUCCESS) {
+ dns_rrl_view_destroy(view);
+ return (status);
+ }
+ *rrlp = new_rrl;
+ return (ISC_R_SUCCESS);
+}
diff -r -u lib/dns/view.c-orig lib/dns/view.c
--- lib/dns/view.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/dns/view.c 2004-01-01 00:00:00.000000000 +0000
@@ -48,6 +48,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -181,6 +182,7 @@
view->answeracl_exclude = NULL;
view->denyanswernames = NULL;
view->answernames_exclude = NULL;
+ view->rrl = NULL;
view->provideixfr = ISC_TRUE;
view->maxcachettl = 7 * 24 * 3600;
view->maxncachettl = 3 * 3600;
@@ -331,9 +333,11 @@
dns_acache_detach(&view->acache);
}
dns_rpz_view_destroy(view);
+ dns_rrl_view_destroy(view);
#else
INSIST(view->acache == NULL);
INSIST(ISC_LIST_EMPTY(view->rpz_zones));
+ INSIST(view->rrl == NULL);
#endif
if (view->requestmgr != NULL)
dns_requestmgr_detach(&view->requestmgr);
diff -r -u lib/isccfg/namedconf.c-orig lib/isccfg/namedconf.c
--- lib/isccfg/namedconf.c-orig 2004-01-01 00:00:00.000000000 +0000
+++ lib/isccfg/namedconf.c 2004-01-01 00:00:00.000000000 +0000
@@ -1244,6 +1244,39 @@
};
+/*%
+ * rate-limit: clauses accepted inside a "rate-limit" map.
+ * Each clause name is listed once.
+ */
+static cfg_clausedef_t rrl_clauses[] = {
+ { "responses-per-second", &cfg_type_uint32, 0 },
+ { "errors-per-second", &cfg_type_uint32, 0 },
+ { "nxdomains-per-second", &cfg_type_uint32, 0 },
+ { "all-per-second", &cfg_type_uint32, 0 },
+ { "slip", &cfg_type_uint32, 0 },
+ { "window", &cfg_type_uint32, 0 },
+ { "log-only", &cfg_type_boolean, 0 },
+ { "qps-scale", &cfg_type_uint32, 0 },
+ { "IPv4-prefix-length", &cfg_type_uint32, 0 },
+ { "IPv6-prefix-length", &cfg_type_uint32, 0 },
+ { "exempt-clients", &cfg_type_bracketed_aml, 0 },
+ { "max-table-size", &cfg_type_uint32, 0 },
+ { "min-table-size", &cfg_type_uint32, 0 },
+ { NULL, NULL, 0 }
+};
+
+static cfg_clausedef_t *rrl_clausesets[] = { /* clause tables used by cfg_type_rrl; only rrl_clauses */
+ rrl_clauses,
+ NULL
+};
+
+static cfg_type_t cfg_type_rrl = { /* type named "rate-limit": a map whose clauses come from rrl_clausesets */
+ "rate-limit", cfg_parse_map, cfg_print_map, cfg_doc_map,
+ &cfg_rep_map, rrl_clausesets
+};
+
+
+
/*%
* dnssec-lookaside
*/
@@ -1397,6 +1430,7 @@
CFG_CLAUSEFLAG_NOTCONFIGURED },
#endif
{ "response-policy", &cfg_type_rpz, 0 },
+ { "rate-limit", &cfg_type_rrl, 0 },
{ NULL, NULL, 0 }
};
diff -r -u version-orig version
--- version-orig 2004-01-01 00:00:00.000000000 +0000
+++ version 2004-01-01 00:00:00.000000000 +0000
@@ -5,6 +5,6 @@
#
MAJORVER=9
MINORVER=9
-PATCHVER=2
+PATCHVER=2-vjs287.12
RELEASETYPE=
RELEASEVER=