diff -r -u bin/named/client.c-orig bin/named/client.c --- bin/named/client.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/named/client.c 2004-01-01 00:00:00.000000000 +0000 @@ -994,6 +994,11 @@ } if (result != ISC_R_SUCCESS) goto done; + /* + * Stop after the question if TC was set for rate limiting. + */ + if ((client->message->flags & DNS_MESSAGEFLAG_TC) != 0) + goto renderend; result = dns_message_rendersection(client->message, DNS_SECTION_ANSWER, DNS_MESSAGERENDER_PARTIAL | @@ -1134,6 +1139,49 @@ #endif /* + * Try to rate limit error responses. + */ + if (client->view != NULL && client->view->rrl != NULL) { + isc_boolean_t wouldlog; + char log_buf[DNS_RRL_LOG_BUF_LEN]; + dns_rrl_result_t rrl_result; + + INSIST(rcode != dns_rcode_noerror && + rcode != dns_rcode_nxdomain); + wouldlog = (ns_g_server->log_queries && + isc_log_wouldlog(ns_g_lctx, DNS_RRL_LOG_DROP)); + rrl_result = dns_rrl(client->view, &client->peeraddr, + TCP_CLIENT(client), + dns_rdataclass_in, dns_rdatatype_none, + NULL, rcode, client->now, + wouldlog, log_buf, sizeof(log_buf)); + if (rrl_result != DNS_RRL_RESULT_OK) { + /* + * Log dropped errors in the query category + * so that they are not lost in silence. + * Starts of rate-limited bursts are logged in + * NS_LOGCATEGORY_RRL. + */ + if (wouldlog) { + ns_client_log(client, NS_LOGCATEGORY_QUERIES, + NS_LOGMODULE_CLIENT, + DNS_RRL_LOG_DROP, + "%s", log_buf); + } + /* + * Some error responses cannot be 'slipped', + * so don't try. + * This will be counted with dropped queries in the + * QryDropped counter. + */ + if (!client->view->rrl->log_only) { + ns_client_next(client, DNS_R_DROP); + return; + } + } + } + + /* * Message may be an in-progress reply that we had trouble * with, in which case QR will be set. We need to clear QR before * calling dns_message_reply() to avoid triggering an assertion. 
diff -r -u bin/named/config.c-orig bin/named/config.c --- bin/named/config.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/named/config.c 2004-01-01 00:00:00.000000000 +0000 @@ -227,6 +227,17 @@ notify no;\n\ allow-new-zones no;\n\ \n\ + # Prevent use of this zone in DNS amplified reflection DoS attacks\n\ + # Notice the size of the authors.bind response.\n\ + rate-limit {\n\ + responses-per-second 1;\n\ + window 10;\n\ + slip 0;\n\ + IPv4-prefix-length 16;\n\ + IPv6-prefix-length 32;\n\ + min-table-size 10;\n\ + };\n\ +\n\ zone \"version.bind\" chaos {\n\ type master;\n\ database \"_builtin version\";\n\ diff -r -u bin/named/include/named/query.h-orig bin/named/include/named/query.h --- bin/named/include/named/query.h-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/named/include/named/query.h 2004-01-01 00:00:00.000000000 +0000 @@ -85,6 +85,7 @@ #define NS_QUERYATTR_CACHEACLOK 0x2000 #define NS_QUERYATTR_DNS64 0x4000 #define NS_QUERYATTR_DNS64EXCLUDE 0x8000 +#define NS_QUERYATTR_RRL_CHECKED 0x10000 isc_result_t diff -r -u bin/named/include/named/server.h-orig bin/named/include/named/server.h --- bin/named/include/named/server.h-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/named/include/named/server.h 2004-01-01 00:00:00.000000000 +0000 @@ -165,7 +165,10 @@ dns_nsstatscounter_updatefail = 34, dns_nsstatscounter_updatebadprereq = 35, - dns_nsstatscounter_max = 36 + dns_nsstatscounter_ratedropped = 36, + dns_nsstatscounter_rateslipped = 37, + + dns_nsstatscounter_max = 38 }; void diff -r -u bin/named/query.c-orig bin/named/query.c --- bin/named/query.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/named/query.c 2004-01-01 00:00:00.000000000 +0000 @@ -5746,6 +5746,104 @@ resume: CTRACE("query_find: resume"); + /* + * Rate limit these responses to this client. 
+ */ + if (client->view->rrl != NULL && + fname != NULL && dns_name_isabsolute(fname) && + (client->query.attributes & NS_QUERYATTR_RRL_CHECKED) == 0) { + dns_rdataset_t nc_rdataset; + dns_rcode_t rcode; + isc_boolean_t wouldlog; + char log_buf[DNS_RRL_LOG_BUF_LEN]; + isc_result_t nc_result; + dns_rrl_result_t rrl_result; + + client->query.attributes |= NS_QUERYATTR_RRL_CHECKED; + + wouldlog = isc_log_wouldlog(ns_g_lctx, DNS_RRL_LOG_DROP); + tname = fname; + if (result == DNS_R_NXDOMAIN) { + /* + * Use the database origin name to rate limit NXDOMAIN + */ + if (db != NULL) + tname = dns_db_origin(db); + rcode = dns_rcode_nxdomain; + } else if (result == DNS_R_NCACHENXDOMAIN && + rdataset != NULL && + dns_rdataset_isassociated(rdataset) && + (rdataset->attributes & + DNS_RDATASETATTR_NEGATIVE) != 0) { + /* + * Try to use owner name in the negative cache SOA. + */ + dns_fixedname_init(&fixed); + dns_rdataset_init(&nc_rdataset); + for (nc_result = dns_rdataset_first(rdataset); + nc_result == ISC_R_SUCCESS; + nc_result = dns_rdataset_next(rdataset)) { + dns_ncache_current(rdataset, + dns_fixedname_name(&fixed), + &nc_rdataset); + if (nc_rdataset.type == dns_rdatatype_soa) { + dns_rdataset_disassociate(&nc_rdataset); + tname = dns_fixedname_name(&fixed); + break; + } + dns_rdataset_disassociate(&nc_rdataset); + } + rcode = dns_rcode_nxdomain; + } else { + rcode = dns_rcode_noerror; + } + rrl_result = dns_rrl(client->view, &client->peeraddr, + ISC_TF((client->attributes + & NS_CLIENTATTR_TCP) != 0), + client->message->rdclass, qtype, tname, + rcode, client->now, + wouldlog, log_buf, sizeof(log_buf)); + if (rrl_result != DNS_RRL_RESULT_OK) { + /* + * Log dropped or slipped responses in the query + * category so that requests are not silently lost. + * Starts of rate-limited bursts are logged in + * DNS_LOGCATEGORY_RRL. 
+ * + * Dropped responses are counted with dropped queries + * in QryDropped while slipped responses are counted + * with other truncated responses in RespTruncated. + */ + if (wouldlog && ns_g_server->log_queries) { + ns_client_log(client, NS_LOGCATEGORY_QUERIES, + NS_LOGMODULE_CLIENT, + DNS_RRL_LOG_DROP, + "%s", log_buf); + } + if (!client->view->rrl->log_only) { + if (rrl_result == DNS_RRL_RESULT_DROP) { + /* + * These will also be counted in + * dns_nsstatscounter_dropped + */ + inc_stats(client, + dns_nsstatscounter_ratedropped); + QUERY_ERROR(DNS_R_DROP); + } else { + /* + * These will also be counted in + * dns_nsstatscounter_truncatedresp + */ + inc_stats(client, + dns_nsstatscounter_rateslipped); + client->message->flags |= + DNS_MESSAGEFLAG_TC; + } + goto cleanup; + } + } + } + if (!ISC_LIST_EMPTY(client->view->rpz_zones) && (RECURSIONOK(client) || !client->view->rpz_recursive_only) && rpz_ck_dnssec(client, result, rdataset, sigrdataset) && @@ -7168,12 +7266,14 @@ } if (eresult != ISC_R_SUCCESS && - (!PARTIALANSWER(client) || WANTRECURSION(client))) { + (!PARTIALANSWER(client) || WANTRECURSION(client) + || eresult == DNS_R_DROP)) { if (eresult == DNS_R_DUPLICATE || eresult == DNS_R_DROP) { /* * This was a duplicate query that we are - * recursing on. Don't send a response now. - * The original query will still cause a response. + * recursing on or the result of rate limiting. + * Don't send a response now for a duplicate query, + * because the original will still cause a response. 
*/ query_next(client, eresult); } else { diff -r -u bin/named/server.c-orig bin/named/server.c --- bin/named/server.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/named/server.c 2004-01-01 00:00:00.000000000 +0000 @@ -1561,6 +1561,213 @@ return (result); } +#define CHECK_RRL(obj, cond, pat, val) \ + do { \ + if (!(cond)) { \ + cfg_obj_log(obj, ns_g_lctx, ISC_LOG_ERROR, \ + pat, val); \ + result = ISC_R_RANGE; \ + goto cleanup; \ + } \ + } while (0) + +/* + * Configure response rate limiting for 'view' from the rate-limit + * clause 'map'. Settings missing from 'map' get the defaults below; + * 'config' is only passed on to cfg_acl_fromconfig() for exempt-clients. + * A value rejected by CHECK_RRL() is logged, dns_rrl_view_destroy() is + * called on 'view', and ISC_R_RANGE is returned. + */ +static isc_result_t +configure_rrl(dns_view_t *view, const cfg_obj_t *config, const cfg_obj_t *map) { + const cfg_obj_t *obj; + dns_rrl_t *rrl; + isc_result_t result; + int min_entries, i, j; + + /* + * Most DNS servers have few clients, but intentionally open + * recursive and authoritative servers often have many. + * So start with a small number of entries unless told otherwise + * to reduce cold-start costs. + */ + min_entries = 1000; + obj = NULL; + result = cfg_map_get(map, "min-table-size", &obj); + if (result == ISC_R_SUCCESS) { + min_entries = cfg_obj_asuint32(obj); + CHECK_RRL(obj, min_entries > 1, + "invalid '{min-table-size %d;}'", min_entries); + } + result = dns_rrl_init(&rrl, view, min_entries); + if (result != ISC_R_SUCCESS) + return (result); + + i = ISC_MAX(10000, min_entries); + obj = NULL; + result = cfg_map_get(map, "max-table-size", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i >= min_entries, + "invalid '{max-table-size %d;}'", i); + } + rrl->max_entries = i; + + /* + * The responses-per-second rate is off (0) by default. + * Reset i so that the table size above is not reused as the rate. + */ + i = 0; + obj = NULL; + result = cfg_map_get(map, "responses-per-second", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE, + "invalid '{responses-per-second %d;}'", i); + } + rrl->responses_per_second = i; + rrl->scaled_responses_per_second = rrl->responses_per_second; + + /* + * The default error rate is the response rate, + * and so off by default. 
+ */ + i = rrl->responses_per_second; + obj = NULL; + result = cfg_map_get(map, "errors-per-second", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE, + "invalid '{errors-per-second %d;}'", i); + } + rrl->errors_per_second = i; + rrl->scaled_errors_per_second = rrl->errors_per_second; + /* + * The default NXDOMAIN rate is the response rate, + * and so off by default. + */ + i = rrl->responses_per_second; + obj = NULL; + result = cfg_map_get(map, "nxdomains-per-second", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE, + "invalid '{nxdomains-per-second %d;}'", i); + } + rrl->nxdomains_per_second = i; + rrl->scaled_nxdomains_per_second = rrl->nxdomains_per_second; + + /* + * The all-per-second rate is off by default. + */ + i = 0; + obj = NULL; + result = cfg_map_get(map, "all-per-second", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i <= DNS_RRL_MAX_RATE, + "invalid '{all-per-second %d;}'", i); + CHECK_RRL(obj, i == 0 || (i >= rrl->responses_per_second*4 && + i >= rrl->errors_per_second*4 && + i >= rrl->nxdomains_per_second*4), + "'{all-per-second %d;}' must be" + " at least 4 times responses-per-second," + " errors-per-second, and nxdomains-per-second", + i); + } + rrl->all_per_second = i; + rrl->scaled_all_per_second = rrl->all_per_second; + + i = 2; + obj = NULL; + result = cfg_map_get(map, "slip", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i <= DNS_RRL_MAX_SLIP, "invalid '{slip %d;}'", i); + } + rrl->slip = i; + rrl->scaled_slip = rrl->slip; + + i = 15; + obj = NULL; + result = cfg_map_get(map, "window", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i >= 1 && i <= DNS_RRL_MAX_WINDOW, + "invalid '{window %d;}'", i); + } + rrl->window = i; + + i = 0; + obj = NULL; + result = cfg_map_get(map, "qps-scale", &obj); + if (result == 
ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i >= 1, "invalid '{qps-scale %d;}'", i); + } + rrl->qps_scale = i; + rrl->qps = 1.0; + + i = 24; + obj = NULL; + result = cfg_map_get(map, "IPv4-prefix-length", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i >= 8 && i <= 32, + "invalid '{IPv4-prefix-length %d;}'", i); + } + rrl->ipv4_prefixlen = i; + if (i == 32) + rrl->ipv4_mask = 0xffffffff; + else + rrl->ipv4_mask = htonl(0xffffffff << (32-i)); + + i = 56; + obj = NULL; + result = cfg_map_get(map, "IPv6-prefix-length", &obj); + if (result == ISC_R_SUCCESS) { + i = cfg_obj_asuint32(obj); + CHECK_RRL(obj, i >= 16 && i <= 128, + "invalid '{IPv6-prefix-length %d;}'", i); + } + rrl->ipv6_prefixlen = i; + memset(rrl->ipv6_mask, 0xff, sizeof(rrl->ipv6_mask)); + for (j = 0; j < 4; ++j) { + if (i == 0) { + rrl->ipv6_mask[j] = 0; + } else if (i < 32) { + rrl->ipv6_mask[j] = htonl(0xffffffff << (32-i)); + i = 0; + } else { + rrl->ipv6_mask[j] = 0xffffffff; + i -= 32; + } + } + + obj = NULL; + result = cfg_map_get(map, "exempt-clients", &obj); + if (result == ISC_R_SUCCESS) { + result = cfg_acl_fromconfig(obj, config, ns_g_lctx, + ns_g_aclconfctx, ns_g_mctx, + 0, &rrl->exempt); + CHECK_RRL(obj, result == ISC_R_SUCCESS, + "invalid %s", "address_match_list"); + } + + obj = NULL; + result = cfg_map_get(map, "log-only", &obj); + if (result == ISC_R_SUCCESS && cfg_obj_asboolean(obj)) + rrl->log_only = ISC_TRUE; + else + rrl->log_only = ISC_FALSE; + + return (ISC_R_SUCCESS); + + cleanup: + dns_rrl_view_destroy(view); + return (result); +} + /* * Configure 'view' according to 'vconfig', taking defaults from 'config' * where values are missing in 'vconfig'. 
@@ -2925,6 +3132,14 @@ } } + obj = NULL; + result = ns_config_get(maps, "rate-limit", &obj); + if (result == ISC_R_SUCCESS) { + result = configure_rrl(view, config, obj); + if (result != ISC_R_SUCCESS) + goto cleanup; + } + result = ISC_R_SUCCESS; cleanup: diff -r -u bin/named/statschannel.c-orig bin/named/statschannel.c --- bin/named/statschannel.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/named/statschannel.c 2004-01-01 00:00:00.000000000 +0000 @@ -202,6 +202,10 @@ SET_NSSTATDESC(updatebadprereq, "updates rejected due to prerequisite failure", "UpdateBadPrereq"); + SET_NSSTATDESC(ratedropped, "responses dropped for rate limits", + "RateDropped"); + SET_NSSTATDESC(rateslipped, "responses truncated for rate limits", + "RateSlipped"); INSIST(i == dns_nsstatscounter_max); /* Initialize resolver statistics */ diff -r -u bin/tests/system/README-orig bin/tests/system/README --- bin/tests/system/README-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/README 2004-01-01 00:00:00.000000000 +0000 @@ -17,6 +17,7 @@ nsupdate/ Dynamic update and IXFR tests resolver/ Regression tests for resolver bugs that have been fixed (not a complete resolver test suite) + rrl/ query rate limiting rpz/ Tests of response policy zone (RPZ) rewriting stub/ Tests of stub zone functionality unknown/ Unknown type and class tests diff -r -u bin/tests/system/conf.sh.in-orig bin/tests/system/conf.sh.in --- bin/tests/system/conf.sh.in-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/conf.sh.in 2004-01-01 00:00:00.000000000 +0000 @@ -58,7 +58,7 @@ @CHECKDS@ checknames checkzone database dlv dlvauto dlz dlzexternal dname dns64 dnssec ecdsa forward glue gost ixfr inline limits logfileconfig lwresd masterfile masterformat metadata notify - nsupdate pending pkcs11 redirect resolver rndc rpz rrsetorder + nsupdate pending pkcs11 redirect resolver rndc rpz rrl rrsetorder rsabigexponent sortlist smartsign staticstub stub tkey tsig tsiggss unknown upforwd verify views xfer 
xferquota zonechecks" diff -r -u bin/tests/system/rrl/clean.sh-orig bin/tests/system/rrl/clean.sh --- bin/tests/system/rrl/clean.sh-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/clean.sh 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,22 @@ +# Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. + +# $Id$ + + +# Clean up after rrl tests. + +rm -f dig.out* +rm -f */named.memstats */named.run ns*/log* */named.rpz */session.key +rm -f ns3/bl*.db */*.jnl */*.core */*.pid diff -r -u bin/tests/system/rrl/ns1/named.conf-orig bin/tests/system/rrl/ns1/named.conf --- bin/tests/system/rrl/ns1/named.conf-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns1/named.conf 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. 
IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id$ */ + +controls { /* empty */ }; + +options { + query-source address 10.53.0.1; + notify-source 10.53.0.1; + transfer-source 10.53.0.1; + port 5300; + session-keyfile "session.key"; + pid-file "named.pid"; + listen-on { 10.53.0.1; }; + listen-on-v6 { none; }; + notify no; +}; + +zone "." {type master; file "root.db";}; diff -r -u bin/tests/system/rrl/ns1/root.db-orig bin/tests/system/rrl/ns1/root.db --- bin/tests/system/rrl/ns1/root.db-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns1/root.db 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,32 @@ +; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +; +; Permission to use, copy, modify, and/or distribute this software for any +; purpose with or without fee is hereby granted, provided that the above +; copyright notice and this permission notice appear in all copies. +; +; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +; PERFORMANCE OF THIS SOFTWARE. + +; $Id$ + +$TTL 120 +@ SOA ns. hostmaster.ns. ( 1 3600 1200 604800 60 ) +@ NS ns. +ns. A 10.53.0.1 +. A 10.53.0.1 + +; limit responses from here +tld2. NS ns.tld2. +ns.tld2. A 10.53.0.2 + +; limit recursion to here +tld3. NS ns.tld3. +ns.tld3. A 10.53.0.3 + +; generate SERVFAIL +tld4. NS ns.tld3. 
diff -r -u bin/tests/system/rrl/ns2/hints-orig bin/tests/system/rrl/ns2/hints --- bin/tests/system/rrl/ns2/hints-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns2/hints 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,19 @@ +; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +; +; Permission to use, copy, modify, and/or distribute this software for any +; purpose with or without fee is hereby granted, provided that the above +; copyright notice and this permission notice appear in all copies. +; +; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +; PERFORMANCE OF THIS SOFTWARE. + +; $Id$ + + +. 0 NS ns1. +ns1. 0 A 10.53.0.1 diff -r -u bin/tests/system/rrl/ns2/named.conf-orig bin/tests/system/rrl/ns2/named.conf --- bin/tests/system/rrl/ns2/named.conf-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns2/named.conf 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. 
IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id$ */ + +controls { /* empty */ }; + +options { + query-source address 10.53.0.2; + notify-source 10.53.0.2; + transfer-source 10.53.0.2; + port 5300; + session-keyfile "session.key"; + pid-file "named.pid"; + listen-on { 10.53.0.2; }; + listen-on-v6 { none; }; + notify no; + + rate-limit { + responses-per-second 2; + all-per-second 70; + IPv4-prefix-length 24; + IPv6-prefix-length 64; + slip 3; + /* qps-scale 2; */ + exempt-clients { 10.53.0.7; }; + window 1; + max-table-size 100; + min-table-size 2; + }; +}; + +/* + * These log settings have no effect unless "-g" is removed from ../../start.pl + */ +logging { + channel debug { + file "log-debug"; + print-category yes; print-severity yes; severity debug 10; + }; + channel queries { + file "log-queries"; + print-category yes; print-severity yes; severity info; + }; + category rate-limit { debug; queries; }; + category queries { debug; queries; }; +}; + +zone "." { type hint; file "hints"; }; + +zone "tld2."{ type master; file "tld2.db"; }; diff -r -u bin/tests/system/rrl/ns2/tld2.db-orig bin/tests/system/rrl/ns2/tld2.db --- bin/tests/system/rrl/ns2/tld2.db-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns2/tld2.db 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,43 @@ +; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +; +; Permission to use, copy, modify, and/or distribute this software for any +; purpose with or without fee is hereby granted, provided that the above +; copyright notice and this permission notice appear in all copies. 
+; +; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +; PERFORMANCE OF THIS SOFTWARE. + +; $Id$ + + +; rate limit response from this zone + +$TTL 120 +@ SOA tld2. hostmaster.ns.tld2. ( 1 3600 1200 604800 60 ) + NS ns + NS . +ns A 10.53.0.2 + +a1 A 192.168.2.1 + +*.a2 A 192.168.2.2 + +; a3 is in tld3 + +; a4 does not exist to give NXDOMAIN + +; a5 for TCP requests +a5 A 192.168.2.5 + +; a6 for whitelisted clients +a6 A 192.168.2.6 + +; a7 for SERVFAIL + +; a8 for all-per-second limit +$GENERATE 101-180 all$.a8 A 192.168.2.8 diff -r -u bin/tests/system/rrl/ns3/hints-orig bin/tests/system/rrl/ns3/hints --- bin/tests/system/rrl/ns3/hints-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns3/hints 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,19 @@ +; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +; +; Permission to use, copy, modify, and/or distribute this software for any +; purpose with or without fee is hereby granted, provided that the above +; copyright notice and this permission notice appear in all copies. +; +; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +; PERFORMANCE OF THIS SOFTWARE. + +; $Id$ + + +. 0 NS ns1. +ns1. 
0 A 10.53.0.1 diff -r -u bin/tests/system/rrl/ns3/named.conf-orig bin/tests/system/rrl/ns3/named.conf --- bin/tests/system/rrl/ns3/named.conf-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns3/named.conf 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id$ */ + +controls { /* empty */ }; + +options { + query-source address 10.53.0.3; + notify-source 10.53.0.3; + transfer-source 10.53.0.3; + port 5300; + session-keyfile "session.key"; + pid-file "named.pid"; + listen-on { 10.53.0.3; }; + listen-on-v6 { none; }; + notify no; +}; + +zone "." { type hint; file "hints"; }; + +zone "tld3."{ type master; file "tld3.db"; }; diff -r -u bin/tests/system/rrl/ns3/tld3.db-orig bin/tests/system/rrl/ns3/tld3.db --- bin/tests/system/rrl/ns3/tld3.db-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/ns3/tld3.db 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,26 @@ +; Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +; +; Permission to use, copy, modify, and/or distribute this software for any +; purpose with or without fee is hereby granted, provided that the above +; copyright notice and this permission notice appear in all copies. 
+; +; THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +; REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +; AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +; INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +; LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +; OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +; PERFORMANCE OF THIS SOFTWARE. + +; $Id$ + + +; rate limit response from this zone + +$TTL 120 +@ SOA tld3. hostmaster.ns.tld3. ( 1 3600 1200 604800 60 ) + NS ns + NS . +ns A 10.53.0.3 + +*.a3 A 192.168.3.3 diff -r -u bin/tests/system/rrl/setup.sh-orig bin/tests/system/rrl/setup.sh --- bin/tests/system/rrl/setup.sh-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/setup.sh 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,22 @@ +#!/bin/sh +# +# Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. + +# $Id$ + +SYSTEMTESTTOP=.. +. $SYSTEMTESTTOP/conf.sh +. 
./clean.sh + diff -r -u bin/tests/system/rrl/tests.sh-orig bin/tests/system/rrl/tests.sh --- bin/tests/system/rrl/tests.sh-orig 2004-01-01 00:00:00.000000000 +0000 +++ bin/tests/system/rrl/tests.sh 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,210 @@ +# Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, +# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. + +# $Id$ + +# test response rate limiting + +SYSTEMTESTTOP=.. +. $SYSTEMTESTTOP/conf.sh + +#set -x + +ns1=10.53.0.1 # root, defining the others +ns2=10.53.0.2 # test server +ns3=10.53.0.3 # secondary test server +ns7=10.53.0.7 # whitelisted client + +USAGE="$0: [-x]" +while getopts "x" c; do + case $c in + x) set -x;; + *) echo "$USAGE" 1>&2; exit 1;; + esac +done +shift `expr $OPTIND - 1 || true` +if test "$#" -ne 0; then + echo "$USAGE" 1>&2 + exit 1 +fi +# really quit on control-C +trap 'exit 1' 1 2 15 + + +ret=0 +setret () { + ret=1 + echo "$*" +} + + +# Wait until soon after the start of a second to make results consistent. +# The start of a second credits a rate limit. +# This would be far easier in C or by assuming a modern version of perl. 
+sec_start () { + START=`date` + while true; do + NOW=`date` + if test "$START" != "$NOW"; then + return + fi + $PERL -e 'select(undef, undef, undef, 0.05)' || true + done +} + + +# $1=result name $2=domain name $3=dig options +digcmd () { + OFILE=$1; shift + DIG_DOM=$1; shift + ARGS="+noadd +noauth +nosearch +time=1 +tries=1 +ignore $* -p 5300 $DIG_DOM @$ns2" + #echo I:dig $ARGS 1>&2 + START=`date +%y%m%d%H%M.%S` + RESULT=`$DIG $ARGS 2>&1 | tee $OFILE=TEMP \ + | sed -n -e 's/^[^;].* \([^ ]\{1,\}\)$/\1/p' \ + -e 's/;; flags.* tc .*/TC/p' \ + -e 's/;; .* status: NXDOMAIN.*/NXDOMAIN/p' \ + -e 's/;; .* status: SERVFAIL.*/SERVFAIL/p' \ + -e 's/;; connection timed out.*/drop/p' \ + -e 's/;; communications error to.*/drop/p' \ + | tr -d '\n'` + mv "$OFILE=TEMP" "$OFILE=$RESULT" + touch -t $START "$OFILE=$RESULT" +} + + +# $1=number of tests $2=target domain $3=dig options +CNT=1 +burst () { + BURST_LIMIT=$1; shift + BURST_DOM_BASE="$1"; shift + while test "$BURST_LIMIT" -ge 1; do + if test $CNT -lt 10; then + CNT="0$CNT" + fi + if test $CNT -lt 100; then + CNT="0$CNT" + fi + eval BURST_DOM="$BURST_DOM_BASE" + FILE="dig.out-$BURST_DOM-$CNT" + rm -f $FILE=* + digcmd $FILE $BURST_DOM $* & + CNT=`expr $CNT + 1` + BURST_LIMIT=`expr "$BURST_LIMIT" - 1` + done +} + + +# $1=domain $2=IP address $3=# of IP addresses $4=TC $5=drop +# $6=NXDOMAIN $7=SERVFAIL or other errors +ck_result() { + BAD= + wait + ADDRS=`ls dig.out-$1-*=$2 2>/dev/null | wc -l | tr -d ' '` + TC=`ls dig.out-$1-*=TC 2>/dev/null | wc -l | tr -d ' '` + DROP=`ls dig.out-$1-*=drop 2>/dev/null | wc -l | tr -d ' '` + NXDOMAIN=`ls dig.out-$1-*=NXDOMAIN 2>/dev/null | wc -l | tr -d ' '` + SERVFAIL=`ls dig.out-$1-*=SERVFAIL 2>/dev/null | wc -l | tr -d ' '` + if test $ADDRS -ne "$3"; then + setret "I:$ADDRS instead of $3 $2 responses for $1" + BAD=yes + fi + if test $TC -ne "$4"; then + setret "I:$TC instead of $4 truncation responses for $1" + BAD=yes + fi + if test $DROP -ne "$5"; then + setret "I:$DROP instead of $5 
dropped responses for $1" + BAD=yes + fi + if test $NXDOMAIN -ne "$6"; then + setret "I:$NXDOMAIN instead of $6 NXDOMAIN responses for $1" + BAD=yes + fi + if test $SERVFAIL -ne "$7"; then + setret "I:$SERVFAIL instead of $7 error responses for $1" + BAD=yes + fi + if test -z "$BAD"; then + rm -f dig.out-$1-* + fi +} + + +######### +sec_start + +# basic rate limiting +burst 3 a1.tld2 +# 1 second delay allows an additional response. +sleep 1 +burst 21 a1.tld2 +# request 30 different qnames to try a wild card +burst 30 'x$CNT.a2.tld2' + +# IP TC drop NXDOMAIN SERVFAIL +# check for 24 results +# including the 1 second delay +ck_result a1.tld2 192.168.2.1 3 7 14 0 0 + +# Check the wild card answers. +# The parent name of the 30 requests is counted. +ck_result 'x*.a2.tld2' 192.168.2.2 2 9 19 0 0 + + +######### +sec_start + +burst 1 'y$CNT.a3.tld3'; wait; burst 20 'y$CNT.a3.tld3' +burst 20 'z$CNT.a4.tld2' + +# Recursion. +# The first answer is counted separately because it is counted against +# the rate limit on recursing to the server for a3.tld3. The remaining 20 +# are counted as local responses from the cache. +ck_result 'y*.a3.tld3' 192.168.3.3 3 6 12 0 0 + +# NXDOMAIN responses are also limited based on the parent name. +ck_result 'z*.a4.tld2' x 0 6 12 2 0 + + +######### +sec_start + +burst 20 a5.tld2 +tcp +burst 20 a6.tld2 -b $ns7 +burst 20 a7.tld4 + +# TCP responses are not rate limited +ck_result a5.tld2 192.168.2.5 20 0 0 0 0 + +# whitelisted client is not rate limited +ck_result a6.tld2 192.168.2.6 20 0 0 0 0 + +# Errors such as SERVFAIL are rate limited. The numbers are confusing, because +# other rate limiting can be triggered before SERVFAIL is reached. 
+ck_result a7.tld4 192.168.2.1 0 5 13 0 2 + + +######### +sec_start + +# all-per-second +CNT=101 +burst 80 'all$CNT.a8.tld2' +ck_result 'a*.a8.tld2' 192.168.2.8 70 3 7 0 0 + + +echo "I:exit status: $ret" +exit $ret diff -r -u doc/arm/Bv9ARM-book.xml-orig doc/arm/Bv9ARM-book.xml --- doc/arm/Bv9ARM-book.xml-orig 2004-01-01 00:00:00.000000000 +0000 +++ doc/arm/Bv9ARM-book.xml 2004-01-01 00:00:00.000000000 +0000 @@ -4803,6 +4803,24 @@ + + + rate-limit + + + + The start, periodic, and final notices of rate limiting + of a stream of responses are logged at + info severity in this category. + Various internal performance data such as expansions + of the table is logged debug 1 level and higher. + Rate limiting of individual requests + is logged in the queries category + and can be controlled with the + querylog option. + + + @@ -5334,6 +5352,21 @@ resolver-query-timeout number ; deny-answer-addresses { address_match_list } except-from { namelist } ; deny-answer-aliases { namelist } except-from { namelist } ; + rate-limit { + responses-per-second number ; + errors-per-second number ; + nxdomains-per-second number ; + all-per-second number ; + window number ; + log-only yes_or_no ; + qps-scale number ; + IPv4-prefix-length number ; + IPv6-prefix-length number ; + slip number ; + exempt-clients { address_match_list } ; + max-table-size number ; + min-table-size number ; + } ; response-policy { zone_name policy given | disabled | passthru | nxdomain | nodata | cname domain recursive-only yes_or_no max-policy-ttl number ; @@ -9737,6 +9770,204 @@ 48.zz.2.2001.rpz-nsip CNAME . + + + Rate Limiting + + Excessive essentially identical UDP responses + can be discarded by configuring a + rate-limit clause in an + options statement. + This mechanism keeps BIND 9 from being used + in amplifying reflection denial of service attacks + as well as partially protecting BIND 9 itself from + some denial of service attacks. 
+ Very short truncated responses can be sent to provide + rate-limited responses to legitimate + clients within a range of attacked and forged IP addresses. + Legitimate clients react to truncated responses by retrying + with TCP. + + + + Rate limiting works by setting + responses-per-second + to a number of repetitions per second for responses for a given name + and record type to a DNS client. + + + + Responses-per-second is a limit on + identical responses instead of a limit on all responses or + even all responses to a single client. + 10 identical responses per second is a generous limit except perhaps + when many clients are using a single IP address via network + address translation (NAT). + The default limit of zero specifies an unbounded limit to turn off + rate-limiting in a view or to only rate-limit NXDOMAIN or other + errors. + + + + The notion of "identical responses" + and "single DNS client" cannot be simplistic. + All responses to a CIDR block with prefix + length specified with IPv4-prefix-length + (default 24) or IPv6-prefix-length + (default 56) are assumed to come from a single DNS client. + Requests for a name that result in DNS NXDOMAIN + errors are considered identical. + This controls some attacks using random names, but + accommodates servers that expect many legitimate NXDOMAIN responses + such as anti-spam blacklists. + By default the limit on NXDOMAIN errors is the same as the + responses-per-second value, + but it can be set separately with + nxdomains-per-second. + All requests for all names or types that result in DNS errors + such as SERVFAIL and FORMERR (but not NXDOMAIN) are considered + identical. + This controls attacks using invalid requests or distant, + broken authoritative servers. + By default the limit on errors is the same as the + responses-per-second value, + but it can be set separately with + errors-per-second. + + + + Rate limiting uses a "credit" or "token bucket" scheme.
+ Each identical response has a conceptual account + that is given responses-per-second, + errors-per-second, and + nxdomains-per-second credits every second. + A DNS request triggering some desired response debits + the account by one. + Responses are not sent while the account is negative. + The account cannot become more positive than + the per-second limit + or more negative than window + times the per-second limit. + A DNS client that sends requests that are not + answered can be penalized for up to window seconds + (default 15). + + + + Responses generated from local wildcards are counted and limited + as if they were for the parent domain name. + This prevents flooding by requesting random.wild.example.com. + For similar reasons, NXDOMAIN responses are counted and rate + limited by their owner name, the nearest valid domain name to the + query name with an SOA record. + + + + Many attacks using DNS involve UDP requests with forged source + addresses. + Rate limiting prevents the use of BIND 9 to flood a network + with responses to requests with forged source addresses, + but could let a third party block responses to legitimate requests. + There is a mechanism that can answer some legitimate + requests from a client whose address is being forged in a flood. + Setting slip to 2 (its default) causes every + other UDP request to be answered with a small response + claiming that the response would have been truncated. + The small size and relative infrequency of the response make + it unattractive for abuse of third parties. + slip must be between 0 and 10. + 0 "slips" or sends no rate limiting truncated responses. + Some error responses cannot be replaced with responses + with the TC flag, and so are instead + leaked at the slip rate.
+ + + + When the approximate query per second rate exceeds + the qps-scale value, + the responses-per-second, + errors-per-second, + nxdomains-per-second, + and slip values are reduced by the + ratio of the current rate to the qps-scale value. + This feature can tighten defenses during attacks. + For example, with + qps-scale 250; responses-per-second 20; and + a total query rate of 1000 queries/second for all queries from + all DNS clients including via TCP, + then the effective responses/second limit changes to + (250/1000)*20 or 5. + The limits for IP addresses using TCP are not reduced. + Responses sent via TCP are not subject to rate limits + but are counted to approximate the query per second rate. + + + + Communities of DNS clients can be given their own parameters or no + rate limiting by putting + rate-limit statements in view + statements instead of the global option + statement. + A rate-limit statement in a view replaces + instead of being merged with a rate-limit + statement among the main options. + DNS clients within a view can be exempted from rate limits + with the exempt-clients clause. + + + + UDP responses of all kinds can be limited with the + all-per-second phrase. + This rate limiting is similar to the rate limiting offered by + firewalls. When performed in a DNS server it is inferior to + the other rate-limit forms, because it ignores + the contents of responses to a block of IP addresses. + The rate limiting provided by + responses-per-second, + errors-per-second, and + nxdomains-per-second on a DNS server + is often invisible to the victim of a DNS reflection attack. + Unless the forged requests of the attack are the same as the + legitimate requests of the victim, the victim's requests are + not affected. + An all-per-second limit must be + at least 4 times as large as the other limits, + because single DNS clients often send bursts of legitimate + requests.
+ For example, the receipt of a single mail message can prompt + requests from an SMTP server for NS, PTR, A, and AAAA records + as the incoming SMTP/TCP/IP connection is considered. + The SMTP server can need additional NS, A, AAAA, MX, TXT, and SPF + records as it considers the SMTP Mail From + command. + + + + The maximum size of the table used to track requests and so + rate limit responses is set with max-table-size. + Each entry in the table is between 40 and 80 bytes. + The default of 10,000 is suitable for a server receiving + 5000 DNS requests/second. + 10,000 entries require about 1 megabyte. + To reduce cold start costs including those in growing the + table, min-table-size (default 1000) + can set the minimum table size. + Enable logging to monitor expansions of the table and inform + non-default choices for the initial and maximum table size. + + + + Use log-only yes to test rate limiting parameters + without actually dropping any requests. + + + + Responses dropped by rate limits are included in the + RateDropped and QryDropped + statistics. + Responses that are truncated by rate limits are included in + RateSlipped and RespTruncated. + @@ -14385,6 +14616,32 @@ + + + RateDropped + + + + + + + Responses dropped by rate limits. + + + + + + RateSlipped + + + + + + + Responses truncated by rate limits.
+ + + diff -r -u lib/dns/Makefile.in-orig lib/dns/Makefile.in --- lib/dns/Makefile.in-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/dns/Makefile.in 2004-01-01 00:00:00.000000000 +0000 @@ -66,8 +66,8 @@ portlist.@O@ private.@O@ \ rbt.@O@ rbtdb.@O@ rbtdb64.@O@ rcode.@O@ rdata.@O@ \ rdatalist.@O@ rdataset.@O@ rdatasetiter.@O@ rdataslab.@O@ \ - request.@O@ resolver.@O@ result.@O@ rootns.@O@ rpz.@O@ \ - rriterator.@O@ sdb.@O@ \ + request.@O@ resolver.@O@ result.@O@ rootns.@O@ \ + rpz.@O@ rrl.@O@ rriterator.@O@ sdb.@O@ \ sdlz.@O@ soa.@O@ ssu.@O@ ssu_external.@O@ \ stats.@O@ tcpmsg.@O@ time.@O@ timer.@O@ tkey.@O@ \ tsec.@O@ tsig.@O@ ttl.@O@ update.@O@ validator.@O@ \ @@ -93,7 +93,7 @@ name.c ncache.c nsec.c nsec3.c order.c peer.c portlist.c \ rbt.c rbtdb.c rbtdb64.c rcode.c rdata.c rdatalist.c \ rdataset.c rdatasetiter.c rdataslab.c request.c \ - resolver.c result.c rootns.c rpz.c rriterator.c \ + resolver.c result.c rootns.c rpz.c rrl.c rriterator.c \ sdb.c sdlz.c soa.c ssu.c ssu_external.c \ stats.c tcpmsg.c time.c timer.c tkey.c \ tsec.c tsig.c ttl.c update.c validator.c \ diff -r -u lib/dns/include/dns/log.h-orig lib/dns/include/dns/log.h --- lib/dns/include/dns/log.h-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/dns/include/dns/log.h 2004-01-01 00:00:00.000000000 +0000 @@ -43,6 +43,7 @@ #define DNS_LOGCATEGORY_DELEGATION_ONLY (&dns_categories[10]) #define DNS_LOGCATEGORY_EDNS_DISABLED (&dns_categories[11]) #define DNS_LOGCATEGORY_RPZ (&dns_categories[12]) +#define DNS_LOGCATEGORY_RRL (&dns_categories[13]) /* Backwards compatibility. */ #define DNS_LOGCATEGORY_GENERAL ISC_LOGCATEGORY_GENERAL diff -r -u lib/dns/include/dns/rrl.h-orig lib/dns/include/dns/rrl.h --- lib/dns/include/dns/rrl.h-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/dns/include/dns/rrl.h 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2012 Internet Systems Consortium, Inc. 
("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id$ */ + +#ifndef DNS_RRL_H +#define DNS_RRL_H 1 + +/* + * Rate limit DNS responses. + */ + +#include + +#include +#include +#include + +ISC_LANG_BEGINDECLS + + +/* + * Memory allocation or other failures. + */ +#define DNS_RRL_LOG_FAIL ISC_LOG_WARNING +/* + * dropped or slipped responses. + */ +#define DNS_RRL_LOG_DROP ISC_LOG_INFO +/* + * Major events in dropping or slipping. + */ +#define DNS_RRL_LOG_DEBUG1 ISC_LOG_DEBUG(3) +/* + * Limit computations. + */ +#define DNS_RRL_LOG_DEBUG2 ISC_LOG_DEBUG(4) +/* + * Less interesting. + */ +#define DNS_RRL_LOG_DEBUG3 ISC_LOG_DEBUG(9) + + +#define DNS_RRL_LOG_ERR_LEN 64 +#define DNS_RRL_LOG_BUF_LEN (sizeof("would continue limiting") + \ + DNS_RRL_LOG_ERR_LEN + \ + sizeof(" responses to ") + \ + ISC_NETADDR_FORMATSIZE + \ + sizeof("/128 for IN ") + \ + DNS_RDATATYPE_FORMATSIZE + \ + DNS_NAME_FORMATSIZE) + + +typedef struct dns_rrl_hash dns_rrl_hash_t; + +/* + * Response types. + */ +typedef enum { + DNS_RRL_RTYPE_FREE, + DNS_RRL_RTYPE_QUERY, + DNS_RRL_RTYPE_NXDOMAIN, + DNS_RRL_RTYPE_ERROR, + DNS_RRL_RTYPE_ALL, + DNS_RRL_RTYPE_TCP, +} dns_rrl_rtype_t; + +/* + * A rate limit bucket key. + * This should be small to limit the total size of the database. 
+ */ +typedef struct dns_rrl_key dns_rrl_key_t; +struct dns_rrl_key { + isc_uint32_t ip[4]; + isc_uint32_t qname_hash; + dns_rdatatype_t qtype; + dns_rrl_rtype_t rtype :3; + isc_boolean_t qclass :3; + isc_boolean_t ipv6 :1; +}; + +/* + * A rate-limit entry. + * This should be small to limit the total size of the database. + * With gcc on ARM, the key should have __attribute((__packed__)) to + * avoid padding to a multiple of 8 bytes. + */ +typedef struct dns_rrl_entry dns_rrl_entry_t; +typedef ISC_LIST(dns_rrl_entry_t) dns_rrl_bin_t; +struct dns_rrl_entry { + ISC_LINK(dns_rrl_entry_t) lru; + ISC_LINK(dns_rrl_entry_t) hlink; + dns_rrl_bin_t *bin; + isc_stdtime_t last_used; + isc_int32_t responses; +# define DNS_RRL_MAX_WINDOW 600 +# define DNS_RRL_MAX_RATE (ISC_INT32_MAX / DNS_RRL_MAX_WINDOW) + dns_rrl_key_t key; + unsigned int slip_cnt :4; +# define DNS_RRL_MAX_SLIP 10 + unsigned int log_secs :10; +# define DNS_RRL_MAX_LOG_SECS 600 +# define DNS_RRL_STOP_LOG_SECS 60 + isc_boolean_t logged :1; + unsigned int log_qname :8; +# define DNS_RRL_NUM_QNAMES 256 +}; + +/* + * A hash table of rate-limit entries. + */ +struct dns_rrl_hash { + isc_stdtime_t check_time; + int length; + dns_rrl_bin_t bins[1]; +}; + +/* + * A block of rate-limit entries. + */ +typedef struct dns_rrl_block dns_rrl_block_t; +struct dns_rrl_block { + ISC_LINK(dns_rrl_block_t) link; + int size; + dns_rrl_entry_t entries[1]; +}; + +/* + * A rate limited qname buffers. + */ +typedef struct dns_rrl_qname_buf dns_rrl_qname_buf_t; +struct dns_rrl_qname_buf { + ISC_LINK(dns_rrl_qname_buf_t) link; + const dns_rrl_entry_t *e; + unsigned int index; + dns_fixedname_t qname; +}; + +/* + * Per-view query rate limit parameters and a pointer to database. 
+ */ +typedef struct dns_rrl dns_rrl_t; +struct dns_rrl { + isc_mutex_t lock; + isc_mem_t *mctx; + + isc_boolean_t log_only; + int responses_per_second; + int errors_per_second; + int nxdomains_per_second; + int all_per_second; + int window; + int slip; + double qps_scale; + int max_entries; + + dns_acl_t *exempt; + + int num_entries; + + int qps_responses; + isc_stdtime_t qps_time; + double qps; + int scaled_responses_per_second; + int scaled_errors_per_second; + int scaled_nxdomains_per_second; + int scaled_all_per_second; + int scaled_slip; + + isc_stdtime_t prune_time; + + unsigned int probes; + unsigned int searches; + + ISC_LIST(dns_rrl_block_t) blocks; + ISC_LIST(dns_rrl_entry_t) lru; + + dns_rrl_hash_t *hash; + dns_rrl_hash_t *old_hash; + + int ipv4_prefixlen; + isc_uint32_t ipv4_mask; + int ipv6_prefixlen; + isc_uint32_t ipv6_mask[4]; + + dns_rrl_entry_t *log_ended; + ISC_LIST(dns_rrl_qname_buf_t) qname_free; + int num_qnames; + dns_rrl_qname_buf_t *qnames[DNS_RRL_NUM_QNAMES]; +}; + +typedef enum { + DNS_RRL_RESULT_OK, + DNS_RRL_RESULT_DROP, + DNS_RRL_RESULT_SLIP, +} dns_rrl_result_t; + +dns_rrl_result_t +dns_rrl(dns_view_t *view, + const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp, + dns_rdataclass_t rdclass, dns_rdatatype_t qtype, + dns_name_t *qname, dns_rcode_t rcode, isc_stdtime_t now, + isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len); + +void +dns_rrl_view_destroy(dns_view_t *view); + +isc_result_t +dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries); + +ISC_LANG_ENDDECLS + +#endif /* DNS_RRL_H */ diff -r -u lib/dns/include/dns/view.h-orig lib/dns/include/dns/view.h --- lib/dns/include/dns/view.h-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/dns/include/dns/view.h 2004-01-01 00:00:00.000000000 +0000 @@ -73,6 +73,7 @@ #include #include +#include #include #include #include @@ -142,6 +143,7 @@ dns_rbt_t * answeracl_exclude; dns_rbt_t * denyanswernames; dns_rbt_t * answernames_exclude; + dns_rrl_t * rrl; 
isc_boolean_t provideixfr; isc_boolean_t requestnsid; dns_ttl_t maxcachettl; diff -r -u lib/dns/log.c-orig lib/dns/log.c --- lib/dns/log.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/dns/log.c 2004-01-01 00:00:00.000000000 +0000 @@ -45,6 +45,7 @@ { "delegation-only", 0 }, { "edns-disabled", 0 }, { "rpz", 0 }, + { "rate-limit", 0 }, { NULL, 0 } }; diff -r -u lib/dns/rrl.c-orig lib/dns/rrl.c --- lib/dns/rrl.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/dns/rrl.c 2004-01-01 00:00:00.000000000 +0000 @@ -0,0 +1,1242 @@ +/* + * Copyright (C) 2012 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id$ */ + +/*! \file */ + +/* + * Rate limit DNS responses. + */ + +/* #define ISC_LIST_CHECKINIT */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + + +static void +log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, + char *log_buf, unsigned int log_buf_len); + + +/* + * Get a modulus for a hash function that is tolerably likely to be + * relatively prime to most inputs. Of course, we get a prime for for initial + * values not larger than the square of the last prime. We often get a prime + * after that. 
+ * This works well in practice for hash tables up to at least 100 + * times the square of the last prime and better than a multiplicative hash. + */ +static int +hash_divisor(unsigned int initial) { + static isc_uint16_t primes[] = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, + 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, +#if 0 + 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, + 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, + 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, + 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, + 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, + 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, + 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, + 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, + 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, + 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, + 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, + 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997,1009, +#endif + }; + int divisions, tries; + unsigned int result; + isc_uint16_t *pp, p; + + result = initial; + + if (primes[sizeof(primes)/sizeof(primes[0])-1] >= result) { + pp = primes; + while (*pp < result) + ++pp; + return (*pp); + } + + if ((result & 1) == 0) + ++result; + + divisions = 0; + tries = 1; + pp = primes; + do { + p = *pp++; + ++divisions; + if ((result % p) == 0) { + ++tries; + result += 2; + pp = primes; + } + } while (pp < &primes[sizeof(primes)/sizeof(primes[0])]); + + if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3, + "%d hash_divisor() divisions in %d tries" + " to get %d from %d", + divisions, tries, result, initial); + + return (result); +} + +/* + * Convert a timestamp to a number of seconds in the past. 
+ */ +static inline int +delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) { + int delta; + + delta = now - ts; + if (delta >= 0) + return (delta); + + /* + * The timestamp is in the future. That future might result from + * re-ordered requests, because we use timestamps on requests + * instead of consulting a clock. Timestamps in the distant future are + * assumed to result from clock changes. When the clock changes to + * the past, make existing timestamps appear to be in the past. + */ + if (delta < -5) + return (now); + return (0); +} + +static isc_result_t +add_rrl_entries(dns_rrl_t *rrl, int new) { + unsigned int bsize; + dns_rrl_block_t *b; + dns_rrl_entry_t *e; + double rate; + int i; + + if (rrl->num_entries+new >= rrl->max_entries && rrl->max_entries != 0) { + if (rrl->num_entries >= rrl->max_entries) + return (ISC_R_SUCCESS); + new = rrl->max_entries - rrl->num_entries; + if (new <= 0) + return (ISC_R_NOMEMORY); + } + + /* + * Try to log expansions so that the user can tune max-table-size + * and min-table-size. 
+ */ + if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && + rrl->hash != NULL ) { + rate = rrl->probes; + if (rrl->searches != 0) + rate /= rrl->searches; + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, + "increase from %d to %d RRL entries with" + " %d bins; average search length %.1f", + rrl->num_entries, rrl->num_entries+new, + rrl->hash->length, rate); + } + + bsize = sizeof(dns_rrl_block_t) + (new-1)*sizeof(dns_rrl_entry_t); + b = isc_mem_get(rrl->mctx, bsize); + if (b == NULL) { + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL, + "isc_mem_get(%d) failed for RRL entries", + bsize); + return (ISC_R_NOMEMORY); + } + memset(b, 0, bsize); + b->size = bsize; + + e = b->entries; + rrl->log_ended = e; + for (i = 0; i < new; ++i, ++e) { + ISC_LINK_INIT(e, hlink); + ISC_LIST_INITANDAPPEND(rrl->lru, e, lru); + } + rrl->num_entries += new; + ISC_LIST_INITANDAPPEND(rrl->blocks, b, link); + + return (ISC_R_SUCCESS); +} + +static inline dns_rrl_bin_t * +get_rrl_bin(dns_rrl_hash_t *hash, unsigned int hval) { + return (&hash->bins[hval % hash->length]); +} + +static void +free_old_hash(dns_rrl_t *rrl) { + dns_rrl_hash_t *old_hash; + dns_rrl_bin_t *old_bin; + dns_rrl_entry_t *e; + + old_hash = rrl->old_hash; + for (old_bin = &old_hash->bins[0]; + old_bin < &old_hash->bins[old_hash->length]; + ++old_bin) { + while ((e = ISC_LIST_HEAD(*old_bin)) != NULL) { + ISC_LIST_UNLINK(*e->bin, e, hlink); + e->bin = NULL; + } + } + + isc_mem_put(rrl->mctx, old_hash, + sizeof(*old_hash) + + (old_hash->length-1)*sizeof(old_hash->bins[0])); + rrl->old_hash = NULL; +} + +static isc_result_t +expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) { + dns_rrl_hash_t *hash; + int old_bins, new_bins, hsize; + double rate; + + if (rrl->old_hash != NULL) + free_old_hash(rrl); + + /* + * Most searches fail and so go to the end of the chain. + * Use a small hash table load factor. + */ + old_bins = (rrl->hash == NULL) ? 
0 : rrl->hash->length; + new_bins = old_bins/8 + old_bins; + if (new_bins < rrl->num_entries) + new_bins = rrl->num_entries; + new_bins = hash_divisor(new_bins); + + hsize = sizeof(dns_rrl_hash_t) + (new_bins-1)*sizeof(hash->bins[0]); + hash = isc_mem_get(rrl->mctx, hsize); + if (hash == NULL) { + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_FAIL, + "isc_mem_get(%d) failed for" + " RRL hash table", + hsize); + return (ISC_R_NOMEMORY); + } + memset(hash, 0, hsize); + hash->length = new_bins; + + if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1) && old_bins != 0) { + rate = rrl->probes; + if (rrl->searches != 0) + rate /= rrl->searches; + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, + DNS_RRL_LOG_DEBUG1, + "increase from %d to %d RRL bins for" + " %d entries; average search length %.1f", + old_bins, new_bins, rrl->num_entries, rate); + } + + rrl->old_hash = rrl->hash; + if (rrl->old_hash != NULL) + rrl->old_hash->check_time = now; + rrl->hash = hash; + + return (ISC_R_SUCCESS); +} + +static void +rrl_entry_ref(dns_rrl_t *rrl, dns_rrl_entry_t *e, dns_rrl_bin_t *new_bin, + int probes, isc_stdtime_t now) +{ + /* + * Make the entry most recently used. + */ + if (ISC_LIST_HEAD(rrl->lru) != e) { + if (e == rrl->log_ended) { + if (e->lru.next != NULL) + rrl->log_ended = e->lru.next; + else + rrl->log_ended = e->lru.prev; + } + ISC_LIST_UNLINK(rrl->lru, e, lru); + ISC_LIST_PREPEND(rrl->lru, e, lru); + } + + /* + * Move the entry to the head of its hash chain. + */ + if (ISC_LIST_HEAD(*new_bin) != e) { + if (e->bin != NULL) + ISC_LIST_UNLINK(*e->bin, e, hlink); + ISC_LIST_PREPEND(*new_bin, e, hlink); + e->bin = new_bin; + } + + /* + * Expand the hash table if it is time and necessary. + * This will leave the newly referenced entry in a chain in the + * old hash table. It will migrate to the new hash table the next + * time it is used or be cut loose when the old hash table is destroyed. 
+ */ + rrl->probes += probes; + ++rrl->searches; + if (rrl->searches > 100 && + delta_rrl_time(rrl->hash->check_time, now) >= 10) { + if (rrl->probes/rrl->searches > 2) + expand_rrl_hash(rrl, now); + rrl->hash->check_time = now; + rrl->probes = 0; + rrl->searches = 0; + } +} + +static inline isc_boolean_t +rrl_key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) { + return (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0 ? ISC_TRUE : ISC_FALSE); +} + +/* + * Construct the database key. + * Use a hash of the DNS query name to save space in the database. + * Collisions result in legitimate rate limiting responses for one + * query name also limiting responses for other names to the + * same client. This is rare and benign enough given the large + * space costs compared to keeping the entire name in the database + * entry or the time costs of dynamic allocation. + */ +static isc_uint32_t +make_key(dns_rrl_t *rrl, dns_rrl_key_t *key, + const isc_sockaddr_t *client_addr, + dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass, + dns_rrl_rtype_t rtype) +{ + isc_uint32_t hval; + int i; + + memset(key, 0, sizeof(*key)); + + key->rtype = rtype; + hval = rtype; + if (rtype == DNS_RRL_RTYPE_QUERY || + rtype == DNS_RRL_RTYPE_NXDOMAIN) { + /* + * Map dns_rdataclass_reserved0 = 0 -> 2 + * dns_rdataclass_in = 1 -> 3 + * dns_rdataclass_chaos = 3 -> 5 + * dns_rdataclass_hs = 4 -> 6 + * dns_rdataclass_none = 254 -> 0 + * dns_rdataclass_any = 255 -> 1 + * and trust that there will never be significant changes. + */ + key->qclass = qclass+2; + key->qtype = qtype; + hval += qtype<<8; + } + + if (qname != NULL && qname->labels != 0) { + /* + * Ignore the first label of wildcards. 
+ */ + if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 && + (i = dns_name_countlabels(qname)) > 1) { + dns_fixedname_t suffixf; + dns_name_t *suffix; + + dns_fixedname_init(&suffixf); + suffix = dns_fixedname_name(&suffixf); + dns_name_split(qname, i-1, NULL, suffix); + key->qname_hash = dns_name_hashbylabel(suffix, + ISC_FALSE); + } else { + key->qname_hash = dns_name_hashbylabel(qname, + ISC_FALSE); + } + hval += key->qname_hash; + } + + switch (client_addr->type.sa.sa_family) { + case AF_INET: + key->ip[3] = (client_addr->type.sin.sin_addr.s_addr & + rrl->ipv4_mask); + hval = (hval>>31) + (hval<<1) + key->ip[3]; + break; + case AF_INET6: + key->ipv6 = ISC_TRUE; + memcpy(key->ip, &client_addr->type.sin6.sin6_addr, + sizeof(key->ip)); + for (i = 0; i < 4; ++i) { + key->ip[i] &= rrl->ipv6_mask[i]; + hval = (hval>>31) + (hval<<1) + key->ip[i]; + } + break; + } + + return (hval); +} + +static inline int +response_balance(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) { + int balance; + + balance = e->responses; + if (balance < 0) + switch (e->key.rtype) { + case DNS_RRL_RTYPE_QUERY: + balance += age * rrl->responses_per_second; + break; + case DNS_RRL_RTYPE_NXDOMAIN: + balance += age * rrl->nxdomains_per_second; + break; + case DNS_RRL_RTYPE_ERROR: + balance += age * rrl->errors_per_second; + break; + case DNS_RRL_RTYPE_ALL: + balance += age * rrl->all_per_second; + break; + case DNS_RRL_RTYPE_TCP: + balance += age; + break; + default: + INSIST(0); + } + return (balance); +} + +/* + * Search for an entry for a response and optionally create it. 
+ */ +static dns_rrl_entry_t * +get_rrl_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr, + dns_rdatatype_t qtype, dns_name_t *qname, dns_rdataclass_t qclass, + dns_rrl_rtype_t rtype, isc_stdtime_t now, isc_boolean_t create, + char *log_buf, unsigned int log_buf_len) +{ + dns_rrl_key_t key; + isc_uint32_t hval; + dns_rrl_hash_t *hash, *old_hash; + dns_rrl_entry_t *e; + dns_rrl_bin_t *new_bin, *old_bin; + int probes, age; + + hval = make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype); + + /* + * Look for the entry in the current hash table. + */ + hash = rrl->hash; + new_bin = get_rrl_bin(hash, hval); + for (e = ISC_LIST_HEAD(*new_bin), probes = 1; + e != NULL; + e = ISC_LIST_NEXT(e, hlink), ++probes) { + if (rrl_key_cmp(&e->key, &key)) { + rrl_entry_ref(rrl, e, new_bin, probes, now); + return (e); + } + } + + /* + * Look in the old hash table if we did not find the entry. + */ + old_hash = rrl->old_hash; + if (old_hash != NULL) { + old_bin = get_rrl_bin(old_hash, hval); + for (e = ISC_LIST_HEAD(*old_bin); + e != NULL; + e = ISC_LIST_NEXT(e, hlink)) { + if (rrl_key_cmp(&e->key, &key)) { + rrl_entry_ref(rrl, e, new_bin, probes, now); + return (e); + } + } + + /* + * Discard the previous hash table when its entries are all old. + */ + if (delta_rrl_time(old_hash->check_time, now) > rrl->window) + free_old_hash(rrl); + } + + if (!create) + return (NULL); + + /* + * The entry does not already exist, so create it. + * Unroll the first circuit of the loop to cover most cases. + * Immediately create a new entry if the oldest is fresh. + * Preserve penalized entries. + * Try to make more entries if none are idle. + * Steal the oldest entry if we cannot make more.
+ */ + e = ISC_LIST_TAIL(rrl->lru); + age = delta_rrl_time(e->last_used, now); + if (age <= rrl->window) { + for (;;) { + if (age <= 1) { + add_rrl_entries(rrl, + ISC_MIN((rrl->num_entries+1)/2, + 1000)); + e = ISC_LIST_TAIL(rrl->lru); + break; + } + if (response_balance(rrl, e, age) >= 0) + break; + + e = e->lru.prev; + if (e == NULL) { + add_rrl_entries(rrl, + ISC_MIN((rrl->num_entries+1)/2, + 1000)); + e = ISC_LIST_TAIL(rrl->lru); + break; + } + age = delta_rrl_time(e->last_used, now); + } + } + if (e->logged) + log_end(rrl, e, log_buf, log_buf_len); + e->key = key; + e->last_used = 0; + rrl_entry_ref(rrl, e, new_bin, probes, now); + return (e); +} + +static inline dns_rrl_result_t +debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale, + const isc_sockaddr_t *client_addr, isc_stdtime_t now, + char *log_buf, unsigned int log_buf_len) +{ + int rate, new_rate, *ratep, slip, new_slip, age, log_secs, min; + const char *rate_str; + dns_rrl_entry_t const *credit_e; + dns_rrl_result_t rrl_result; + + /* + * Pick the rate counter. Optionally adjust the rates by the estimated + * query/second rate. + */ + switch (e->key.rtype) { + case DNS_RRL_RTYPE_QUERY: + rate = rrl->responses_per_second; + ratep = &rrl->scaled_responses_per_second; + break; + case DNS_RRL_RTYPE_NXDOMAIN: + rate = rrl->nxdomains_per_second; + ratep = &rrl->scaled_nxdomains_per_second; + break; + case DNS_RRL_RTYPE_ERROR: + rate = rrl->errors_per_second; + ratep = &rrl->scaled_errors_per_second; + break; + case DNS_RRL_RTYPE_ALL: + rate = rrl->all_per_second; + ratep = &rrl->scaled_all_per_second; + break; + default: + INSIST(0); + } + if (rate == 0) + return (DNS_RRL_RESULT_OK); + + if (scale < 1.0) { + /* + * The limit for clients that have used TCP is not scaled. 
+ */ + credit_e = get_rrl_entry(rrl, client_addr, + dns_rdatatype_none, NULL, 0, + DNS_RRL_RTYPE_TCP, now, ISC_FALSE, + log_buf, log_buf_len); + if (credit_e != NULL) { + age = delta_rrl_time(credit_e->last_used, now); + if (age < rrl->window) + scale = 1.0; + } + } + if (scale < 1.0) { + new_rate = rate * scale; + if (new_rate < 1) + new_rate = 1; + if (*ratep != new_rate) { + if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) { + switch (e->key.rtype) { + case DNS_RRL_RTYPE_QUERY: + rate_str = "responses-per-second"; + break; + case DNS_RRL_RTYPE_NXDOMAIN: + rate_str = "nxdomains-per-second"; + break; + case DNS_RRL_RTYPE_ERROR: + rate_str = "errors-per-second"; + break; + case DNS_RRL_RTYPE_ALL: + rate_str = "all-per-second"; + break; + default: + INSIST(0); + } + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, + DNS_RRL_LOG_DEBUG1, + "%d qps scaled %s by %.2f" + " from %d to %d", + (int)qps, rate_str, scale, + rate, new_rate); + } + rate = new_rate; + *ratep = rate; + } + } + + min = -rrl->window * rate; + + /* + * Treat time jumps into the past as no time. + * Treat entries older than the window as if they were just created + * Credit other entries. + */ + rrl_result = DNS_RRL_RESULT_DROP; + age = delta_rrl_time(e->last_used, now); + if (age > 0) { + /* + * Credit tokens earned during elapsed time. + */ + if (age > rrl->window) { + e->responses = rate; + e->slip_cnt = 0; + } else { + e->responses += rate*age; + if (e->responses > rate) { + e->responses = rate; + e->slip_cnt = 0; + } + } + /* + * Find the seconds since last log message without overflowing + * small counter. + * This counter should be reset when an entry is create (or + * recycled) and after at least one second without limiting. + * It is not necessarily reset when some requests are answered + * provided other requests continue to be dropped or slipped. + * This can happen when the request rate is just at the limit. 
+ */ + if (e->logged) { + log_secs = e->log_secs; + log_secs += age; + if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0) + log_secs = DNS_RRL_MAX_LOG_SECS; + e->log_secs = log_secs; + } + } + e->last_used = now; + + if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3, + "rrl age=%d responses=%d", age, e->responses); + + /* + * Debit the entry for this response. + */ + if (--e->responses >= 0) + return (DNS_RRL_RESULT_OK); + + if (e->responses < min) + e->responses = min; + + /* + * Drop this response unless it should leak. + */ + slip = rrl->slip; + if (slip > 2 && scale < 1.0) { + new_slip *= scale; + if (new_slip < 2) + new_slip = 2; + if (rrl->scaled_slip != new_slip) { + if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, + DNS_RRL_LOG_DEBUG1, + "%d qps scaled slip" + " by %.2f from %d to %d", + (int)qps, scale, + slip, new_slip); + slip = new_slip; + rrl->scaled_slip = slip; + } + } + if (slip != 0 && ++e->slip_cnt >= slip) { + e->slip_cnt = 0; + return (DNS_RRL_RESULT_SLIP); + } + + return (rrl_result); +} + +static inline dns_rrl_qname_buf_t * +get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) { + dns_rrl_qname_buf_t *qbuf; + + qbuf = rrl->qnames[e->log_qname]; + if (qbuf == NULL || qbuf->e != e) + return (NULL); + return (qbuf); +} + +static inline void +free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) { + dns_rrl_qname_buf_t *qbuf; + + qbuf = get_qname(rrl, e); + if (qbuf != NULL) { + qbuf->e = NULL; + ISC_LIST_APPEND(rrl->qname_free, qbuf, link); + } +} + +static void +add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) +{ + isc_region_t region; + + isc_buffer_availableregion(lb, ®ion); + if (str_len >= region.length) { + if (region.length <= 0) + return; + str_len = region.length; + } + memcpy(region.base, str, str_len); + isc_buffer_add(lb, str_len); +} + +#define 
ADD_LOG_CSTR(eb, s) add_log_str(eb, s, sizeof(s)-1) + +/* + * Build strings for the logs + */ +static void +make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e, + const char *str1, const char *str2, isc_boolean_t plural, + dns_rrl_result_t rrl_result, + dns_name_t *qname, isc_boolean_t save_qname, dns_rcode_t rcode, + char *log_buf, unsigned int log_buf_len) +{ + isc_buffer_t lb; + dns_rrl_qname_buf_t *qbuf; + isc_netaddr_t cidr; + char strbuf[ISC_MAX(sizeof("/123"), sizeof(" (12345678)"))]; + isc_result_t msg_result; + + if (log_buf_len <= 1) { + if (log_buf_len == 1) + log_buf[0] = '\0'; + return; + } + isc_buffer_init(&lb, log_buf, log_buf_len-1); + + if (str1 != NULL) + add_log_str(&lb, str1, strlen(str1)); + if (str2 != NULL) + add_log_str(&lb, str2, strlen(str2)); + + switch (rrl_result) { + case DNS_RRL_RESULT_OK: + break; + case DNS_RRL_RESULT_DROP: + ADD_LOG_CSTR(&lb, "drop "); + break; + case DNS_RRL_RESULT_SLIP: + ADD_LOG_CSTR(&lb, "slip "); + break; + default: + INSIST(0); + break; + } + + + switch (e->key.rtype) { + case DNS_RRL_RTYPE_QUERY: + case DNS_RRL_RTYPE_ALL: + break; + case DNS_RRL_RTYPE_NXDOMAIN: + ADD_LOG_CSTR(&lb, "NXDOMAIN "); + break; + case DNS_RRL_RTYPE_ERROR: + if (rcode == dns_rcode_noerror) { + ADD_LOG_CSTR(&lb, "error "); + } else { + msg_result = dns_rcode_totext(rcode, &lb); + if (msg_result == ISC_R_SUCCESS) { + ADD_LOG_CSTR(&lb, " "); + } else { + ADD_LOG_CSTR(&lb, "UNKNOWN RCODE "); + } + } + break; + default: + INSIST(0); + } + + if (plural) + ADD_LOG_CSTR(&lb, "responses to "); + else + ADD_LOG_CSTR(&lb, "response to "); + + memset(&cidr, 0, sizeof(cidr)); + if (e->key.ipv6) { + snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen); + cidr.family = AF_INET6; + memcpy(&cidr.type.in6, e->key.ip, sizeof(cidr.type.in6)); + } else { + snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen); + cidr.family = AF_INET; + cidr.type.in.s_addr = e->key.ip[3]; + } + msg_result = isc_netaddr_totext(&cidr, &lb); + if (msg_result != 
ISC_R_SUCCESS) + ADD_LOG_CSTR(&lb, "?"); + add_log_str(&lb, strbuf, strlen(strbuf)); + + if (e->key.rtype == DNS_RRL_RTYPE_QUERY || + e->key.rtype == DNS_RRL_RTYPE_NXDOMAIN) { + qbuf = get_qname(rrl, e); + if (save_qname && qbuf == NULL && + qname != NULL && dns_name_isabsolute(qname)) { + /* + * Capture the qname for the "stop limiting" message. + */ + qbuf = ISC_LIST_TAIL(rrl->qname_free); + if (qbuf != NULL) { + ISC_LIST_UNLINK(rrl->qname_free, qbuf, link); + } else if (rrl->num_qnames < DNS_RRL_NUM_QNAMES) { + qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf)); + if (qbuf != NULL) { + memset(qbuf, 0, sizeof(*qbuf)); + qbuf->index = rrl->num_qnames; + rrl->qnames[rrl->num_qnames++] = qbuf; + } else { + isc_log_write(dns_lctx, + DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, + DNS_RRL_LOG_FAIL, + "isc_mem_get(%d)" + " failed for RRL qname", + (int)sizeof(*qbuf)); + } + } + if (qbuf != NULL) { + e->log_qname = qbuf->index; + qbuf->e = e; + dns_fixedname_init(&qbuf->qname); + dns_name_copy(qname, + dns_fixedname_name(&qbuf->qname), + NULL); + } + } + if (qbuf != NULL) + qname = dns_fixedname_name(&qbuf->qname); + if (qname != NULL) { + ADD_LOG_CSTR(&lb, " for "); + dns_name_totext(qname, ISC_TRUE, &lb); + ADD_LOG_CSTR(&lb, " "); + } else { + ADD_LOG_CSTR(&lb, " for (?) "); + } + dns_rdataclass_totext(e->key.qclass-2, &lb); + ADD_LOG_CSTR(&lb, " "); + dns_rdatatype_totext(e->key.qtype, &lb); + snprintf(strbuf, sizeof(strbuf), " (%08x)", e->key.qname_hash); + add_log_str(&lb, strbuf, strlen(strbuf)); + } + + /* + * We saved room for '\0'. + */ + log_buf[isc_buffer_usedlength(&lb)] = '\0'; +} + +static void +log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, + char *log_buf, unsigned int log_buf_len) +{ + if (e->logged) { + make_log_buf(rrl, e, rrl->log_only ? 
"would " : NULL, + "stop limiting ", ISC_TRUE, + DNS_RRL_RESULT_OK, NULL, ISC_FALSE, + dns_rcode_noerror, log_buf, log_buf_len); + isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, + DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, + "%s", log_buf); + free_qname(rrl, e); + e->logged = ISC_FALSE; + } +} + +/* + * Log some messages for streams that have stopped being rate limited + * or really for buckets that are now idle after having done something. + */ +static void +prune_qnames(dns_rrl_t *rrl, isc_stdtime_t now, + char *log_buf, unsigned int log_buf_len) +{ + dns_rrl_entry_t *e, *e_prev; + isc_boolean_t move_ptr; + int cnt, age; + + move_ptr = ISC_TRUE; + cnt = 8; + + for (e = rrl->log_ended; e != NULL; e = e->lru.prev) { + e_prev = e; + if (!e->logged) + continue; + + age = delta_rrl_time(e->last_used, now); + if (age <= rrl->window) { + rrl->prune_time = now; + break; + } + + if (age < DNS_RRL_STOP_LOG_SECS || + response_balance(rrl, e, age) < 0) { + move_ptr = ISC_FALSE; + continue; + } + + log_end(rrl, e, log_buf, log_buf_len); + + /* + * Do not log many messages at once to avoid stalling real work. + */ + if (--cnt <= 0) + break; + } + if (e == NULL) + rrl->prune_time = now; + if (move_ptr) + rrl->log_ended = e_prev; +} + +/* + * Main rate limit interface. 
+ */
+/*
+ * Decide whether the response described by the arguments should be sent.
+ * log_buf must not be NULL and log_buf_len must be greater than 0.
+ * Returns DNS_RRL_RESULT_OK for exempt clients, for TCP, and when no
+ * limit is exceeded; otherwise returns DNS_RRL_RESULT_DROP or
+ * DNS_RRL_RESULT_SLIP, and if wouldlog is set leaves a message for the
+ * caller to log in log_buf.
+ */
+dns_rrl_result_t
+dns_rrl(dns_view_t *view,
+	const isc_sockaddr_t *client_addr, isc_boolean_t is_tcp,
+	dns_rdataclass_t qclass, dns_rdatatype_t qtype,
+	dns_name_t *qname, dns_rcode_t rcode, isc_stdtime_t now,
+	isc_boolean_t wouldlog, char *log_buf, unsigned int log_buf_len)
+{
+	dns_rrl_t *rrl;
+	dns_rrl_rtype_t rtype;
+	dns_rrl_entry_t *e;
+	isc_netaddr_t netclient;
+	int secs;
+	double qps, scale;
+	int exempt_match;
+	isc_result_t result;
+	dns_rrl_result_t rrl_result;
+
+	INSIST(log_buf != NULL && log_buf_len > 0);
+
+	/*
+	 * Clients matching the exempt ACL are never limited.
+	 */
+	rrl = view->rrl;
+	if (rrl->exempt != NULL) {
+		isc_netaddr_fromsockaddr(&netclient, client_addr);
+		result = dns_acl_match(&netclient, NULL, rrl->exempt,
+				       &view->aclenv, &exempt_match, NULL);
+		if (result == ISC_R_SUCCESS && exempt_match > 0)
+			return (DNS_RRL_RESULT_OK);
+	}
+
+	LOCK(&rrl->lock);
+
+	/*
+	 * Estimate total query per second rate when scaling by qps.
+	 */
+	if (rrl->qps_scale == 0) {
+		qps = 0.0;
+		scale = 1.0;
+	} else {
+		++rrl->qps_responses;
+		secs = delta_rrl_time(rrl->qps_time, now);
+		if (secs <= 0) {
+			qps = rrl->qps;
+		} else {
+			qps = (1.0*rrl->qps_responses) / secs;
+			if (secs >= rrl->window) {
+				if (isc_log_wouldlog(dns_lctx,
+						     DNS_RRL_LOG_DEBUG3))
+					isc_log_write(dns_lctx,
+						      DNS_LOGCATEGORY_RRL,
+						      DNS_LOGMODULE_REQUEST,
+						      DNS_RRL_LOG_DEBUG3,
+						      "%d responses/%d seconds"
+						      " = %d qps",
+						      rrl->qps_responses, secs,
+						      (int)qps);
+				rrl->qps = qps;
+				rrl->qps_responses = 0;
+				rrl->qps_time = now;
+			} else if (qps < rrl->qps) {
+				qps = rrl->qps;
+			}
+		}
+		scale = rrl->qps_scale / qps;
+	}
+
+	if (rrl->prune_time != now)
+		prune_qnames(rrl, now, log_buf, log_buf_len);
+
+	/*
+	 * Notice TCP responses when scaling limits by qps.
+	 * Do not try to rate limit TCP responses.
+	 */
+	if (is_tcp) {
+		if (scale < 1.0) {
+			e = get_rrl_entry(rrl, client_addr,
+					  dns_rdatatype_none, NULL, 0,
+					  DNS_RRL_RTYPE_TCP, now, ISC_TRUE,
+					  log_buf, log_buf_len);
+			if (e != NULL) {
+				e->responses = -(rrl->window+1);
+				e->last_used = now;
+			}
+		}
+		UNLOCK(&rrl->lock);
+		/*
+		 * This function returns a dns_rrl_result_t, not an
+		 * isc_result_t.
+		 */
+		return (DNS_RRL_RESULT_OK);
+	}
+
+	/*
+	 * Find the right kind of entry, creating it if necessary.
+	 * If that is impossible, then nothing more can be done
+	 */
+	if (rcode == dns_rcode_noerror)
+		rtype = DNS_RRL_RTYPE_QUERY;
+	else if (rcode == dns_rcode_nxdomain)
+		rtype = DNS_RRL_RTYPE_NXDOMAIN;
+	else
+		rtype = DNS_RRL_RTYPE_ERROR;
+	e = get_rrl_entry(rrl, client_addr, qtype, qname, qclass, rtype,
+			  now, ISC_TRUE, log_buf, log_buf_len);
+	if (e == NULL) {
+		UNLOCK(&rrl->lock);
+		return (DNS_RRL_RESULT_OK);
+	}
+
+	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
+		/*
+		 * Do not worry about speed or releasing the lock.
+		 * This message appears before messages from debit_rrl_entry().
+		 */
+		make_log_buf(rrl, e, "consider limiting ", NULL, ISC_FALSE,
+			     DNS_RRL_RESULT_OK, qname, ISC_FALSE,
+			     rcode, log_buf, log_buf_len);
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
+			      "%s", log_buf);
+	}
+
+	rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
+				     log_buf, log_buf_len);
+
+	if (rrl->all_per_second != 0) {
+		/*
+		 * We must debit the all-per-second token bucket if we have
+		 * an all-per-second limit for the IP address.
+		 * The all-per-second limit determines the log message
+		 * when both limits are hit.
+		 */
+		dns_rrl_entry_t *e_all;
+		dns_rrl_result_t rrl_all_result;
+
+		e_all = get_rrl_entry(rrl, client_addr,
+				      dns_rdatatype_none, NULL, 0,
+				      DNS_RRL_RTYPE_ALL, now, ISC_TRUE,
+				      log_buf, log_buf_len);
+		if (e_all == NULL) {
+			UNLOCK(&rrl->lock);
+			return (DNS_RRL_RESULT_OK);
+		}
+		rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
+						 client_addr, now,
+						 log_buf, log_buf_len);
+		if (rrl_all_result != DNS_RRL_RESULT_OK) {
+			int level;
+
+			e = e_all;
+			if (rrl_result == DNS_RRL_RESULT_OK)
+				level = DNS_RRL_LOG_DEBUG2;
+			else
+				level = DNS_RRL_LOG_DEBUG1;
+			rrl_result = rrl_all_result;
+			if (isc_log_wouldlog(dns_lctx, level)) {
+				make_log_buf(rrl, e,
+					     "prefer all-per-second limiting ",
+					     NULL, ISC_TRUE, DNS_RRL_RESULT_OK,
+					     NULL, ISC_FALSE, dns_rcode_noerror,
+					     log_buf, log_buf_len);
+				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+					      DNS_LOGMODULE_REQUEST, level,
+					      "%s", log_buf);
+			}
+		}
+	}
+
+	if (rrl_result == DNS_RRL_RESULT_OK) {
+		UNLOCK(&rrl->lock);
+		return (DNS_RRL_RESULT_OK);
+	}
+
+	/*
+	 * Log occasionally in the rate-limit category.
+	 */
+	if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
+	    isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP)) {
+		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
+			     e->logged ? "continue limiting " : "limit ",
+			     ISC_TRUE, DNS_RRL_RESULT_OK,
+			     qname, ISC_TRUE, rcode, log_buf, log_buf_len);
+		e->logged = ISC_TRUE;
+		e->log_secs = 0;
+		/*
+		 * Avoid holding the lock.
+		 * A NULL e records that the lock has been released.
+		 */
+		if (!wouldlog) {
+			UNLOCK(&rrl->lock);
+			e = NULL;
+		}
+		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
+			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
+			      "%s", log_buf);
+	}
+
+	/*
+	 * Make a log message for the caller.
+	 */
+	if (wouldlog)
+		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
+			     NULL, ISC_FALSE, rrl_result,
+			     qname, ISC_FALSE, rcode, log_buf, log_buf_len);
+
+	if (e != NULL) {
+		/*
+		 * Do not save the qname unless we might need it for
+		 * the ending log message.
+		 */
+		if (!e->logged)
+			free_qname(rrl, e);
+		UNLOCK(&rrl->lock);
+	}
+	return (rrl_result);
+}
+
+/*
+ * Detach the rate limiting state from the view and free it.
+ * Does nothing when the view has no rate limiting state.
+ */
+void
+dns_rrl_view_destroy(dns_view_t *view) {
+	dns_rrl_t *rrl;
+	dns_rrl_block_t *b;
+	dns_rrl_hash_t *h;
+	int i;
+
+	rrl = view->rrl;
+	if (rrl == NULL)
+		return;
+	view->rrl = NULL;
+
+	/*
+	 * Assume the caller takes care of locking the view and anything else.
+	 *
+	 * Flush the pending "stop limiting" messages.  dns_rrl_init() calls
+	 * this function on its failure paths, so rrl->log_ended is checked
+	 * before it is dereferenced.
+	 * NOTE(review): presumably rrl->log_ended is still NULL when
+	 * add_rrl_entries() has failed -- confirm.
+	 */
+	if (rrl->log_ended != NULL) {
+		do {
+			char log_buf[DNS_RRL_LOG_BUF_LEN];
+
+			prune_qnames(rrl, rrl->prune_time+DNS_RRL_MAX_WINDOW+1,
+				     log_buf, sizeof(log_buf));
+		} while (rrl->log_ended->lru.prev != NULL);
+	}
+
+	/*
+	 * The qname buffers fill the array from the front, so stop at the
+	 * first empty slot.
+	 */
+	for (i = 0; i < DNS_RRL_NUM_QNAMES; ++i) {
+		if (rrl->qnames[i] == NULL)
+			break;
+		isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
+	}
+
+	if (rrl->exempt != NULL)
+		dns_acl_detach(&rrl->exempt);
+
+	DESTROYLOCK(&rrl->lock);
+
+	while (!ISC_LIST_EMPTY(rrl->blocks)) {
+		b = ISC_LIST_HEAD(rrl->blocks);
+		ISC_LIST_UNLINK(rrl->blocks, b, link);
+		isc_mem_put(rrl->mctx, b, b->size);
+	}
+
+	h = rrl->hash;
+	if (h != NULL)
+		isc_mem_put(rrl->mctx, h,
+			    sizeof(*h)+(h->length-1)*sizeof(h->bins[0]));
+
+	h = rrl->old_hash;
+	if (h != NULL)
+		isc_mem_put(rrl->mctx, h,
+			    sizeof(*h)+(h->length-1)*sizeof(h->bins[0]));
+
+	isc_mem_put(rrl->mctx, rrl, sizeof(*rrl));
+}
+
+/*
+ * Allocate the rate limiting state for a view, attach it to view->rrl,
+ * and create the initial min_entries entries and hash table.
+ * On success *rrlp points to the new state and ISC_R_SUCCESS is returned.
+ * On failure *rrlp is NULL, everything allocated here has been released,
+ * and the failing result is returned.
+ */
+isc_result_t
+dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
+	dns_rrl_t *rrl;
+	isc_result_t result;
+
+	*rrlp = NULL;
+
+	rrl = isc_mem_get(view->mctx, sizeof(*rrl));
+	if (rrl == NULL)
+		return (ISC_R_NOMEMORY);
+	memset(rrl, 0, sizeof(*rrl));
+	rrl->mctx = view->mctx;
+	result = isc_mutex_init(&rrl->lock);
+	if (result != ISC_R_SUCCESS) {
+		isc_mem_put(view->mctx, rrl, sizeof(*rrl));
+		return (result);
+	}
+
+	/*
+	 * Attach to the view first so that dns_rrl_view_destroy() can
+	 * undo a partial initialization.
+	 */
+	view->rrl = rrl;
+
+	result = add_rrl_entries(rrl, min_entries);
+	if (result != ISC_R_SUCCESS) {
+		dns_rrl_view_destroy(view);
+		return (result);
+	}
+	result = expand_rrl_hash(rrl, 0);
+	if (result != ISC_R_SUCCESS) {
+		dns_rrl_view_destroy(view);
+		return (result);
+	}
+
+	*rrlp = rrl;
+	return (ISC_R_SUCCESS);
+}
diff -r -u lib/dns/view.c-orig lib/dns/view.c --- lib/dns/view.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/dns/view.c 2004-01-01 00:00:00.000000000 +0000 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -181,6 +182,7 @@ view->answeracl_exclude = NULL; view->denyanswernames = NULL; view->answernames_exclude = NULL; + view->rrl = NULL; view->provideixfr = ISC_TRUE; view->maxcachettl = 7 * 24 * 3600; view->maxncachettl = 3 * 3600; @@ -331,9 +333,11 @@ dns_acache_detach(&view->acache); } dns_rpz_view_destroy(view); + dns_rrl_view_destroy(view); #else INSIST(view->acache == NULL); INSIST(ISC_LIST_EMPTY(view->rpz_zones)); + INSIST(view->rrl == NULL); #endif if (view->requestmgr != NULL) dns_requestmgr_detach(&view->requestmgr); diff -r -u lib/isccfg/namedconf.c-orig lib/isccfg/namedconf.c --- lib/isccfg/namedconf.c-orig 2004-01-01 00:00:00.000000000 +0000 +++ lib/isccfg/namedconf.c 2004-01-01 00:00:00.000000000 +0000 @@ -1244,6 +1244,39 @@ }; +/* + * rate-limit: clause table, clause set, and map type for the "rate-limit" clause + */ +static cfg_clausedef_t rrl_clauses[] = { + { "responses-per-second", &cfg_type_uint32, 0 }, + { "errors-per-second", &cfg_type_uint32, 0 }, + { "nxdomains-per-second", &cfg_type_uint32, 0 }, + { "responses-per-second", &cfg_type_uint32, 0 }, /* NOTE(review): repeats the first entry -- confirm intended */ + { "all-per-second", &cfg_type_uint32, 0 }, + { "slip", &cfg_type_uint32, 0 }, + { "window", &cfg_type_uint32, 0 }, + { "log-only", &cfg_type_boolean, 0 }, + { "qps-scale", &cfg_type_uint32, 0 }, + { "IPv4-prefix-length", &cfg_type_uint32, 0 }, + { "IPv6-prefix-length", &cfg_type_uint32, 0 }, + { "exempt-clients", &cfg_type_bracketed_aml, 0 }, + { "max-table-size", &cfg_type_uint32, 0 }, + { "min-table-size", &cfg_type_uint32, 0 }, + { NULL, NULL, 0 } +}; + +static cfg_clausedef_t *rrl_clausesets[] = { + rrl_clauses, + NULL +}; + +static cfg_type_t cfg_type_rrl = { + "rate-limit", cfg_parse_map, cfg_print_map, cfg_doc_map, + &cfg_rep_map, rrl_clausesets +}; + + + /*% * dnssec-lookaside */ @@ -1397,6 +1430,7 @@
CFG_CLAUSEFLAG_NOTCONFIGURED }, #endif { "response-policy", &cfg_type_rpz, 0 }, + { "rate-limit", &cfg_type_rrl, 0 }, { NULL, NULL, 0 } };