[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: Debian 11 bind9 and CVE-2025-40778



Hello,

El 04/11/25 a las 08:57, Frank Richter escribió:
> Hello,
> 
> are you working on a patched version of bind9 1:9.16.50-1~deb11u3 to fix
> CVE-2025-40778?

Debian 11 "bullseye" is currently maintained by the Debian LTS Team
(https://wiki.debian.org/LTS). I can say that yes, a fix is being
prepared for 9.16, shipped in bullseye.

> 
> We’ve built bind9 with the attached patches from
> https://gitlab.isc.org/isc-projects/bind9/-/commits/bind-9.16?ref_type=heads
> We have been using this version since yesterday without problems so far.
> 
> Kind regards,
> Frank
> 
> -- 
> Frank Richter, Chemnitz University of Technology, Germany

> >From d9b5ef342916462bfd63391831d96afc80c12df3 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= <ondrej@isc.org>
> Date: Tue, 19 Aug 2025 19:22:18 +0200
> Subject: [PATCH] Use cryptographically-secure pseudo-random generator
>  everywhere
> 
> It was discovered in an upcoming academic paper that a xoshiro128**
> internal state can be recovered by an external 3rd party allowing to
> predict UDP ports and DNS IDs in the outgoing queries.  This could lead
> to an attacker spoofing the DNS answers with great efficiency and
> poisoning the DNS cache.
> 
> Change the internal random generator to system CSPRNG with buffering to
> avoid excessive syscalls.
> 
> Thanks Omer Ben Simhon and Amit Klein of Hebrew University of Jerusalem
> for responsibly reporting this to us.  Very cool research!
> 
> (cherry picked from commit cffcab9d5f3e709002f331b72498fcc229786ae2)
> ---
>  lib/isc/include/isc/os.h     |   5 +
>  lib/isc/include/isc/random.h |   2 +-
>  lib/isc/random.c             | 181 ++++++++++++++++++-----------------
>  lib/isc/tests/random_test.c  |   4 +-
>  4 files changed, 102 insertions(+), 90 deletions(-)
> 
> diff --git a/lib/isc/include/isc/os.h b/lib/isc/include/isc/os.h
> index 585abc0b124..699cbb971ac 100644
> --- a/lib/isc/include/isc/os.h
> +++ b/lib/isc/include/isc/os.h
> @@ -20,6 +20,11 @@
>  
>  ISC_LANG_BEGINDECLS
>  
> +/*%<
> + * Hardcode the L1 cacheline size of the CPU to 64.
> + */
> +#define ISC_OS_CACHELINE_SIZE 64
> +
>  unsigned int
>  isc_os_ncpus(void);
>  /*%<
> diff --git a/lib/isc/include/isc/random.h b/lib/isc/include/isc/random.h
> index 1e30d0c87d5..fd55343778a 100644
> --- a/lib/isc/include/isc/random.h
> +++ b/lib/isc/include/isc/random.h
> @@ -20,7 +20,7 @@
>  #include <isc/types.h>
>  
>  /*! \file isc/random.h
> - * \brief Implements wrapper around a non-cryptographically secure
> + * \brief Implements wrapper around a cryptographically secure
>   * pseudo-random number generator.
>   *
>   */
> diff --git a/lib/isc/random.c b/lib/isc/random.c
> index b11c39f419d..3fc0ff7234b 100644
> --- a/lib/isc/random.c
> +++ b/lib/isc/random.c
> @@ -31,131 +31,136 @@
>   */
>  
>  #include <inttypes.h>
> -#include <stdlib.h>
> -#include <string.h>
> -#include <unistd.h>
> +#include <stdio.h>
>  
> -#include <isc/once.h>
> +#include <isc/os.h>
>  #include <isc/platform.h>
>  #include <isc/random.h>
> -#include <isc/result.h>
>  #include <isc/thread.h>
> -#include <isc/types.h>
>  #include <isc/util.h>
>  
>  #include "entropy_private.h"
>  
> -/*
> - * The specific implementation for PRNG is included as a C file
> - * that has to provide a static variable named seed, and a function
> - * uint32_t next(void) that provides next random number.
> - *
> - * The implementation must be thread-safe.
> - */
> -
> -/*
> - * Two contestants have been considered: the xoroshiro family of the
> - * functions by Villa&Blackman, and PCG by O'Neill.  After
> - * consideration, the xoshiro128starstar function has been chosen as
> - * the uint32_t random number provider because it is very fast and has
> - * good enough properties for our usage pattern.
> - */
> -#include "xoshiro128starstar.c"
> +#define ISC_RANDOM_BUFSIZE (ISC_OS_CACHELINE_SIZE / sizeof(uint32_t))
>  
> -ISC_THREAD_LOCAL isc_once_t isc_random_once = ISC_ONCE_INIT;
> +ISC_THREAD_LOCAL uint32_t isc__random_pool[ISC_RANDOM_BUFSIZE];
> +ISC_THREAD_LOCAL size_t isc__random_pos = ISC_RANDOM_BUFSIZE;
>  
> -static void
> -isc_random_initialize(void) {
> -	int useed[4] = { 0, 0, 0, 1 };
> +static uint32_t
> +random_u32(void) {
>  #if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
>  	/*
> -	 * Set a constant seed to help in problem reproduction should fuzzing
> -	 * find a crash or a hang.  The seed array must be non-zero else
> -	 * xoshiro128starstar will generate an infinite series of zeroes.
> +	 * A fixed stream of numbers helps with problem reproduction when
> +	 * fuzzing.  The first result needs to be non-zero as expected by
> +	 * random_test.c (it starts with ISC_RANDOM_BUFSIZE, see above).
>  	 */
> -#else  /* if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */
> -	isc_entropy_get(useed, sizeof(useed));
> +	return (uint32_t)(isc__random_pos++);
>  #endif /* if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION */
> -	memmove(seed, useed, sizeof(seed));
> +
> +	if (isc__random_pos == ISC_RANDOM_BUFSIZE) {
> +		isc_entropy_get(isc__random_pool, sizeof(isc__random_pool));
> +		isc__random_pos = 0;
> +	}
> +
> +	return isc__random_pool[isc__random_pos++];
>  }
>  
>  uint8_t
>  isc_random8(void) {
> -	RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
> -		      ISC_R_SUCCESS);
> -	return (next() & 0xff);
> +	return (uint8_t)random_u32();
>  }
>  
>  uint16_t
>  isc_random16(void) {
> -	RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
> -		      ISC_R_SUCCESS);
> -	return (next() & 0xffff);
> +	return (uint16_t)random_u32();
>  }
>  
>  uint32_t
>  isc_random32(void) {
> -	RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
> -		      ISC_R_SUCCESS);
> -	return (next());
> +	return random_u32();
>  }
>  
>  void
>  isc_random_buf(void *buf, size_t buflen) {
> -	int i;
> -	uint32_t r;
> +	REQUIRE(buflen == 0 || buf != NULL);
>  
> -	REQUIRE(buf != NULL);
> -	REQUIRE(buflen > 0);
> -
> -	RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
> -		      ISC_R_SUCCESS);
> -
> -	for (i = 0; i + sizeof(r) <= buflen; i += sizeof(r)) {
> -		r = next();
> -		memmove((uint8_t *)buf + i, &r, sizeof(r));
> +	if (buf == NULL || buflen == 0) {
> +		return;
>  	}
> -	r = next();
> -	memmove((uint8_t *)buf + i, &r, buflen % sizeof(r));
> -	return;
> +
> +	isc_entropy_get(buf, buflen);
>  }
>  
>  uint32_t
> -isc_random_uniform(uint32_t upper_bound) {
> -	/* Copy of arc4random_uniform from OpenBSD */
> -	uint32_t r, min;
> -
> -	RUNTIME_CHECK(isc_once_do(&isc_random_once, isc_random_initialize) ==
> -		      ISC_R_SUCCESS);
> -
> -	if (upper_bound < 2) {
> -		return (0);
> -	}
> -
> -#if (ULONG_MAX > 0xffffffffUL)
> -	min = 0x100000000UL % upper_bound;
> -#else  /* if (ULONG_MAX > 0xffffffffUL) */
> -	/* Calculate (2**32 % upper_bound) avoiding 64-bit math */
> -	if (upper_bound > 0x80000000) {
> -		min = 1 + ~upper_bound; /* 2**32 - upper_bound */
> -	} else {
> -		/* (2**32 - (x * 2)) % x == 2**32 % x when x <= 2**31 */
> -		min = ((0xffffffff - (upper_bound * 2)) + 1) % upper_bound;
> -	}
> -#endif /* if (ULONG_MAX > 0xffffffffUL) */
> -
> +isc_random_uniform(uint32_t limit) {
>  	/*
> -	 * This could theoretically loop forever but each retry has
> -	 * p > 0.5 (worst case, usually far better) of selecting a
> -	 * number inside the range we need, so it should rarely need
> -	 * to re-roll.
> +	 * Daniel Lemire's nearly-divisionless unbiased bounded random numbers.
> +	 *
> +	 * https://lemire.me/blog/?p=17551
> +	 *
> +	 * The raw random number generator `next()` returns a 32-bit value.
> +	 * We do a 64-bit multiply `next() * limit` and treat the product as a
> +	 * 32.32 fixed-point value less than the limit. Our result will be the
> +	 * integer part (upper 32 bits), and we will use the fraction part
> +	 * (lower 32 bits) to determine whether or not we need to resample.
>  	 */
> -	for (;;) {
> -		r = next();
> -		if (r >= min) {
> -			break;
> +	uint64_t num = (uint64_t)random_u32() * (uint64_t)limit;
> +	/*
> +	 * In the fast path, we avoid doing a division in most cases by
> +	 * comparing the fraction part of `num` with the limit, which is
> +	 * a slight over-estimate for the exact resample threshold.
> +	 */
> +	if ((uint32_t)(num) < limit) {
> +		/*
> +		 * We are in the slow path where we re-do the approximate test
> +		 * more accurately. The exact threshold for the resample loop
> +		 * is the remainder after dividing the raw RNG limit `1 << 32`
> +		 * by the caller's limit. We use a trick to calculate it
> +		 * within 32 bits:
> +		 *
> +		 *     (1 << 32) % limit
> +		 * == ((1 << 32) - limit) % limit
> +		 * ==  (uint32_t)(-limit) % limit
> +		 *
> +		 * This division is safe: we know that `limit` is strictly
> +		 * greater than zero because of the slow-path test above.
> +		 */
> +		uint32_t residue = (uint32_t)(-limit) % limit;
> +		/*
> +		 * Unless we get one of `N = (1 << 32) - residue` valid
> +		 * values, we reject the sample. This `N` is a multiple of
> +		 * `limit`, so our results will be unbiased; and `N` is the
> +		 * largest multiple that fits in 32 bits, so rejections are as
> +		 * rare as possible.
> +		 *
> +		 * There are `limit` possible values for the integer part of
> +		 * our fixed-point number. Each one corresponds to `N/limit`
> +		 * or `N/limit + 1` possible fraction parts. For our result to
> +		 * be unbiased, every possible integer part must have the same
> +		 * number of possible valid fraction parts. So, when we get
> +		 * the superfluous value in the `N/limit + 1` cases, we need
> +		 * to reject and resample.
> +		 *
> +		 * Because of the multiplication, the possible values in the
> +		 * fraction part are equally spaced by `limit`, with varying
> +		 * gaps at each end of the fraction's 32-bit range. We will
> +		 * choose a range of size `N` (a multiple of `limit`) into
> +		 * which valid fraction values must fall, with the rest of the
> +		 * 32-bit range covered by the `residue`. Lemire's paper says
> +		 * that exactly `N/limit` possible values spaced apart by
> +		 * `limit` will fit into our size `N` valid range, regardless
> +		 * of the size of the end gaps, the phase alignment of the
> +		 * values, or the position of the range.
> +		 *
> +		 * So, when a fraction value falls in the `residue` outside
> +		 * our valid range, it is superfluous, and we resample.
> +		 */
> +		while ((uint32_t)(num) < residue) {
> +			num = (uint64_t)random_u32() * (uint64_t)limit;
>  		}
>  	}
> -
> -	return (r % upper_bound);
> +	/*
> +	 * Return the integer part (upper 32 bits).
> +	 */
> +	return (uint32_t)(num >> 32);
>  }
> diff --git a/lib/isc/tests/random_test.c b/lib/isc/tests/random_test.c
> index 8025583f87e..57b6d997b01 100644
> --- a/lib/isc/tests/random_test.c
> +++ b/lib/isc/tests/random_test.c
> @@ -347,7 +347,9 @@ random_test(pvalue_func_t *func, isc_random_func test_func) {
>  			}
>  			break;
>  		case ISC_RANDOM_BYTES:
> -			isc_random_buf(values, sizeof(values));
> +			for (i = 0; i < ARRAY_SIZE(values); i++) {
> +				values[i] = isc_random32();
> +			}
>  			break;
>  		case ISC_RANDOM_UNIFORM:
>  			uniform_values = (uint16_t *)values;
> -- 
> GitLab
> 

> >From 2f0f44d493c382a7f0a3adfe7c4976b18a3d480b Mon Sep 17 00:00:00 2001
> From: Evan Hunt <each@isc.org>
> Date: Mon, 29 Sep 2025 21:46:59 -0700
> Subject: [PATCH 1/3] Tighten restrictions on caching NS RRsets in authority
>  section
> 
> To prevent certain spoofing attacks, a new check has been added
> to the existing rules for whether NS data can be cached: the owner
> name of the NS RRset must be an ancestor of the name being queried.
> 
> (cherry picked from commit fa153f791f9324bf84abf8d259e11c0531fe6e25)
> ---
>  lib/dns/resolver.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c
> index 737f83cdbf5..7a2768c1416 100644
> --- a/lib/dns/resolver.c
> +++ b/lib/dns/resolver.c
> @@ -9243,7 +9243,9 @@ rctx_authority_positive(respctx_t *rctx) {
>  		dns_message_currentname(rctx->query->rmessage,
>  					DNS_SECTION_AUTHORITY, &name);
>  
> -		if (!name_external(name, dns_rdatatype_ns, fctx)) {
> +		if (!name_external(name, dns_rdatatype_ns, fctx) &&
> +		    dns_name_issubdomain(&fctx->name, name))
> +		{
>  			dns_rdataset_t *rdataset = NULL;
>  
>  			/*
> -- 
> GitLab
> 
> 
> >From 50479358efdf432d690415131b74b5df158a9d69 Mon Sep 17 00:00:00 2001
> From: Evan Hunt <each@isc.org>
> Date: Mon, 29 Sep 2025 21:52:43 -0700
> Subject: [PATCH 2/3] Further restrict addresses that are cached when
>  processing referrals
> 
> Use the owner name of the NS record as the bailiwick apex name
> when determining which additional records to cache, rather than
> the name of the delegating zone (or a parent thereof).
> 
> (cherry picked from commit a41054e9e606a61f1b3c8bc0c54e2f1059347165)
> ---
>  lib/dns/resolver.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c
> index 7a2768c1416..168fcc5e808 100644
> --- a/lib/dns/resolver.c
> +++ b/lib/dns/resolver.c
> @@ -7162,8 +7162,9 @@ mark_related(dns_name_t *name, dns_rdataset_t *rdataset, bool external,
>   * subdomain or because it's below a forward declaration or a
>   * locally served zone.
>   */
> -static bool
> -name_external(const dns_name_t *name, dns_rdatatype_t type, fetchctx_t *fctx) {
> +static inline bool
> +name_external(const dns_name_t *name, dns_rdatatype_t type, respctx_t *rctx) {
> +	fetchctx_t *fctx = rctx->fctx;
>  	isc_result_t result;
>  	dns_forwarders_t *forwarders = NULL;
>  	dns_fixedname_t fixed, zfixed;
> @@ -7176,7 +7177,7 @@ name_external(const dns_name_t *name, dns_rdatatype_t type, fetchctx_t *fctx) {
>  	dns_namereln_t rel;
>  
>  	apex = (ISDUALSTACK(fctx->addrinfo) || !ISFORWARDER(fctx->addrinfo))
> -		       ? &fctx->domain
> +		       ? rctx->ns_name != NULL ? rctx->ns_name : &fctx->domain
>  		       : fctx->fwdname;
>  
>  	/*
> @@ -7285,7 +7286,7 @@ check_section(void *arg, const dns_name_t *addname, dns_rdatatype_t type,
>  	result = dns_message_findname(rctx->query->rmessage, section, addname,
>  				      dns_rdatatype_any, 0, &name, NULL);
>  	if (result == ISC_R_SUCCESS) {
> -		external = name_external(name, type, fctx);
> +		external = name_external(name, type, rctx);
>  		if (type == dns_rdatatype_a) {
>  			for (rdataset = ISC_LIST_HEAD(name->list);
>  			     rdataset != NULL;
> @@ -8920,7 +8921,7 @@ rctx_answer_scan(respctx_t *rctx) {
>  			/*
>  			 * Don't accept DNAME from parent namespace.
>  			 */
> -			if (name_external(name, dns_rdatatype_dname, fctx)) {
> +			if (name_external(name, dns_rdatatype_dname, rctx)) {
>  				continue;
>  			}
>  
> @@ -9243,7 +9244,7 @@ rctx_authority_positive(respctx_t *rctx) {
>  		dns_message_currentname(rctx->query->rmessage,
>  					DNS_SECTION_AUTHORITY, &name);
>  
> -		if (!name_external(name, dns_rdatatype_ns, fctx) &&
> +		if (!name_external(name, dns_rdatatype_ns, rctx) &&
>  		    dns_name_issubdomain(&fctx->name, name))
>  		{
>  			dns_rdataset_t *rdataset = NULL;
> -- 
> GitLab
> 
> 
> >From 33a7db1fe964e55b76b4ac003ecc56cc67028bd9 Mon Sep 17 00:00:00 2001
> From: Evan Hunt <each@isc.org>
> Date: Mon, 29 Sep 2025 21:57:48 -0700
> Subject: [PATCH 3/3] Retry lookups with unsigned DNAME over TCP
> 
> To prevent spoofed unsigned DNAME responses from being accepted, retry
> queries whose responses contain unsigned DNAMEs over TCP if the response
> is not TSIG signed or there isn't a good DNS CLIENT COOKIE.
> 
> To prevent test failures, this required adding TCP support to the
> ans3 and ans4 servers in the chain system test.
> 
> (cherry picked from commit 2e40705c06831988106335ed77db3cf924d431f6)
> ---
>  bin/tests/system/chain/ans3/ans.pl | 143 -------------------
>  bin/tests/system/chain/ans3/ans.py | 217 +++++++++++++++++++++++++++++
>  bin/tests/system/chain/ans4/ans.py |  57 ++++++--
>  lib/dns/include/dns/message.h      |   8 ++
>  lib/dns/message.c                  |  12 ++
>  lib/dns/resolver.c                 |  99 ++++++++++---
>  6 files changed, 368 insertions(+), 168 deletions(-)
>  delete mode 100644 bin/tests/system/chain/ans3/ans.pl
>  create mode 100644 bin/tests/system/chain/ans3/ans.py
> 
> diff --git a/bin/tests/system/chain/ans3/ans.pl b/bin/tests/system/chain/ans3/ans.pl
> deleted file mode 100644
> index e42240be63a..00000000000
> --- a/bin/tests/system/chain/ans3/ans.pl
> +++ /dev/null
> @@ -1,143 +0,0 @@
> -#!/usr/bin/env perl
> -
> -# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
> -#
> -# SPDX-License-Identifier: MPL-2.0
> -#
> -# This Source Code Form is subject to the terms of the Mozilla Public
> -# License, v. 2.0.  If a copy of the MPL was not distributed with this
> -# file, you can obtain one at https://mozilla.org/MPL/2.0/.
> -#
> -# See the COPYRIGHT file distributed with this work for additional
> -# information regarding copyright ownership.
> -
> -use strict;
> -use warnings;
> -
> -use IO::File;
> -use Getopt::Long;
> -use Net::DNS::Nameserver;
> -
> -my $pidf = new IO::File "ans.pid", "w" or die "cannot open pid file: $!";
> -print $pidf "$$\n" or die "cannot write pid file: $!";
> -$pidf->close or die "cannot close pid file: $!";
> -sub rmpid { unlink "ans.pid"; exit 1; };
> -sub term { };
> -
> -$SIG{INT} = \&rmpid;
> -if ($Net::DNS::VERSION > 1.41) {
> -    $SIG{TERM} = \&term;
> -} else {
> -    $SIG{TERM} = \&rmpid;
> -}
> -
> -my $localaddr = "10.53.0.3";
> -
> -my $localport = int($ENV{'PORT'});
> -if (!$localport) { $localport = 5300; }
> -
> -my $verbose = 0;
> -my $ttl = 60;
> -my $zone = "example.broken";
> -my $nsname = "ns3.$zone";
> -my $synth = "synth-then-dname.$zone";
> -my $synth2 = "synth2-then-dname.$zone";
> -
> -sub reply_handler {
> -    my ($qname, $qclass, $qtype, $peerhost, $query, $conn) = @_;
> -    my ($rcode, @ans, @auth, @add);
> -
> -    print ("request: $qname/$qtype\n");
> -    STDOUT->flush();
> -
> -    if ($qname eq "example.broken") {
> -        if ($qtype eq "SOA") {
> -	    my $rr = new Net::DNS::RR("$qname $ttl $qclass SOA . . 0 0 0 0 0");
> -	    push @ans, $rr;
> -        } elsif ($qtype eq "NS") {
> -	    my $rr = new Net::DNS::RR("$qname $ttl $qclass NS $nsname");
> -	    push @ans, $rr;
> -	    $rr = new Net::DNS::RR("$nsname $ttl $qclass A $localaddr");
> -	    push @add, $rr;
> -        }
> -        $rcode = "NOERROR";
> -    } elsif ($qname eq "cname-to-$synth2") {
> -        my $rr = new Net::DNS::RR("$qname $ttl $qclass CNAME name.$synth2");
> -	push @ans, $rr;
> -        $rr = new Net::DNS::RR("name.$synth2 $ttl $qclass CNAME name");
> -	push @ans, $rr;
> -        $rr = new Net::DNS::RR("$synth2 $ttl $qclass DNAME .");
> -	push @ans, $rr;
> -	$rcode = "NOERROR";
> -    } elsif ($qname eq "$synth" || $qname eq "$synth2") {
> -	if ($qtype eq "DNAME") {
> -	    my $rr = new Net::DNS::RR("$qname $ttl $qclass DNAME .");
> -	    push @ans, $rr;
> -	}
> -	$rcode = "NOERROR";
> -    } elsif ($qname eq "name.$synth") {
> -	my $rr = new Net::DNS::RR("$qname $ttl $qclass CNAME name.");
> -	push @ans, $rr;
> -	$rr = new Net::DNS::RR("$synth $ttl $qclass DNAME .");
> -	push @ans, $rr;
> -	$rcode = "NOERROR";
> -    } elsif ($qname eq "name.$synth2") {
> -	my $rr = new Net::DNS::RR("$qname $ttl $qclass CNAME name.");
> -	push @ans, $rr;
> -	$rr = new Net::DNS::RR("$synth2 $ttl $qclass DNAME .");
> -	push @ans, $rr;
> -	$rcode = "NOERROR";
> -    # The following three code branches referring to the "example.dname"
> -    # zone are necessary for the resolver variant of the CVE-2021-25215
> -    # regression test to work.  A named instance cannot be used for
> -    # serving the DNAME records below as a version of BIND vulnerable to
> -    # CVE-2021-25215 would crash while answering the queries asked by
> -    # the tested resolver.
> -    } elsif ($qname eq "ns3.example.dname") {
> -	if ($qtype eq "A") {
> -		my $rr = new Net::DNS::RR("$qname $ttl $qclass A 10.53.0.3");
> -		push @ans, $rr;
> -	}
> -	if ($qtype eq "AAAA") {
> -		my $rr = new Net::DNS::RR("example.dname. $ttl $qclass SOA . . 0 0 0 0 $ttl");
> -		push @auth, $rr;
> -	}
> -	$rcode = "NOERROR";
> -    } elsif ($qname eq "self.example.self.example.dname") {
> -	my $rr = new Net::DNS::RR("self.example.dname. $ttl $qclass DNAME dname.");
> -	push @ans, $rr;
> -	$rr = new Net::DNS::RR("$qname $ttl $qclass CNAME self.example.dname.");
> -	push @ans, $rr;
> -	$rcode = "NOERROR";
> -    } elsif ($qname eq "self.example.dname") {
> -	if ($qtype eq "DNAME") {
> -		my $rr = new Net::DNS::RR("$qname $ttl $qclass DNAME dname.");
> -		push @ans, $rr;
> -	}
> -	$rcode = "NOERROR";
> -    } else {
> -	$rcode = "REFUSED";
> -    }
> -    return ($rcode, \@ans, \@auth, \@add, { aa => 1 });
> -}
> -
> -GetOptions(
> -    'port=i' => \$localport,
> -    'verbose!' => \$verbose,
> -);
> -
> -my $ns = Net::DNS::Nameserver->new(
> -    LocalAddr => $localaddr,
> -    LocalPort => $localport,
> -    ReplyHandler => \&reply_handler,
> -    Verbose => $verbose,
> -);
> -
> -if ($Net::DNS::VERSION >= 1.42) {
> -    $ns->start_server();
> -    select(undef, undef, undef, undef);
> -    $ns->stop_server();
> -    unlink "ans.pid";
> -} else {
> -    $ns->main_loop;
> -}
> diff --git a/bin/tests/system/chain/ans3/ans.py b/bin/tests/system/chain/ans3/ans.py
> new file mode 100644
> index 00000000000..0a031c1145b
> --- /dev/null
> +++ b/bin/tests/system/chain/ans3/ans.py
> @@ -0,0 +1,217 @@
> +# Copyright (C) Internet Systems Consortium, Inc. ("ISC")
> +#
> +# SPDX-License-Identifier: MPL-2.0
> +#
> +# This Source Code Form is subject to the terms of the Mozilla Public
> +# License, v. 2.0.  If a copy of the MPL was not distributed with this
> +# file, you can obtain one at https://mozilla.org/MPL/2.0/.
> +#
> +# See the COPYRIGHT file distributed with this work for additional
> +# information regarding copyright ownership.
> +
> +############################################################################
> +# ans.py: See README.anspy for details.
> +############################################################################
> +
> +from __future__ import print_function
> +import os
> +import sys
> +import signal
> +import socket
> +import select
> +from datetime import datetime, timedelta
> +import functools
> +
> +import dns, dns.message, dns.query
> +from dns.rdatatype import *
> +from dns.rdataclass import *
> +from dns.rcode import *
> +from dns.name import *
> +
> +
> +############################################################################
> +# Respond to a DNS query.
> +############################################################################
> +def create_response(msg):
> +    ttl = 60
> +    zone = "example.broken."
> +    nsname = f"ns3.{zone}"
> +    synth = f"synth-then-dname.{zone}"
> +    synth2 = f"synth2-then-dname.{zone}"
> +
> +    m = dns.message.from_wire(msg)
> +    qname = m.question[0].name.to_text()
> +
> +    # prepare the response and convert to wire format
> +    r = dns.message.make_response(m)
> +
> +    # get qtype
> +    rrtype = m.question[0].rdtype
> +    qtype = dns.rdatatype.to_text(rrtype)
> +    print(f"request: {qname}/{qtype}")
> +
> +    rcode = "NOERROR"
> +    if qname == zone:
> +        if qtype == "SOA":
> +            r.answer.append(dns.rrset.from_text(qname, ttl, IN, SOA, ". . 0 0 0 0 0"))
> +        elif qtype == "NS":
> +            r.answer.append(dns.rrset.from_text(qname, ttl, IN, NS, nsname))
> +            r.additional.append(dns.rrset.from_text(nsname, ttl, IN, A, ip4))
> +    elif qname == f"cname-to-{synth2}":
> +        r.answer.append(dns.rrset.from_text(qname, ttl, IN, CNAME, f"name.{synth2}"))
> +        r.answer.append(dns.rrset.from_text(f"name.{synth2}", ttl, IN, CNAME, "name."))
> +        r.answer.append(dns.rrset.from_text(synth2, ttl, IN, DNAME, "."))
> +    elif qname == f"{synth}" or qname == f"{synth2}":
> +        if qtype == "DNAME":
> +            r.answer.append(dns.rrset.from_text(qname, ttl, IN, DNAME, "."))
> +    elif qname == f"name.{synth}":
> +        r.answer.append(dns.rrset.from_text(qname, ttl, IN, CNAME, "name."))
> +        r.answer.append(dns.rrset.from_text(synth, ttl, IN, DNAME, "."))
> +    elif qname == f"name.{synth2}":
> +        r.answer.append(dns.rrset.from_text(qname, ttl, IN, CNAME, "name."))
> +        r.answer.append(dns.rrset.from_text(synth2, ttl, IN, DNAME, "."))
> +    elif qname == "ns3.example.dname.":
> +        # This and the next two code branches referring to the "example.dname"
> +        # zone are necessary for the resolver variant of the CVE-2021-25215
> +        # regression test to work.  A named instance cannot be used for
> +        # serving the DNAME records below as a version of BIND vulnerable to
> +        # CVE-2021-25215 would crash while answering the queries asked by
> +        # the tested resolver.
> +        if qtype == "A":
> +            r.answer.append(dns.rrset.from_text(qname, ttl, IN, A, ip4))
> +        elif qtype == "AAAA":
> +            r.authority.append(
> +                dns.rrset.from_text("example.dname.", ttl, IN, SOA, ". . 0 0 0 0 0")
> +            )
> +    elif qname == "self.example.self..example.dname.":
> +        r.answer.append(
> +            dns.rrset.from_text("self.example.dname.", ttl, IN, DNAME, "dname.")
> +        )
> +        r.answer.append(
> +            dns.rrset.from_text(qname, ttl, IN, CNAME, "self.example.dname.")
> +        )
> +    elif qname == "self.example.dname.":
> +        if qtype == "DNAME":
> +            r.answer.append(dns.rrset.from_text(qname, ttl, IN, DNAME, "dname."))
> +    else:
> +        rcode = "REFUSED"
> +
> +    r.flags |= dns.flags.AA
> +    r.use_edns()
> +    return r.to_wire()
> +
> +
> +def sigterm(signum, frame):
> +    print("Shutting down now...")
> +    os.remove("ans.pid")
> +    running = False
> +    sys.exit(0)
> +
> +
> +############################################################################
> +# Main
> +#
> +# Set up responder and control channel, open the pid file, and start
> +# the main loop, listening for queries on the query channel or commands
> +# on the control channel and acting on them.
> +############################################################################
> +ip4 = "10.53.0.3"
> +ip6 = "fd92:7065:b8e:ffff::3"
> +
> +try:
> +    port = int(os.environ["PORT"])
> +except:
> +    port = 5300
> +
> +query4_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
> +query4_udp.bind((ip4, port))
> +
> +query4_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
> +query4_tcp.bind((ip4, port))
> +query4_tcp.listen(1)
> +query4_tcp.settimeout(1)
> +
> +havev6 = True
> +try:
> +    query6_udp = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
> +    try:
> +        query6_udp.bind((ip6, port))
> +    except:
> +        query6_udp.close()
> +        havev6 = False
> +
> +    query6_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
> +    try:
> +        query6_tcp.bind((ip4, port))
> +        query6_tcp.listen(1)
> +        query6_tcp.settimeout(1)
> +    except:
> +        query6_tcp.close()
> +        havev6 = False
> +except:
> +    havev6 = False
> +
> +signal.signal(signal.SIGTERM, sigterm)
> +
> +f = open("ans.pid", "w")
> +pid = os.getpid()
> +print(pid, file=f)
> +f.close()
> +
> +running = True
> +
> +print("Listening on %s port %d" % (ip4, port))
> +if havev6:
> +    print("Listening on %s port %d" % (ip6, port))
> +print("Ctrl-c to quit")
> +
> +if havev6:
> +    input = [query4_udp, query4_tcp, query6_udp, query6_tcp]
> +else:
> +    input = [query4_udp, query4_tcp]
> +
> +while running:
> +    try:
> +        inputready, outputready, exceptready = select.select(input, [], [])
> +    except select.error as e:
> +        break
> +    except socket.error as e:
> +        break
> +    except KeyboardInterrupt:
> +        break
> +
> +    for s in inputready:
> +        if s == query4_udp or s == query6_udp:
> +            print("Query received on %s" % (ip4 if s == query4_udp else ip6))
> +            # Handle incoming queries
> +            msg = s.recvfrom(65535)
> +            rsp = create_response(msg[0])
> +            if rsp:
> +                s.sendto(rsp, msg[1])
> +        elif s == query4_tcp or s == query6_tcp:
> +            try:
> +                conn, _ = s.accept()
> +                if s == query4_tcp or s == query6_tcp:
> +                    print(
> +                        "TCP Query received on %s" % (ip4 if s == query4_tcp else ip6),
> +                        end=" ",
> +                    )
> +                # get TCP message length
> +                msg = conn.recv(2)
> +                if len(msg) != 2:
> +                    print("couldn't read TCP message length")
> +                    continue
> +                length = struct.unpack(">H", msg[:2])[0]
> +                msg = conn.recv(length)
> +                if len(msg) != length:
> +                    print("couldn't read TCP message")
> +                    continue
> +                rsp = create_response(msg)
> +                if rsp:
> +                    conn.send(struct.pack(">H", len(rsp)))
> +                    conn.send(rsp)
> +                conn.close()
> +            except socket.error as e:
> +                print("error: %s" % str(e))
> +    if not running:
> +        break
> diff --git a/bin/tests/system/chain/ans4/ans.py b/bin/tests/system/chain/ans4/ans.py
> index 839067faa5e..66f0193caca 100755
> --- a/bin/tests/system/chain/ans4/ans.py
> +++ b/bin/tests/system/chain/ans4/ans.py
> @@ -316,16 +316,30 @@ try:
>  except:
>      ctrlport = 5300
>  
> -query4_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
> -query4_socket.bind((ip4, port))
> +query4_udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
> +query4_udp.bind((ip4, port))
> +
> +query4_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
> +query4_tcp.bind((ip4, port))
> +query4_tcp.listen(1)
> +query4_tcp.settimeout(1)
>  
>  havev6 = True
>  try:
> -    query6_socket = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
> +    query6_udp = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM)
> +    try:
> +        query6_udp.bind((ip6, port))
> +    except:
> +        query6_udp.close()
> +        havev6 = False
> +
> +    query6_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
>      try:
> -        query6_socket.bind((ip6, port))
> +        query6_tcp.bind((ip4, port))
> +        query6_tcp.listen(1)
> +        query6_tcp.settimeout(1)
>      except:
> -        query6_socket.close()
> +        query6_tcp.close()
>          havev6 = False
>  except:
>      havev6 = False
> @@ -350,9 +364,9 @@ print("Control channel on %s port %d" % (ip4, ctrlport))
>  print("Ctrl-c to quit")
>  
>  if havev6:
> -    input = [query4_socket, query6_socket, ctrl_socket]
> +    input = [query4_udp, query4_tcp, query6_udp, query6_tcp, ctrl_socket]
>  else:
> -    input = [query4_socket, ctrl_socket]
> +    input = [query4_udp, query4_tcp, ctrl_socket]
>  
>  while running:
>      try:
> @@ -375,12 +389,37 @@ while running:
>                      break
>                  ctl_channel(msg)
>              conn.close()
> -        if s == query4_socket or s == query6_socket:
> -            print("Query received on %s" % (ip4 if s == query4_socket else ip6))
> +        elif s == query4_udp or s == query6_udp:
> +            print("Query received on %s" % (ip4 if s == query4_udp else ip6))
>              # Handle incoming queries
>              msg = s.recvfrom(65535)
>              rsp = create_response(msg[0])
>              if rsp:
>                  s.sendto(rsp, msg[1])
> +        elif s == query4_tcp or s == query6_tcp:
> +            try:
> +                conn, _ = s.accept()
> +                if s == query4_tcp or s == query6_tcp:
> +                    print(
> +                        "TCP Query received on %s" % (ip4 if s == query4_tcp else ip6),
> +                        end=" ",
> +                    )
> +                # get TCP message length
> +                msg = conn.recv(2)
> +                if len(msg) != 2:
> +                    print("couldn't read TCP message length")
> +                    continue
> +                length = struct.unpack(">H", msg[:2])[0]
> +                msg = conn.recv(length)
> +                if len(msg) != length:
> +                    print("couldn't read TCP message")
> +                    continue
> +                rsp = create_response(msg)
> +                if rsp:
> +                    conn.send(struct.pack(">H", len(rsp)))
> +                    conn.send(rsp)
> +                conn.close()
> +            except socket.error as e:
> +                print("error: %s" % str(e))
>      if not running:
>          break
> diff --git a/lib/dns/include/dns/message.h b/lib/dns/include/dns/message.h
> index fe51fcfe24b..280d872d208 100644
> --- a/lib/dns/include/dns/message.h
> +++ b/lib/dns/include/dns/message.h
> @@ -236,6 +236,7 @@ struct dns_message {
>  	unsigned int tkey	      : 1;
>  	unsigned int rdclass_set      : 1;
>  	unsigned int fuzzing	      : 1;
> +	unsigned int has_dname	      : 1;
>  
>  	unsigned int opt_reserved;
>  	unsigned int sig_reserved;
> @@ -1457,4 +1458,11 @@ dns_message_clonebuffer(dns_message_t *msg);
>   * \li   msg be a valid message.
>   */
>  
> +bool
> +dns_message_hasdname(dns_message_t *msg);
> +/*%<
> + * Return whether a DNAME was detected in the ANSWER section of a QUERY
> + * message when it was parsed.
> + */
> +
>  ISC_LANG_ENDDECLS
> diff --git a/lib/dns/message.c b/lib/dns/message.c
> index e23baf7e09e..225c9d7576f 100644
> --- a/lib/dns/message.c
> +++ b/lib/dns/message.c
> @@ -442,6 +442,7 @@ msginit(dns_message_t *m) {
>  	m->cc_bad = 0;
>  	m->tkey = 0;
>  	m->rdclass_set = 0;
> +	m->has_dname = 0;
>  	m->querytsig = NULL;
>  	m->indent.string = "\t";
>  	m->indent.count = 0;
> @@ -1727,6 +1728,11 @@ getsection(isc_buffer_t *source, dns_message_t *msg, dns_decompress_t *dctx,
>  			 */
>  			msg->tsigname->attributes |= DNS_NAMEATTR_NOCOMPRESS;
>  			free_name = false;
> +		} else if (rdtype == dns_rdatatype_dname &&
> +			   sectionid == DNS_SECTION_ANSWER &&
> +			   msg->opcode == dns_opcode_query)
> +		{
> +			msg->has_dname = 1;
>  		}
>  		rdataset = NULL;
>  
> @@ -4773,3 +4779,9 @@ dns_message_clonebuffer(dns_message_t *msg) {
>  		msg->free_query = 1;
>  	}
>  }
> +
> +bool
> +dns_message_hasdname(dns_message_t *msg) {
> +	REQUIRE(DNS_MESSAGE_VALID(msg));
> +	return msg->has_dname;
> +}
> diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c
> index 168fcc5e808..a823f5a7051 100644
> --- a/lib/dns/resolver.c
> +++ b/lib/dns/resolver.c
> @@ -758,6 +758,7 @@ typedef struct respctx {
>  	bool get_nameservers; /* get a new NS rrset at
>  			       * zone cut? */
>  	bool resend;	      /* resend this query? */
> +	bool secured;	      /* message was signed or had a valid cookie */
>  	bool nextitem;	      /* invalid response; keep
>  			       * listening for the correct one */
>  	bool truncated;	      /* response was truncated */
> @@ -7900,6 +7901,47 @@ betterreferral(respctx_t *rctx) {
>  	return (false);
>  }
>  
> +static bool
> +rctx_need_tcpretry(respctx_t *rctx) {
> +	resquery_t *query = rctx->query;
> +	if ((rctx->retryopts & DNS_FETCHOPT_TCP) != 0) {
> +		/* TCP is already in the retry flags */
> +		return false;
> +	}
> +
> +	/*
> +	 * If the message was secured, no need to continue.
> +	 */
> +	if (rctx->secured) {
> +		return false;
> +	}
> +
> +	/*
> +	 * Currently the only extra reason why we might need to
> +	 * retry a UDP response over TCP is a DNAME in the message.
> +	 */
> +	if (dns_message_hasdname(query->rmessage)) {
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static isc_result_t
> +rctx_tcpretry(respctx_t *rctx) {
> +	/*
> +	 * Do we need to retry a UDP response over TCP?
> +	 */
> +	if (rctx_need_tcpretry(rctx)) {
> +		rctx->retryopts |= DNS_FETCHOPT_TCP;
> +		rctx->resend = true;
> +		rctx_done(rctx, ISC_R_SUCCESS);
> +		return ISC_R_COMPLETE;
> +	}
> +
> +	return ISC_R_SUCCESS;
> +}
> +
>  /*
>   * resquery_response():
>   * Handles responses received in response to iterative queries sent by
> @@ -8058,6 +8100,11 @@ resquery_response(isc_task_t *task, isc_event_t *event) {
>  		break;
>  	}
>  
> +	/*
> +	 * The dispatcher should ensure we only get responses with QR set.
> +	 */
> +	INSIST((query->rmessage->flags & DNS_MESSAGEFLAG_QR) != 0);
> +
>  	/*
>  	 * If the message is signed, check the signature.  If not, this
>  	 * returns success anyway.
> @@ -8075,9 +8122,16 @@ resquery_response(isc_task_t *task, isc_event_t *event) {
>  	}
>  
>  	/*
> -	 * The dispatcher should ensure we only get responses with QR set.
> +	 * Remember whether this message was signed or had a
> +	 * valid client cookie; if not, we may need to retry over
> +	 * TCP later.
>  	 */
> -	INSIST((query->rmessage->flags & DNS_MESSAGEFLAG_QR) != 0);
> +	if (query->rmessage->cc_ok || query->rmessage->tsig != NULL ||
> +	    query->rmessage->sig0 != NULL)
> +	{
> +		rctx.secured = true;
> +	}
> +
>  	/*
>  	 * INSIST() that the message comes from the place we sent it to,
>  	 * since the dispatch code should ensure this.
> @@ -8091,10 +8145,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) {
>  	 * This may be a misconfigured anycast server or an attempt to send
>  	 * a spoofed response.  Skip if we have a valid tsig.
>  	 */
> -	if (dns_message_gettsig(query->rmessage, NULL) == NULL &&
> -	    !query->rmessage->cc_ok && !query->rmessage->cc_bad &&
> -	    (rctx.retryopts & DNS_FETCHOPT_TCP) == 0)
> -	{
> +	if (!rctx.secured && (rctx.retryopts & DNS_FETCHOPT_TCP) == 0) {
>  		unsigned char cookie[COOKIE_BUFFER_SIZE];
>  		if (dns_adb_getcookie(fctx->adb, query->addrinfo, cookie,
>  				      sizeof(cookie)) > CLIENT_COOKIE_SIZE)
> @@ -8120,6 +8171,17 @@ resquery_response(isc_task_t *task, isc_event_t *event) {
>  		 */
>  	}
>  
> +	/*
> +	 * Check whether we need to retry over TCP for some other reason.
> +	 */
> +	result = rctx_tcpretry(&rctx);
> +	if (result == ISC_R_COMPLETE) {
> +		return;
> +	}
> +
> +	/*
> +	 * Check for EDNS issues.
> +	 */
>  	rctx_edns(&rctx);
>  
>  	/*
> @@ -8848,8 +8910,8 @@ rctx_answer_positive(respctx_t *rctx) {
>  	}
>  
>  	/*
> -	 * Cache records in the authority section, if
> -	 * there are any suitable for caching.
> +	 * Cache records in the authority section, if there are
> +	 * any suitable for caching.
>  	 */
>  	rctx_authority_positive(rctx);
>  
> @@ -9221,14 +9283,14 @@ rctx_answer_dname(respctx_t *rctx) {
>  
>  /*
>   * rctx_authority_positive():
> - * Examine the records in the authority section (if there are any) for a
> - * positive answer.  We expect the names for all rdatasets in this section
> - * to be subdomains of the domain being queried; any that are not are
> - * skipped.  We expect to find only *one* owner name; any names
> - * after the first one processed are ignored. We expect to find only
> - * rdatasets of type NS, RRSIG, or SIG; all others are ignored. Whatever
> - * remains can be cached at trust level authauthority or additional
> - * (depending on whether the AA bit was set on the answer).
> + * If a positive answer was received over TCP or secured with a cookie
> + * or TSIG, examine the authority section.  We expect names for all
> + * rdatasets in this section to be subdomains of the domain being queried;
> + * any that are not are skipped.  We expect to find only *one* owner name;
> + * any names after the first one processed are ignored. We expect to find
> + * only rdatasets of type NS; all others are ignored. Whatever remains can
> + * be cached at trust level authauthority or additional (depending on
> + * whether the AA bit was set on the answer).
>   */
>  static void
>  rctx_authority_positive(respctx_t *rctx) {
> @@ -9236,6 +9298,11 @@ rctx_authority_positive(respctx_t *rctx) {
>  	bool done = false;
>  	isc_result_t result;
>  
> +	/* If it's spoofable, don't cache it. */
> +	if (!rctx->secured && (rctx->query->options & DNS_FETCHOPT_TCP) == 0) {
> +		return;
> +	}
> +
>  	result = dns_message_firstname(rctx->query->rmessage,
>  				       DNS_SECTION_AUTHORITY);
>  	while (!done && result == ISC_R_SUCCESS) {
> -- 
> GitLab
> 

Cheers,

 -- Santiago

Attachment: signature.asc
Description: PGP signature


Reply to: