* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
#include <isc/assertions.h>
#include <isc/refcount.h>
/*
 * NOTE(review): presumably a compile-time switch for hash-assisted node
 * lookup; nothing that consumes this macro is visible here -- confirm
 * against the implementation before changing it.
 */
#define DNS_RBT_USEHASH 1
* Option values for dns_rbt_findnode() and dns_rbt_findname().
* These are used to form a bitmask.
/*
 * Flag bits for the 'options' argument of dns_rbt_findname() and
 * dns_rbt_findnode().  Each flag occupies its own bit so that they can be
 * OR'ed together.
 */
#define DNS_RBTFIND_NOOPTIONS     0         /* no flags set */
#define DNS_RBTFIND_EMPTYDATA     (1 << 0)  /* nodes without data still count as existing */
#define DNS_RBTFIND_NOEXACT       (1 << 1)  /* return the closest superdomain even on an exact match */
#define DNS_RBTFIND_NOPREDECESSOR (1 << 2)  /* do not maintain the chain to the DNSSEC predecessor */
/*
 * Default DNS_RBT_USEISCREFCOUNT to 1 when ISC_REFCOUNT_HAVEATOMIC is
 * defined, unless the builder has already defined it.  Both conditionals
 * are closed here so that nothing after this point is accidentally made
 * conditional on them.
 */
#ifndef DNS_RBT_USEISCREFCOUNT
#ifdef ISC_REFCOUNT_HAVEATOMIC
#define DNS_RBT_USEISCREFCOUNT 1
#endif /* ISC_REFCOUNT_HAVEATOMIC */
#endif /* !DNS_RBT_USEISCREFCOUNT */
/*
 * NOTE(review): presumably selects whether DNS_RBTNODE_VALID() performs a
 * real magic-number check; the conditional that consumes this macro is not
 * visible here -- confirm.
 */
#define DNS_RBT_USEMAGIC 1
* These should add up to 30.
/* Bit widths; the two values are expected to total 30 (10 + 20). */
#define DNS_RBT_LOCKLENGTH 10 /* NOTE(review): presumably the width of a lock-number bitfield -- confirm */
#define DNS_RBT_REFLENGTH 20 /* width of the 'references' bitfield used when DNS_RBT_USEISCREFCOUNT is not defined */
/*
 * Magic value for RBT nodes and the validity check built on it.
 *
 * DNS_RBTNODE_VALID(n) expands to ISC_MAGIC_VALID(n, DNS_RBTNODE_MAGIC)
 * when DNS_RBT_USEMAGIC is non-zero, and to the constant 'true' otherwise.
 * Exactly one definition is active, so the macro is never redefined with a
 * conflicting body.
 */
#define DNS_RBTNODE_MAGIC ISC_MAGIC('R','B','N','O')
#if DNS_RBT_USEMAGIC
#define DNS_RBTNODE_VALID(n) ISC_MAGIC_VALID(n, DNS_RBTNODE_MAGIC)
#else
#define DNS_RBTNODE_VALID(n) true
#endif /* DNS_RBT_USEMAGIC */
* This is the structure that is used for each node in the red/black
* tree of trees. NOTE WELL: the implementation manages this as a variable
* length structure, with the actual wire-format name and other data
* appended to this structure. Allocating a contiguous block of memory for
* multiple dns_rbtnode structures will not work.
/*
 * Short name for struct dns_rbtnode.  Declared ahead of the structure so
 * that members such as 'uppernode' and the callback typedefs can refer to
 * dns_rbtnode_t pointers.
 */
typedef struct dns_rbtnode dns_rbtnode_t;
DNS_RBT_NSEC_NORMAL=0, /* in main tree */
DNS_RBT_NSEC_HAS_NSEC=1, /* also has node in nsec tree */
DNS_RBT_NSEC_NSEC=2, /* in nsec tree */
DNS_RBT_NSEC_NSEC3=3 /* in nsec3 tree */
* The following bitfields add up to a total bitwidth of 32.
* The range of values necessary for each item is indicated,
* but in the case of "attributes" the field is wider to accommodate
* possible future expansion.
* In each case below the "range" indicated is what's _necessary_ for
* the bitfield to hold, not what it actually _can_ hold.
* Note: Tree lock must be held before modifying these
* Note: The two "unsigned int :0;" unnamed bitfields on either
* side of the bitfields below are scaffolding that border the
* set of bitfields which are accessed after acquiring the tree
* lock. Please don't insert any other bitfield members between
* the unnamed bitfields unless they should also be accessed
* after acquiring the tree lock.
unsigned int :0; /* start of bitfields c/o tree lock */
unsigned int is_root : 1; /*%< range is 0..1 */
unsigned int color : 1; /*%< range is 0..1 */
unsigned int find_callback : 1; /*%< range is 0..1 */
unsigned int attributes : 3; /*%< range is 0..2 */
unsigned int nsec : 2; /*%< range is 0..3 */
unsigned int namelen : 8; /*%< range is 1..255 */
unsigned int offsetlen : 8; /*%< range is 1..128 */
unsigned int oldnamelen : 8; /*%< range is 1..255 */
/* flags needed for serialization to file*/
unsigned int is_mmapped : 1;
unsigned int parent_is_relative : 1;
unsigned int left_is_relative : 1;
unsigned int right_is_relative : 1;
unsigned int down_is_relative : 1;
unsigned int data_is_relative : 1;
/* node needs to be cleaned from rpz */
unsigned int :0; /* end of bitfields c/o tree lock */
dns_rbtnode_t *uppernode;
* Used for LRU cache. This linked list is used to mark nodes which
* have no data any longer, but we cannot unlink at that exact moment
* because we did not or could not obtain a write lock on the tree.
ISC_LINK(dns_rbtnode_t) deadlink;
* These values are used in the RBT DB implementation. The appropriate
* node lock must be held before accessing them.
* Note: The two "unsigned int :0;" unnamed bitfields on either
* side of the bitfields below are scaffolding that border the
* set of bitfields which are accessed after acquiring the node
* lock. Please don't insert any other bitfield members between
* the unnamed bitfields unless they should also be accessed
* after acquiring the node lock.
* NOTE: Do not merge these fields into bitfields above, as
* they'll all be put in the same qword that could be accessed
* without the node lock as it shares the qword with other
* members. Leave these members here so that they occupy a
* separate region of memory.
unsigned int :0; /* start of bitfields c/o node lock */
#ifndef DNS_RBT_USEISCREFCOUNT
unsigned int references:DNS_RBT_REFLENGTH;
unsigned int :0; /* end of bitfields c/o node lock */
#ifdef DNS_RBT_USEISCREFCOUNT
isc_refcount_t references; /* note that this is not in the bitfield */
typedef isc_result_t (*dns_rbtfindcallback_t)(dns_rbtnode_t *node,
typedef isc_result_t (*dns_rbtdatawriter_t)(FILE *file,
/*
 * Callback type that receives a node, a base pointer with a byte offset,
 * an opaque caller argument and a pointer to a 64-bit CRC value, and
 * returns an isc_result_t.
 * NOTE(review): presumably used to fix up node data when a serialized tree
 * is loaded; no caller is visible here -- confirm.
 */
typedef isc_result_t (*dns_rbtdatafixer_t)(dns_rbtnode_t *rbtnode,
void *base, size_t offset,
void *arg, uint64_t *crc);
/*
 * Cleanup callback type for node data.  As described for dns_rbt_create(),
 * the first argument is the deleted node's data pointer and the second is
 * the deleter_arg that was supplied when the tree was created.
 */
typedef void (*dns_rbtdeleter_t)(void *data, void *deleter_arg);
* A chain is used to keep track of the sequence of nodes to reach any given
* node from the root of the tree. Originally nodes did not have parent
* pointers in them (for memory usage reasons) so there was no way to find
* the path back to the root from any given node. Now that nodes have parent
* pointers, chains might be going away in a future release, though the
* movement functionality would remain.
* Chains may be used to iterate over a tree of trees. After setting up the
* chain's structure using dns_rbtnodechain_init(), it needs to be initialized
* to point to the lexically first or lexically last node in the tree of trees
* using dns_rbtnodechain_first() or dns_rbtnodechain_last(), respectively.
* Calling dns_rbtnodechain_next() or dns_rbtnodechain_prev() then moves the
* chain over to the next or previous node, respectively.
* In any event, parent information, whether via parent pointers or chains, is
* necessary information for iterating through the tree or for basic internal
* tree maintenance issues (ie, the rotations that are done to rebalance the
* tree when a node is added). The obvious implication of this is that for a
* chain to remain valid, the tree has to be locked down against writes for the
* duration of the useful life of the chain, because additions or removals can
* change the path from the root to the node the chain has targeted.
* The dns_rbtnodechain_ functions _first, _last, _prev and _next all take
* dns_name_t parameters for the name and the origin, which can be NULL. If
* non-NULL, 'name' will end up pointing to the name data and offsets that are
* stored at the node (and thus it will be read-only), so it should be a
* regular dns_name_t that has been initialized with dns_name_init. When
* 'origin' is non-NULL, it will get the name of the origin stored in it, so it
* needs to have its own buffer space and offsets, which is most easily
* accomplished with a dns_fixedname_t. It is _not_ necessary to reinitialize
* either 'name' or 'origin' between calls to the chain functions.
* NOTE WELL: even though the name data at the root of the tree of trees will
* be absolute (typically just "."), it will be made into a relative name
* with an origin of "." -- an empty name when the node is ".". This is
* because a common operation on 'name' and 'origin' is to use
* dns_name_concatenate() on them to generate the complete name. An empty name
* can be detected when dns_name_countlabels == 0, and is printed by
* dns_name_totext()/dns_name_format() as "@", consistent with RFC1035's
* definition of "@" as the current origin.
* dns_rbtnodechain_current is similar to the _first, _last, _prev and _next
* functions but additionally can provide the node to which the chain points.
* The number of level blocks to allocate at a time. Currently the maximum
* number of levels is allocated directly in the structure, but future
* revisions of this code might have a static initial block with dynamic
* growth. Allocating space for 254 levels when the tree is almost never that
* deep is wasteful, but it's not clear that it matters, since the waste is
* only 2MB for 1000 concurrently active chains on a system with 64-bit pointers.
/* Number of entries in the levels[] array of a dns_rbtnodechain. */
#define DNS_RBT_LEVELBLOCK 254
typedef struct dns_rbtnodechain {
* The terminal node of the chain. It is not in levels[].
* This is ostensibly private ... but in a pinch it could be
* used to tell that the chain points nowhere without needing to
* call dns_rbtnodechain_current().
* The maximum number of labels in a name is 128; bitstrings mean
* a conceptually very large number (which I have not bothered to
* compute) of logical levels because splitting can potentially occur
* at each bit. However, DNSSEC restricts the number of "logical"
* labels in a name to 255, meaning only 254 pointers are needed
dns_rbtnode_t * levels[DNS_RBT_LEVELBLOCK];
* level_count indicates how deep the chain points into the
* tree of trees, and is the index into the levels[] array.
* Thus, levels[level_count - 1] is the last level node stored.
* A chain that points to the top level of the tree of trees has
* a level_count of 0, the first level has a level_count of 1, and so on.
unsigned int level_count;
* level_matches tells how many levels matched above the node
* returned by dns_rbt_findnode(). A match (partial or exact) found
* in the first level thus results in level_matches being set to 1.
* This is used by the rbtdb to set the start point for a recursive
* search of superdomains until the RR it is looking for is found.
unsigned int level_matches;
/*
 * Create a red-black tree of trees and hand it back through *rbtp.
 * 'deleter', when non-NULL, is called with a deleted node's data pointer
 * and 'deleter_arg' to release that data.
 * NOTE(review): no return type is visible on this declaration; the
 * documented results are ISC_R_* codes, so it is presumably isc_result_t
 * -- confirm.
 */
dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter,
void *deleter_arg, dns_rbt_t **rbtp);
* Initialize a red-black tree of trees.
*\li The deleter argument, if non-null, points to a function that is
* responsible for cleaning up any memory associated with the data
* pointer of a node when the node is deleted. It is passed the
* deleted node's data pointer as its first argument and deleter_arg
* as its second argument.
* \li mctx is a pointer to a valid memory context.
*\li rbtp != NULL && *rbtp == NULL
*\li deleter_arg == NULL iff deleter == NULL
*\li If result is ISC_R_SUCCESS:
* *rbtp points to a valid red-black tree manager
*\li If result is failure:
* *rbtp does not point to a valid red-black tree manager.
*\li #ISC_R_SUCCESS Success
*\li #ISC_R_NOMEMORY Resource limit: Out of Memory
/*
 * Add 'name' to the tree of trees with 'data' as its data pointer.
 * 'name' must be absolute and is not modified.  Documented results are
 * ISC_R_SUCCESS, ISC_R_EXISTS, ISC_R_NOSPACE and ISC_R_NOMEMORY.
 * NOTE(review): no return type is visible on this declaration; presumably
 * isc_result_t -- confirm.
 */
dns_rbt_addname(dns_rbt_t *rbt, dns_name_t *name, void *data);
* Add 'name' to the tree of trees, associated with 'data'.
*\li 'data' is never required to be non-NULL, but specifying it
* when the name is added is faster than searching for 'name'
* again and then setting the data pointer. The lack of a data pointer
* for a node also has other ramifications regarding whether
* dns_rbt_findname considers a node to exist, or dns_rbt_deletename
*\li rbt is a valid rbt manager.
*\li dns_name_isabsolute(name) == TRUE
*\li 'name' is not altered in any way.
*\li Any external references to nodes in the tree are unaffected by
* node splits that are necessary to insert the new name.
*\li If result is #ISC_R_SUCCESS:
* 'name' is findable in the red/black tree of trees in O(log N).
* The data pointer of the node for 'name' is set to 'data'.
*\li If result is #ISC_R_EXISTS or #ISC_R_NOSPACE:
* The tree of trees is unaltered.
*\li If result is #ISC_R_NOMEMORY:
*\li #ISC_R_SUCCESS Success
*\li #ISC_R_EXISTS The name already exists with associated data.
*\li #ISC_R_NOSPACE The name had more logical labels than are allowed.
*\li #ISC_R_NOMEMORY Resource Limit: Out of Memory
/*
 * Like dns_rbt_addname(), but returns the node through *nodep instead of
 * taking a data pointer.  On ISC_R_EXISTS, *nodep is the already existing
 * node for 'name'.  *nodep must be NULL on entry.
 * NOTE(review): no return type is visible on this declaration; presumably
 * isc_result_t -- confirm.
 */
dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep);
* Just like dns_rbt_addname, but returns the address of the node.
*\li rbt is a valid rbt structure.
*\li dns_name_isabsolute(name) == TRUE
*\li nodep != NULL && *nodep == NULL
*\li 'name' is not altered in any way.
*\li Any external references to nodes in the tree are unaffected by
* node splits that are necessary to insert the new name.
*\li If result is ISC_R_SUCCESS:
* 'name' is findable in the red/black tree of trees in O(log N).
* *nodep is the node that was added for 'name'.
*\li If result is ISC_R_EXISTS:
* The tree of trees is unaltered.
* *nodep is the existing node for 'name'.
*\li If result is ISC_R_NOMEMORY:
*\li #ISC_R_SUCCESS Success
*\li #ISC_R_EXISTS The name already exists, possibly without data.
*\li #ISC_R_NOMEMORY Resource Limit: Out of Memory
/*
 * Look up the data pointer associated with 'name' and store it in *data,
 * which must be NULL on entry.  'options' is a bitmask of DNS_RBTFIND_*
 * flags.  Documented results are ISC_R_SUCCESS, DNS_R_PARTIALMATCH,
 * ISC_R_NOTFOUND and ISC_R_NOSPACE.
 * NOTE(review): no return type is visible on this declaration; presumably
 * isc_result_t -- confirm.
 */
dns_rbt_findname(dns_rbt_t *rbt, const dns_name_t *name, unsigned int options,
dns_name_t *foundname, void **data);
* Get the data pointer associated with 'name'.
*\li When #DNS_RBTFIND_NOEXACT is set, the closest matching superdomain is
* returned (also subject to #DNS_RBTFIND_EMPTYDATA), even when there is
* an exact match in the tree.
*\li A node that has no data is considered not to exist for this function,
* unless the #DNS_RBTFIND_EMPTYDATA option is set.
*\li rbt is a valid rbt manager.
*\li dns_name_isabsolute(name) == TRUE
*\li data != NULL && *data == NULL
*\li 'name' and the tree are not altered in any way.
*\li If result is ISC_R_SUCCESS:
* *data is the data associated with 'name'.
*\li If result is DNS_R_PARTIALMATCH:
* *data is the data associated with the deepest superdomain
* of 'name' which has data.
*\li If result is ISC_R_NOTFOUND:
* Neither the name nor a superdomain was found with data.
*\li #ISC_R_SUCCESS Success
*\li #DNS_R_PARTIALMATCH Superdomain found with data
*\li #ISC_R_NOTFOUND No match
*\li #ISC_R_NOSPACE Concatenating nodes to form foundname failed
dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
dns_rbtnode_t **node, dns_rbtnodechain_t *chain,
unsigned int options, dns_rbtfindcallback_t callback,
* Find the node for 'name'.
*\li A node that has no data is considered not to exist for this function,
* unless the DNS_RBTFIND_EMPTYDATA option is set. This applies to both
* exact matches and partial matches.
*\li If the chain parameter is non-NULL, then the path through the tree
* to the DNSSEC predecessor of the searched for name is maintained,
* unless the DNS_RBTFIND_NOPREDECESSOR or DNS_RBTFIND_NOEXACT option
* is used. (For more details on those options, see below.)
*\li If there is no predecessor, then the chain will point to nowhere, as
* indicated by chain->end being NULL or dns_rbtnodechain_current
* returning ISC_R_NOTFOUND. Note that in a normal Internet DNS RBT
* there will always be a predecessor for all names except the root
* name, because '.' will exist and '.' is the predecessor of
* everything. But you can certainly construct a trivial tree and a
* search for it that has no predecessor.
*\li Within the chain structure, the 'levels' member of the structure holds
* the root node of each level except the first.
*\li The 'level_count' of the chain indicates how deep the chain to the
* predecessor name is, as an index into the 'levels[]' array. It does
* not count name elements, per se, but only levels of the tree of trees,
* the distinction arising because multiple labels from a name can be
* stored on only one level. It also does not include the level
* that has the node, since that level is not stored in levels[].
*\li The chain's 'level_matches' is not directly related to the predecessor.
* It is the number of levels above the level of the found 'node',
* regardless of whether it was a partial match or exact match. When
* the node is found in the top level tree, or no node is found at all,
* level_matches is 0.
*\li When DNS_RBTFIND_NOEXACT is set, the closest matching superdomain is
* returned (also subject to DNS_RBTFIND_EMPTYDATA), even when
* there is an exact match in the tree. In this case, the chain
* will not point to the DNSSEC predecessor, but will instead point
* to the exact match, if there was any. Thus the preceding paragraphs
* should have "exact match" substituted for "predecessor" to describe
* how the various elements of the chain are set. This was done to
* ensure that the chain's state was sane, and to prevent problems that
* occurred when running the predecessor location code under conditions
* it was not designed for. It is not clear *where* the chain should
* point when DNS_RBTFIND_NOEXACT is set, so if you end up using a chain
* with this option because you want a particular node, let us know
* where you want the chain pointed, so this can be made more firm.
*\li rbt is a valid rbt manager.
*\li dns_name_isabsolute(name) == TRUE.
*\li node != NULL && *node == NULL.
*\li #DNS_RBTFIND_NOEXACT and DNS_RBTFIND_NOPREDECESSOR are mutually exclusive.
*\li 'name' and the tree are not altered in any way.