From 13cec3831f51a7fca63a549c46b78566c89eeb3d Mon Sep 17 00:00:00 2001 From: Sam James Date: Fri, 17 Nov 2023 12:20:05 +0000 Subject: net-dns/c-ares: backport /etc/hosts lookup performance fixes With particular thanks to Daniel and Brad for chipping in on the bug and looking into it quickly. Closes: https://bugs.gentoo.org/917400 Signed-off-by: Sam James --- net-dns/c-ares/c-ares-1.22.0-r1.ebuild | 109 ++++++ .../files/c-ares-1.22.0-hosts-lookup-perf.patch | 403 +++++++++++++++++++++ .../c-ares/files/c-ares-1.22.0-hosts-lookup.patch | 109 ++++++ 3 files changed, 621 insertions(+) create mode 100644 net-dns/c-ares/c-ares-1.22.0-r1.ebuild create mode 100644 net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup-perf.patch create mode 100644 net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup.patch (limited to 'net-dns/c-ares') diff --git a/net-dns/c-ares/c-ares-1.22.0-r1.ebuild b/net-dns/c-ares/c-ares-1.22.0-r1.ebuild new file mode 100644 index 000000000000..5356eceefd2a --- /dev/null +++ b/net-dns/c-ares/c-ares-1.22.0-r1.ebuild @@ -0,0 +1,109 @@ +# Copyright 1999-2023 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +VERIFY_SIG_OPENPGP_KEY_PATH=/usr/share/openpgp-keys/danielstenberg.asc +inherit edo multilib-minimal verify-sig + +DESCRIPTION="C library that resolves names asynchronously" +HOMEPAGE="https://c-ares.org/" +SRC_URI=" + https://c-ares.org/download/${P}.tar.gz + verify-sig? ( https://c-ares.org/download/${P}.tar.gz.asc ) +" + +# ISC for lib/{bitncmp.c,inet_ntop.c,inet_net_pton.c} (bug #912405) +LICENSE="MIT ISC" +# Subslot = SONAME of libcares.so.2 +SLOT="0/2" +KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~amd64-linux ~x86-linux ~arm64-macos ~ppc-macos ~x64-macos ~x64-solaris" +IUSE="static-libs test" +RESTRICT="!test? ( test )" + +BDEPEND="verify-sig? ( sec-keys/openpgp-keys-danielstenberg )" + +DOCS=( AUTHORS CHANGES NEWS README.md RELEASE-NOTES TODO ) + +PATCHES=( + "${FILESDIR}"/${P}-hosts-lookup.patch + "${FILESDIR}"/${P}-hosts-lookup-perf.patch +) + +MULTILIB_WRAPPED_HEADERS=( + /usr/include/ares_build.h +) + +QA_CONFIG_IMPL_DECL_SKIP=( + # Checking for obsolete headers + malloc + calloc + free + + # Non-existent on Linux + closesocket + CloseSocket + ioctlsocket + bitncmp +) + +multilib_src_configure() { + # These configure tests take a long time to run. The args one runs in O(n^4). + export ac_cv_func_getnameinfo='yes' + export ac_cv_func_recvfrom='yes' + export ac_cv_func_recv='yes' + export ac_cv_func_send='yes' + export curl_cv_func_getnameinfo_args='const struct sockaddr *,socklen_t,size_t,int' + export curl_cv_func_recv_args='int,void *,size_t,int,int' + export curl_cv_func_recvfrom_args='int,void *,size_t,int,struct sockaddr *,socklen_t *,int' + export curl_cv_func_send_args='int,const void *,size_t,int,int' + export curl_cv_getnameinfo='yes' + export curl_cv_recv='yes' + export curl_cv_recvfrom='yes' + export curl_cv_send='yes' + + local myeconfargs=( + --cache-file="${BUILD_DIR}"/config.cache + + --enable-nonblocking + --enable-symbol-hiding + $(use_enable static-libs static) + $(use_enable test tests) + ) + + # Needed for running unit tests only + # Violates sandbox and tests pass fine without + export ax_cv_uts_namespace=no + export ax_cv_user_namespace=no + ECONF_SOURCE="${S}" econf "${myeconfargs[@]}" +} + +multilib_src_test() { + cd "${BUILD_DIR}"/test || die + + # We're skipping the "real" network tests with the filter + # see https://github.com/c-ares/c-ares/tree/main/test + local network_tests=( + # Most live tests have Live in the name + *Live* + # These don't but are still in ares-test-live.cc => live + *GetTCPSock* + *TimeoutValue* + *GetSock* + *GetSock_virtualized* + *VerifySocketFunctionCallback* + # Seems flaky, even run manually + # https://github.com/c-ares/c-ares/commit/9e542a8839f81c990bb0dff14beeaf9aa6bcc18d + *MockUDPMaxQueriesTest.GetHostByNameParallelLookups* + ) + + # The format for disabling test1, test2, and test3 looks like: + # -test1:test2:test3 + edo ./arestest --gtest_filter=-$(echo $(IFS=:; echo "${network_tests[*]}")) +} + +multilib_src_install_all() { + einstalldocs + + find "${ED}" -name "*.la" -delete || die +} diff --git a/net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup-perf.patch b/net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup-perf.patch new file mode 100644 index 000000000000..4830c9cb97e9 --- /dev/null +++ b/net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup-perf.patch @@ -0,0 +1,403 @@ +https://github.com/c-ares/c-ares/commit/2724f0e26c8875ce194d68dc15840e9435a1c805 + +From 2724f0e26c8875ce194d68dc15840e9435a1c805 Mon Sep 17 00:00:00 2001 +From: Brad House +Date: Thu, 16 Nov 2023 15:20:48 -0500 +Subject: [PATCH] optimize: large /etc/hosts files reading + +profiling found some hot paths that could be optimized to reduce +insert times into the cache. + +Fix By: Brad House (@bradh352) +--- a/src/lib/ares__hosts_file.c ++++ b/src/lib/ares__hosts_file.c +@@ -313,81 +313,51 @@ static ares_hosts_file_t *ares__hosts_file_create(const char *filename) + return NULL; + } + +-static ares_bool_t ares__hosts_entry_ipaddr_exists(ares_hosts_entry_t *entry, +- const char *ipaddr) +-{ +- ares__llist_node_t *node; +- +- for (node = ares__llist_node_first(entry->ips); node != NULL; +- node = ares__llist_node_next(node)) { +- const char *myaddr = ares__llist_node_val(node); +- if (strcmp(myaddr, ipaddr) == 0) { +- return ARES_TRUE; +- } +- } +- +- return ARES_FALSE; +-} +- +-static ares_bool_t ares__hosts_entry_host_exists(ares_hosts_entry_t *entry, +- const char *host) +-{ +- ares__llist_node_t *node; +- +- for (node = ares__llist_node_first(entry->ips); node != NULL; +- node = ares__llist_node_next(node)) { +- const char *myhost = ares__llist_node_val(node); +- if (strcasecmp(myhost, host) == 0) { +- return ARES_TRUE; +- } +- } +- +- return ARES_FALSE; +-} ++typedef enum { ++ ARES_MATCH_NONE = 0, ++ ARES_MATCH_IPADDR = 1, ++ ARES_MATCH_HOST = 2 ++} ares_hosts_file_match_t; + +-static ares_status_t ares__hosts_file_merge_entry(ares_hosts_entry_t *existing, +- ares_hosts_entry_t *entry) ++static ares_status_t ares__hosts_file_merge_entry(ares_hosts_file_t *hf, ++ ares_hosts_entry_t *existing, ++ ares_hosts_entry_t *entry, ++ ares_hosts_file_match_t matchtype) + { + ares__llist_node_t *node; + +- while ((node = ares__llist_node_first(entry->ips)) != NULL) { +- char *ipaddr = ares__llist_node_claim(node); ++ /* If we matched on IP address, we know there can only be 1, so there's no ++ * reason to do anything */ ++ if (matchtype != ARES_MATCH_IPADDR) { ++ while ((node = ares__llist_node_first(entry->ips)) != NULL) { ++ const char *ipaddr = ares__llist_node_val(node); + +- if (ares__hosts_entry_ipaddr_exists(existing, ipaddr)) { +- ares_free(ipaddr); +- continue; +- } ++ if (ares__htable_strvp_get_direct(hf->iphash, ipaddr) != NULL) { ++ ares__llist_node_destroy(node); ++ continue; ++ } + +- if (ares__llist_insert_last(existing->ips, ipaddr) == NULL) { +- ares_free(ipaddr); +- return ARES_ENOMEM; ++ ares__llist_node_move_parent_last(node, existing->ips); + } + } + + + while ((node = ares__llist_node_first(entry->hosts)) != NULL) { +- char *hostname = ares__llist_node_claim(node); ++ const char *hostname = ares__llist_node_val(node); + +- if (ares__hosts_entry_host_exists(existing, hostname)) { +- ares_free(hostname); ++ if (ares__htable_strvp_get_direct(hf->hosthash, hostname) != NULL) { ++ ares__llist_node_destroy(node); + continue; + } + +- if (ares__llist_insert_last(existing->hosts, hostname) == NULL) { +- ares_free(hostname); +- return ARES_ENOMEM; +- } ++ ares__llist_node_move_parent_last(node, existing->hosts); + } + + ares__hosts_entry_destroy(entry); + return ARES_SUCCESS; + } + +-typedef enum { +- ARES_MATCH_NONE = 0, +- ARES_MATCH_IPADDR = 1, +- ARES_MATCH_HOST = 2 +-} ares_hosts_file_match_t; ++ + + static ares_hosts_file_match_t + ares__hosts_file_match(const ares_hosts_file_t *hf, ares_hosts_entry_t *entry, +@@ -435,7 +405,7 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, + matchtype = ares__hosts_file_match(hosts, entry, &match); + + if (matchtype != ARES_MATCH_NONE) { +- status = ares__hosts_file_merge_entry(match, entry); ++ status = ares__hosts_file_merge_entry(hosts, match, entry, matchtype); + if (status != ARES_SUCCESS) { + ares__hosts_entry_destroy(entry); + return status; +@@ -481,6 +451,22 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, + return ARES_SUCCESS; + } + ++static ares_bool_t ares__hosts_entry_isdup(ares_hosts_entry_t *entry, ++ const char *host) ++{ ++ ares__llist_node_t *node; ++ ++ for (node = ares__llist_node_first(entry->ips); node != NULL; ++ node = ares__llist_node_next(node)) { ++ const char *myhost = ares__llist_node_val(node); ++ if (strcasecmp(myhost, host) == 0) { ++ return ARES_TRUE; ++ } ++ } ++ ++ return ARES_FALSE; ++} ++ + static ares_status_t ares__parse_hosts_hostnames(ares__buf_t *buf, + ares_hosts_entry_t *entry) + { +@@ -531,7 +517,7 @@ static ares_status_t ares__parse_hosts_hostnames(ares__buf_t *buf, + } + + /* Don't add a duplicate to the same entry */ +- if (ares__hosts_entry_host_exists(entry, hostname)) { ++ if (ares__hosts_entry_isdup(entry, hostname)) { + continue; + } + +--- a/src/lib/ares__htable.c ++++ b/src/lib/ares__htable.c +@@ -66,7 +66,7 @@ static unsigned int ares__htable_generate_seed(ares__htable_t *htable) + + static void ares__htable_buckets_destroy(ares__llist_t **buckets, + unsigned int size, +- unsigned char destroy_vals) ++ ares_bool_t destroy_vals) + { + unsigned int i; + +@@ -94,7 +94,7 @@ void ares__htable_destroy(ares__htable_t *htable) + if (htable == NULL) { + return; + } +- ares__htable_buckets_destroy(htable->buckets, htable->size, 1); ++ ares__htable_buckets_destroy(htable->buckets, htable->size, ARES_TRUE); + ares_free(htable); + } + +@@ -180,11 +180,40 @@ static ares_bool_t ares__htable_expand(ares__htable_t *htable) + + for (i = 0; i < old_size; i++) { + ares__llist_node_t *node; +- for (node = ares__llist_node_first(htable->buckets[i]); node != NULL; +- node = ares__llist_node_next(node)) { ++ ++ /* Nothing in this bucket */ ++ if (htable->buckets[i] == NULL) ++ continue; ++ ++ /* Fast past optimization (most likely case), there is likely only a single ++ * entry in both the source and destination, check for this to confirm and ++ * if so, just move the bucket over */ ++ if (ares__llist_len(htable->buckets[i]) == 1) { ++ void *val = ares__llist_first_val(htable->buckets[i]); ++ size_t idx = HASH_IDX(htable, htable->bucket_key(val)); ++ ++ if (buckets[idx] == NULL) { ++ /* Swap! */ ++ buckets[idx] = htable->buckets[i]; ++ htable->buckets[i] = NULL; ++ continue; ++ } ++ } ++ ++ /* Slow path, collisions */ ++ while ((node = ares__llist_node_first(htable->buckets[i])) != NULL) { + void *val = ares__llist_node_val(node); + size_t idx = HASH_IDX(htable, htable->bucket_key(val)); + ++ /* Try fast path again as maybe we popped one collision off and the ++ * next we can reuse the llist parent */ ++ if (buckets[idx] == NULL && ares__llist_len(htable->buckets[i]) == 1) { ++ /* Swap! */ ++ buckets[idx] = htable->buckets[i]; ++ htable->buckets[i] = NULL; ++ break; ++ } ++ + if (buckets[idx] == NULL) { + buckets[idx] = ares__llist_create(htable->bucket_free); + } +@@ -192,19 +221,17 @@ static ares_bool_t ares__htable_expand(ares__htable_t *htable) + goto fail; + } + +- if (ares__llist_insert_first(buckets[idx], val) == NULL) { +- goto fail; +- } ++ ares__llist_node_move_parent_first(node, buckets[idx]); + } + } + + /* Swap out buckets */ +- ares__htable_buckets_destroy(htable->buckets, old_size, 0); ++ ares__htable_buckets_destroy(htable->buckets, old_size, ARES_FALSE); + htable->buckets = buckets; + return ARES_TRUE; + + fail: +- ares__htable_buckets_destroy(buckets, htable->size, 0); ++ ares__htable_buckets_destroy(buckets, htable->size, ARES_FALSE); + htable->size = old_size; + + return ARES_FALSE; +--- a/src/lib/ares__llist.c ++++ b/src/lib/ares__llist.c +@@ -71,24 +71,14 @@ typedef enum { + ARES__LLIST_INSERT_BEFORE + } ares__llist_insert_type_t; + +-static ares__llist_node_t *ares__llist_insert_at(ares__llist_t *list, +- ares__llist_insert_type_t type, +- ares__llist_node_t *at, +- void *val) ++static void ares__llist_attach_at(ares__llist_t *list, ++ ares__llist_insert_type_t type, ++ ares__llist_node_t *at, ++ ares__llist_node_t *node) + { +- ares__llist_node_t *node = NULL; +- +- if (list == NULL || val == NULL) { +- return NULL; +- } +- +- node = ares_malloc_zero(sizeof(*node)); +- +- if (node == NULL) { +- return NULL; +- } ++ if (list == NULL || node == NULL) ++ return; + +- node->data = val; + node->parent = list; + + if (type == ARES__LLIST_INSERT_BEFORE && (at == list->head || at == NULL)) { +@@ -126,6 +116,27 @@ static ares__llist_node_t *ares__llist_insert_at(ares__llist_t *list, + } + + list->cnt++; ++} ++ ++static ares__llist_node_t *ares__llist_insert_at(ares__llist_t *list, ++ ares__llist_insert_type_t type, ++ ares__llist_node_t *at, ++ void *val) ++{ ++ ares__llist_node_t *node = NULL; ++ ++ if (list == NULL || val == NULL) { ++ return NULL; ++ } ++ ++ node = ares_malloc_zero(sizeof(*node)); ++ ++ if (node == NULL) { ++ return NULL; ++ } ++ ++ node->data = val; ++ ares__llist_attach_at(list, type, at, node); + + return node; + } +@@ -233,17 +244,14 @@ void *ares__llist_last_val(ares__llist_t *list) + return ares__llist_node_val(ares__llist_node_last(list)); + } + +-void *ares__llist_node_claim(ares__llist_node_t *node) ++static void ares__llist_node_detach(ares__llist_node_t *node) + { +- void *val; + ares__llist_t *list; + +- if (node == NULL) { +- return NULL; +- } ++ if (node == NULL) ++ return; + + list = node->parent; +- val = node->data; + + if (node->prev) { + node->prev->next = node->next; +@@ -260,9 +268,22 @@ void *ares__llist_node_claim(ares__llist_node_t *node) + if (node == list->tail) { + list->tail = node->prev; + } +- ares_free(node); + ++ node->parent = NULL; + list->cnt--; ++} ++ ++void *ares__llist_node_claim(ares__llist_node_t *node) ++{ ++ void *val; ++ ++ if (node == NULL) { ++ return NULL; ++ } ++ ++ val = node->data; ++ ares__llist_node_detach(node); ++ ares_free(node); + + return val; + } +@@ -313,3 +334,23 @@ void ares__llist_destroy(ares__llist_t *list) + } + ares_free(list); + } ++ ++void ares__llist_node_move_parent_last(ares__llist_node_t *node, ++ ares__llist_t *new_parent) ++{ ++ if (node == NULL || new_parent == NULL) ++ return; ++ ++ ares__llist_node_detach(node); ++ ares__llist_attach_at(new_parent, ARES__LLIST_INSERT_TAIL, NULL, node); ++} ++ ++void ares__llist_node_move_parent_first(ares__llist_node_t *node, ++ ares__llist_t *new_parent) ++{ ++ if (node == NULL || new_parent == NULL) ++ return; ++ ++ ares__llist_node_detach(node); ++ ares__llist_attach_at(new_parent, ARES__LLIST_INSERT_HEAD, NULL, node); ++} +--- a/src/lib/ares__llist.h ++++ b/src/lib/ares__llist.h +@@ -198,6 +198,23 @@ void ares__llist_node_destroy(ares__llist_node_t *node); + */ + void ares__llist_destroy(ares__llist_t *list); + ++/*! Detach node from the current list and re-attach it to the new list as the ++ * last entry. ++ * ++ * \param[in] node node to move ++ * \param[in] parent new list ++ */ ++void ares__llist_node_move_parent_last(ares__llist_node_t *node, ++ ares__llist_t *new_parent); ++ ++/*! Detach node from the current list and re-attach it to the new list as the ++ * first entry. ++ * ++ * \param[in] node node to move ++ * \param[in] parent new list ++ */ ++void ares__llist_node_move_parent_first(ares__llist_node_t *node, ++ ares__llist_t *new_parent); + /*! @} */ + + #endif /* __ARES__LLIST_H */ + diff --git a/net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup.patch b/net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup.patch new file mode 100644 index 000000000000..c0e76ca7e83c --- /dev/null +++ b/net-dns/c-ares/files/c-ares-1.22.0-hosts-lookup.patch @@ -0,0 +1,109 @@ +https://github.com/c-ares/c-ares/commit/a3631763ca30804c3095d99474e05625f0c9b0fa + +From a3631763ca30804c3095d99474e05625f0c9b0fa Mon Sep 17 00:00:00 2001 +From: Brad House +Date: Thu, 16 Nov 2023 12:19:39 -0500 +Subject: [PATCH] Fix /etc/hosts processing performance with all entries using + same IP address + +Some users use blacklist files like https://github.com/StevenBlack/hosts which +can contain 200k+ host entries all pointing to 0.0.0.0. Due to the merge +logic in the new hosts processor, all those entries will be associated as +aliases for the same ip address. + +The first issue is that it attempts to check the status of all the hosts for +the merged entry, when it should only be checking the new hosts added to the +merged entry, so this caused exponential time as the entries got longer. + +The next issue is if searching for one of those hosts, it would append all +the matches as cnames/aliases, but there is zero use for 200k aliases +being appended to a lookup, so we are artificially capping this to 100. + +Bug report reference: https://bugs.gentoo.org/917400 + +Fix By: Brad House (@bradh352) +--- a/src/lib/ares__hosts_file.c ++++ b/src/lib/ares__hosts_file.c +@@ -422,9 +422,15 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, + ares_hosts_entry_t *entry) + { + ares_hosts_entry_t *match = NULL; +- ares_status_t status = ARES_SUCCESS; ++ ares_status_t status = ARES_SUCCESS; + ares__llist_node_t *node; + ares_hosts_file_match_t matchtype; ++ size_t num_hostnames; ++ ++ /* Record the number of hostnames in this entry file. If we merge into an ++ * existing record, these will be *appended* to the entry, so we'll count ++ * backwards when adding to the hosts hashtable */ ++ num_hostnames = ares__llist_len(entry->hosts); + + matchtype = ares__hosts_file_match(hosts, entry, &match); + +@@ -450,10 +456,17 @@ static ares_status_t ares__hosts_file_add(ares_hosts_file_t *hosts, + } + } + +- for (node = ares__llist_node_first(entry->hosts); node != NULL; +- node = ares__llist_node_next(node)) { ++ /* Go backwards, on a merge, hostnames are appended. Breakout once we've ++ * consumed all the hosts that we appended */ ++ for (node = ares__llist_node_last(entry->hosts); node != NULL; ++ node = ares__llist_node_prev(node)) { + const char *val = ares__llist_node_val(node); + ++ if (num_hostnames == 0) ++ break; ++ ++ num_hostnames--; ++ + /* first hostname match wins. If we detect a duplicate hostname for another + * ip it will automatically be added to the same entry */ + if (ares__htable_strvp_get(hosts->hosthash, val, NULL)) { +@@ -950,6 +963,12 @@ ares_status_t ares__hosts_entry_to_hostent(const ares_hosts_entry_t *entry, + + /* Copy aliases */ + naliases = ares__llist_len(entry->hosts) - 1; ++ ++ /* Cap at 100, some people use https://github.com/StevenBlack/hosts and we ++ * don't need 200k+ aliases */ ++ if (naliases > 100) ++ naliases = 100; ++ + (*hostent)->h_aliases = + ares_malloc_zero((naliases + 1) * sizeof(*(*hostent)->h_aliases)); + if ((*hostent)->h_aliases == NULL) { +@@ -968,6 +987,10 @@ ares_status_t ares__hosts_entry_to_hostent(const ares_hosts_entry_t *entry, + goto fail; + } + idx++; ++ ++ /* Break out if artificially capped */ ++ if (idx == naliases) ++ break; + node = ares__llist_node_next(node); + } + +@@ -988,6 +1011,7 @@ static ares_status_t + const char *primaryhost; + ares__llist_node_t *node; + ares_status_t status; ++ size_t cnt = 0; + + node = ares__llist_node_first(entry->hosts); + primaryhost = ares__llist_node_val(node); +@@ -997,6 +1021,12 @@ static ares_status_t + while (node != NULL) { + const char *host = ares__llist_node_val(node); + ++ /* Cap at 100 entries. , some people use https://github.com/StevenBlack/hosts ++ * and we don't need 200k+ aliases */ ++ cnt++; ++ if (cnt > 100) ++ break; ++ + cname = ares__append_addrinfo_cname(&cnames); + if (cname == NULL) { + status = ARES_ENOMEM; + -- cgit v1.2.3-65-gdbad