278 lines
10 KiB
Diff
278 lines
10 KiB
Diff
From c3c931f5608da23a196b216edfcc2af5b626fc9a Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Sa=C3=BAl=20Ibarra=20Corretg=C3=A9?= <s@saghul.net>
|
|
Date: Mon, 4 Aug 2025 10:27:07 +0200
|
|
Subject: [PATCH] Refactor channel destruction logic
|
|
|
|
- Use ares_queue_wait_empty to wait for queries to be complete before
|
|
destruction
|
|
- Make sure NO queries are cancelled as side effects on __del__
|
|
- Start the destruction thread early, as soon as a channel is created
|
|
|
|
Cancelling pending queries while in a query callback seemingly causes
|
|
heap corruption or double-free bugs, so delay the operation until no
|
|
Python code if using the channel anymore, that is, the destructor
|
|
thread.
|
|
|
|
Fixes: https://github.com/aio-libs/aiodns/issues/175
|
|
Fixes: https://github.com/saghul/pycares/issues/248
|
|
---
|
|
docs/channel.rst | 13 ------
|
|
src/_cffi_src/build_cares.py | 78 +++++++++++++++++++++++-------------
|
|
src/pycares/__init__.py | 61 ++++++++++++----------------
|
|
3 files changed, 76 insertions(+), 76 deletions(-)
|
|
|
|
Index: pycares-4.9.0/docs/channel.rst
|
|
===================================================================
|
|
--- pycares-4.9.0.orig/docs/channel.rst
|
|
+++ pycares-4.9.0/docs/channel.rst
|
|
@@ -77,25 +77,6 @@
|
|
While channels will attempt automatic cleanup during garbage collection, explicit
|
|
closing is safer as it gives you control over when resources are released.
|
|
|
|
- .. warning::
|
|
- The channel destruction mechanism has a limited throughput of 60 channels per minute
|
|
- (one channel per second) to ensure thread safety and prevent use-after-free errors
|
|
- in c-ares. This means:
|
|
-
|
|
- - Avoid creating transient channels for individual queries
|
|
- - Reuse channel instances whenever possible
|
|
- - For applications with high query volume, use a single long-lived channel
|
|
- - If you must create multiple channels, consider pooling them
|
|
-
|
|
- Creating and destroying channels rapidly will result in a backlog as the destruction
|
|
- queue processes channels sequentially with a 1-second delay between each.
|
|
-
|
|
- The Channel class supports the context manager protocol for automatic cleanup::
|
|
-
|
|
- with pycares.Channel() as channel:
|
|
- channel.query('example.com', pycares.QUERY_TYPE_A, callback)
|
|
- # Channel is automatically closed when exiting the context
|
|
-
|
|
.. py:method:: getaddrinfo(host, port, callback, family=0, type=0, proto=0, flags=0)
|
|
|
|
:param string host: Hostname to resolve.
|
|
Index: pycares-4.9.0/src/_cffi_src/build_cares.py
|
|
===================================================================
|
|
--- pycares-4.9.0.orig/src/_cffi_src/build_cares.py
|
|
+++ pycares-4.9.0/src/_cffi_src/build_cares.py
|
|
@@ -90,34 +90,6 @@ struct sockaddr_in6 {
|
|
typedef int... ares_socket_t;
|
|
typedef int... ares_socklen_t;
|
|
|
|
-#define ARES_SUCCESS ...
|
|
-
|
|
-#define ARES_ENODATA ...
|
|
-#define ARES_EFORMERR ...
|
|
-#define ARES_ESERVFAIL ...
|
|
-#define ARES_ENOTFOUND ...
|
|
-#define ARES_ENOTIMP ...
|
|
-#define ARES_EREFUSED ...
|
|
-#define ARES_EBADQUERY ...
|
|
-#define ARES_EBADNAME ...
|
|
-#define ARES_EBADFAMILY ...
|
|
-#define ARES_EBADRESP ...
|
|
-#define ARES_ECONNREFUSED ...
|
|
-#define ARES_ETIMEOUT ...
|
|
-#define ARES_EOF ...
|
|
-#define ARES_EFILE ...
|
|
-#define ARES_ENOMEM ...
|
|
-#define ARES_EDESTRUCTION ...
|
|
-#define ARES_EBADSTR ...
|
|
-#define ARES_EBADFLAGS ...
|
|
-#define ARES_ENONAME ...
|
|
-#define ARES_EBADHINTS ...
|
|
-#define ARES_ENOTINITIALIZED ...
|
|
-#define ARES_ELOADIPHLPAPI ...
|
|
-#define ARES_EADDRGETNETWORKPARAMS ...
|
|
-#define ARES_ECANCELLED ...
|
|
-#define ARES_ESERVICE ...
|
|
-
|
|
#define ARES_FLAG_USEVC ...
|
|
#define ARES_FLAG_PRIMARY ...
|
|
#define ARES_FLAG_IGNTC ...
|
|
@@ -229,6 +201,54 @@ struct ares_server_failover_options {
|
|
size_t retry_delay;
|
|
};
|
|
|
|
+typedef enum {
|
|
+ ARES_SUCCESS = 0,
|
|
+
|
|
+ /* Server error codes (ARES_ENODATA indicates no relevant answer) */
|
|
+ ARES_ENODATA = 1,
|
|
+ ARES_EFORMERR = 2,
|
|
+ ARES_ESERVFAIL = 3,
|
|
+ ARES_ENOTFOUND = 4,
|
|
+ ARES_ENOTIMP = 5,
|
|
+ ARES_EREFUSED = 6,
|
|
+
|
|
+ /* Locally generated error codes */
|
|
+ ARES_EBADQUERY = 7,
|
|
+ ARES_EBADNAME = 8,
|
|
+ ARES_EBADFAMILY = 9,
|
|
+ ARES_EBADRESP = 10,
|
|
+ ARES_ECONNREFUSED = 11,
|
|
+ ARES_ETIMEOUT = 12,
|
|
+ ARES_EOF = 13,
|
|
+ ARES_EFILE = 14,
|
|
+ ARES_ENOMEM = 15,
|
|
+ ARES_EDESTRUCTION = 16,
|
|
+ ARES_EBADSTR = 17,
|
|
+
|
|
+ /* ares_getnameinfo error codes */
|
|
+ ARES_EBADFLAGS = 18,
|
|
+
|
|
+ /* ares_getaddrinfo error codes */
|
|
+ ARES_ENONAME = 19,
|
|
+ ARES_EBADHINTS = 20,
|
|
+
|
|
+ /* Uninitialized library error code */
|
|
+ ARES_ENOTINITIALIZED = 21, /* introduced in 1.7.0 */
|
|
+
|
|
+ /* ares_library_init error codes */
|
|
+ ARES_ELOADIPHLPAPI = 22, /* introduced in 1.7.0 */
|
|
+ ARES_EADDRGETNETWORKPARAMS = 23, /* introduced in 1.7.0 */
|
|
+
|
|
+ /* More error codes */
|
|
+ ARES_ECANCELLED = 24, /* introduced in 1.7.0 */
|
|
+
|
|
+ /* More ares_getaddrinfo error codes */
|
|
+ ARES_ESERVICE = 25, /* ares_getaddrinfo() was passed a text service name that
|
|
+ * is not recognized. introduced in 1.16.0 */
|
|
+
|
|
+ ARES_ENOSERVER = 26 /* No DNS servers were configured */
|
|
+} ares_status_t;
|
|
+
|
|
/*! Values for ARES_OPT_EVENT_THREAD */
|
|
typedef enum {
|
|
/*! Default (best choice) event system */
|
|
@@ -597,6 +617,8 @@ const char *ares_inet_ntop(int af, const
|
|
int ares_inet_pton(int af, const char *src, void *dst);
|
|
|
|
ares_bool_t ares_threadsafety(void);
|
|
+
|
|
+ares_status_t ares_queue_wait_empty(ares_channel channel, int timeout_ms);
|
|
"""
|
|
|
|
CALLBACKS = """
|
|
Index: pycares-4.9.0/src/pycares/__init__.py
|
|
===================================================================
|
|
--- pycares-4.9.0.orig/src/pycares/__init__.py
|
|
+++ pycares-4.9.0/src/pycares/__init__.py
|
|
@@ -12,10 +12,8 @@ from ._version import __version__
|
|
import socket
|
|
import math
|
|
import threading
|
|
-import time
|
|
import weakref
|
|
from collections.abc import Callable, Iterable
|
|
-from contextlib import suppress
|
|
from typing import Any, Callable, Optional, Dict, Union
|
|
from queue import SimpleQueue
|
|
|
|
@@ -344,7 +342,7 @@ class _ChannelShutdownManager:
|
|
def __init__(self) -> None:
|
|
self._queue: SimpleQueue = SimpleQueue()
|
|
self._thread: Optional[threading.Thread] = None
|
|
- self._thread_started = False
|
|
+ self._start_lock = threading.Lock()
|
|
|
|
def _run_safe_shutdown_loop(self) -> None:
|
|
"""Process channel destruction requests from the queue."""
|
|
@@ -352,16 +350,27 @@ class _ChannelShutdownManager:
|
|
# Block forever until we get a channel to destroy
|
|
channel = self._queue.get()
|
|
|
|
- # Sleep for 1 second to ensure c-ares has finished processing
|
|
- # Its important that c-ares is past this critcial section
|
|
- # so we use a delay to ensure it has time to finish processing
|
|
- # https://github.com/c-ares/c-ares/blob/4f42928848e8b73d322b15ecbe3e8d753bf8734e/src/lib/ares_process.c#L1422
|
|
- time.sleep(1.0)
|
|
+ # Cancel all pending queries - this will trigger callbacks with ARES_ECANCELLED
|
|
+ _lib.ares_cancel(channel[0])
|
|
+
|
|
+ # Wait for all queries to finish
|
|
+ _lib.ares_queue_wait_empty(channel[0], -1)
|
|
|
|
# Destroy the channel
|
|
if _lib is not None and channel is not None:
|
|
_lib.ares_destroy(channel[0])
|
|
|
|
+ def start(self) -> None:
|
|
+ """Start the background thread if not already started."""
|
|
+ if self._thread is not None:
|
|
+ return
|
|
+ with self._start_lock:
|
|
+ if self._thread is not None:
|
|
+ # Started by another thread while waiting for the lock
|
|
+ return
|
|
+ self._thread = threading.Thread(target=self._run_safe_shutdown_loop, daemon=True)
|
|
+ self._thread.start()
|
|
+
|
|
def destroy_channel(self, channel) -> None:
|
|
"""
|
|
Schedule channel destruction on the background thread with a safety delay.
|
|
@@ -369,17 +378,10 @@ class _ChannelShutdownManager:
|
|
Thread Safety and Synchronization:
|
|
This method uses SimpleQueue which is thread-safe for putting items
|
|
from multiple threads. The background thread processes channels
|
|
- sequentially with a 1-second delay before each destruction.
|
|
+ sequentially waiting for queries to end before each destruction.
|
|
"""
|
|
- # Put the channel in the queue
|
|
self._queue.put(channel)
|
|
|
|
- # Start the background thread if not already started
|
|
- if not self._thread_started:
|
|
- self._thread_started = True
|
|
- self._thread = threading.Thread(target=self._run_safe_shutdown_loop, daemon=True)
|
|
- self._thread.start()
|
|
-
|
|
|
|
# Global shutdown manager instance
|
|
_shutdown_manager = _ChannelShutdownManager()
|
|
@@ -516,11 +518,12 @@ class Channel:
|
|
self.close()
|
|
return False
|
|
|
|
+ # Ensure the shutdown thread is started
|
|
+ _shutdown_manager.start()
|
|
+
|
|
def __del__(self) -> None:
|
|
"""Ensure the channel is destroyed when the object is deleted."""
|
|
- if self._channel is not None:
|
|
- # Schedule channel destruction using the global shutdown manager
|
|
- self._schedule_destruction()
|
|
+ self.close()
|
|
|
|
def _create_callback_handle(self, callback_data):
|
|
"""
|
|
@@ -764,24 +767,12 @@ class Channel:
|
|
# Already destroyed
|
|
return
|
|
|
|
- # Cancel all pending queries - this will trigger callbacks with ARES_ECANCELLED
|
|
- self.cancel()
|
|
+ # NB: don't cancel queries here, it may lead to problem if done from a
|
|
+ # query callback.
|
|
|
|
# Schedule channel destruction
|
|
- self._schedule_destruction()
|
|
-
|
|
- def _schedule_destruction(self) -> None:
|
|
- """Schedule channel destruction using the global shutdown manager."""
|
|
- if self._channel is None:
|
|
- return
|
|
- channel = self._channel
|
|
- self._channel = None
|
|
- # Can't start threads during interpreter shutdown
|
|
- # The channel will be cleaned up by the OS
|
|
- # TODO: Change to PythonFinalizationError when Python 3.12 support is dropped
|
|
- with suppress(RuntimeError):
|
|
- _shutdown_manager.destroy_channel(channel)
|
|
-
|
|
+ channel, self._channel = self._channel, None
|
|
+ _shutdown_manager.destroy_channel(channel)
|
|
|
|
|
|
class AresResult:
|