SHA256
1
0
forked from pool/salt
salt/async-batch-implementation.patch

1157 lines
42 KiB
Diff
Raw Normal View History

From 78faccbd063b8635550935057b8630262958f669 Mon Sep 17 00:00:00 2001
From: Mihai Dinca <mdinca@suse.de>
Date: Fri, 16 Nov 2018 17:05:29 +0100
Subject: [PATCH] Async batch implementation
Add find_job checks
Check if should close on all events
Make batch_delay a request parameter
Allow multiple event handlers
Use config value for gather_job_timeout when not in payload
Add async batch unittests
Allow metadata to pass
Pass metadata only to batch jobs
Add the metadata to the start/done events
Pass only metadata not all **kwargs
Add separate batch presence_ping timeout
---
salt/auth/__init__.py | 2 +
salt/cli/batch.py | 115 +++++++---
salt/cli/batch_async.py | 240 +++++++++++++++++++
salt/client/__init__.py | 14 ++
salt/master.py | 26 ++-
salt/netapi/__init__.py | 3 +-
salt/transport/ipc.py | 43 ++--
salt/utils/event.py | 8 +-
tests/unit/cli/test_batch_async.py | 357 +++++++++++++++++++++++++++++
9 files changed, 741 insertions(+), 67 deletions(-)
create mode 100644 salt/cli/batch_async.py
create mode 100644 tests/unit/cli/test_batch_async.py
diff --git a/salt/auth/__init__.py b/salt/auth/__init__.py
index ee1eac7ce4..22c54e8048 100644
--- a/salt/auth/__init__.py
+++ b/salt/auth/__init__.py
@@ -52,6 +52,8 @@ AUTH_INTERNAL_KEYWORDS = frozenset(
"print_event",
"raw",
"yield_pub_data",
+ "batch",
+ "batch_delay",
]
)
diff --git a/salt/cli/batch.py b/salt/cli/batch.py
index 155dc734b7..527cffdeb7 100644
--- a/salt/cli/batch.py
+++ b/salt/cli/batch.py
@@ -1,10 +1,7 @@
-# -*- coding: utf-8 -*-
"""
Execute batch runs
"""
-# Import python libs
-from __future__ import absolute_import, print_function, unicode_literals
import copy
@@ -17,11 +14,8 @@ from datetime import datetime, timedelta
import salt.client
import salt.exceptions
import salt.output
-
-# Import salt libs
import salt.utils.stringutils
-# Import 3rd-party libs
# pylint: disable=import-error,no-name-in-module,redefined-builtin
from salt.ext import six
from salt.ext.six.moves import range
@@ -29,7 +23,77 @@ from salt.ext.six.moves import range
log = logging.getLogger(__name__)
-class Batch(object):
+def get_bnum(opts, minions, quiet):
+ """
+ Return the active number of minions to maintain
+ """
+ partition = lambda x: float(x) / 100.0 * len(minions)
+ try:
+ if isinstance(opts["batch"], str) and "%" in opts["batch"]:
+ res = partition(float(opts["batch"].strip("%")))
+ if res < 1:
+ return int(math.ceil(res))
+ else:
+ return int(res)
+ else:
+ return int(opts["batch"])
+ except ValueError:
+ if not quiet:
+ salt.utils.stringutils.print_cli(
+ "Invalid batch data sent: {}\nData must be in the "
+ "form of %10, 10% or 3".format(opts["batch"])
+ )
+
+
+def batch_get_opts(
+ tgt, fun, batch, parent_opts, arg=(), tgt_type="glob", ret="", kwarg=None, **kwargs
+):
+ # We need to re-import salt.utils.args here
+ # even though it has already been imported.
+ # when cmd_batch is called via the NetAPI
+ # the module is unavailable.
+ import salt.utils.args
+
+ arg = salt.utils.args.condition_input(arg, kwarg)
+ opts = {
+ "tgt": tgt,
+ "fun": fun,
+ "arg": arg,
+ "tgt_type": tgt_type,
+ "ret": ret,
+ "batch": batch,
+ "failhard": kwargs.get("failhard", parent_opts.get("failhard", False)),
+ "raw": kwargs.get("raw", False),
+ }
+
+ if "timeout" in kwargs:
+ opts["timeout"] = kwargs["timeout"]
+ if "gather_job_timeout" in kwargs:
+ opts["gather_job_timeout"] = kwargs["gather_job_timeout"]
+ if "batch_wait" in kwargs:
+ opts["batch_wait"] = int(kwargs["batch_wait"])
+
+ for key, val in parent_opts.items():
+ if key not in opts:
+ opts[key] = val
+
+ return opts
+
+
+def batch_get_eauth(kwargs):
+ eauth = {}
+ if "eauth" in kwargs:
+ eauth["eauth"] = kwargs.pop("eauth")
+ if "username" in kwargs:
+ eauth["username"] = kwargs.pop("username")
+ if "password" in kwargs:
+ eauth["password"] = kwargs.pop("password")
+ if "token" in kwargs:
+ eauth["token"] = kwargs.pop("token")
+ return eauth
+
+
+class Batch:
"""
Manage the execution of batch runs
"""
@@ -75,7 +139,7 @@ class Batch(object):
continue
else:
try:
- m = next(six.iterkeys(ret))
+ m = next(iter(ret.keys()))
except StopIteration:
if not self.quiet:
salt.utils.stringutils.print_cli(
@@ -87,28 +151,7 @@ class Batch(object):
return (list(fret), ping_gen, nret.difference(fret))
def get_bnum(self):
- """
- Return the active number of minions to maintain
- """
- partition = lambda x: float(x) / 100.0 * len(self.minions)
- try:
- if (
- isinstance(self.opts["batch"], six.string_types)
- and "%" in self.opts["batch"]
- ):
- res = partition(float(self.opts["batch"].strip("%")))
- if res < 1:
- return int(math.ceil(res))
- else:
- return int(res)
- else:
- return int(self.opts["batch"])
- except ValueError:
- if not self.quiet:
- salt.utils.stringutils.print_cli(
- "Invalid batch data sent: {0}\nData must be in the "
- "form of %10, 10% or 3".format(self.opts["batch"])
- )
+ return get_bnum(self.opts, self.minions, self.quiet)
def __update_wait(self, wait):
now = datetime.now()
@@ -161,7 +204,7 @@ class Batch(object):
# the user we won't be attempting to run a job on them
for down_minion in self.down_minions:
salt.utils.stringutils.print_cli(
- "Minion {0} did not respond. No job will be sent.".format(
+ "Minion {} did not respond. No job will be sent.".format(
down_minion
)
)
@@ -190,7 +233,7 @@ class Batch(object):
if next_:
if not self.quiet:
salt.utils.stringutils.print_cli(
- "\nExecuting run on {0}\n".format(sorted(next_))
+ "\nExecuting run on {}\n".format(sorted(next_))
)
# create a new iterator for this batch of minions
return_value = self.opts.get("return", self.opts.get("ret", ""))
@@ -218,7 +261,7 @@ class Batch(object):
for ping_ret in self.ping_gen:
if ping_ret is None:
break
- m = next(six.iterkeys(ping_ret))
+ m = next(iter(ping_ret.keys()))
if m not in self.minions:
self.minions.append(m)
to_run.append(m)
@@ -243,7 +286,7 @@ class Batch(object):
)
else:
salt.utils.stringutils.print_cli(
- "minion {0} was already deleted from tracker, probably a duplicate key".format(
+ "minion {} was already deleted from tracker, probably a duplicate key".format(
part["id"]
)
)
@@ -254,7 +297,7 @@ class Batch(object):
minion_tracker[queue]["minions"].remove(id)
else:
salt.utils.stringutils.print_cli(
- "minion {0} was already deleted from tracker, probably a duplicate key".format(
+ "minion {} was already deleted from tracker, probably a duplicate key".format(
id
)
)
@@ -274,7 +317,7 @@ class Batch(object):
parts[minion] = {}
parts[minion]["ret"] = {}
- for minion, data in six.iteritems(parts):
+ for minion, data in parts.items():
if minion in active:
active.remove(minion)
if bwait:
diff --git a/salt/cli/batch_async.py b/salt/cli/batch_async.py
new file mode 100644
index 0000000000..1557e5105b
--- /dev/null
+++ b/salt/cli/batch_async.py
@@ -0,0 +1,240 @@
+"""
+Execute a job on the targeted minions by using a moving window of fixed size `batch`.
+"""
+
+import fnmatch
+
+# pylint: enable=import-error,no-name-in-module,redefined-builtin
+import logging
+
+import salt.client
+import tornado
+from salt.cli.batch import batch_get_eauth, batch_get_opts, get_bnum
+
+log = logging.getLogger(__name__)
+
+
+class BatchAsync:
+ """
+ Run a job on the targeted minions by using a moving window of fixed size `batch`.
+
+ ``BatchAsync`` is used to execute a job on the targeted minions by keeping
+ the number of concurrent running minions to the size of `batch` parameter.
+
+ The control parameters are:
+ - batch: number/percentage of concurrent running minions
+ - batch_delay: minimum wait time between batches
+ - batch_presence_ping_timeout: time to wait for presence pings before starting the batch
+ - gather_job_timeout: `find_job` timeout
+ - timeout: time to wait before firing a `find_job`
+
+ When the batch stars, a `start` event is fired:
+ - tag: salt/batch/<batch-jid>/start
+ - data: {
+ "available_minions": self.minions,
+ "down_minions": self.down_minions
+ }
+
+ When the batch ends, an `done` event is fired:
+ - tag: salt/batch/<batch-jid>/done
+ - data: {
+ "available_minions": self.minions,
+ "down_minions": self.down_minions,
+ "done_minions": self.done_minions,
+ "timedout_minions": self.timedout_minions
+ }
+ """
+
+ def __init__(self, parent_opts, jid_gen, clear_load):
+ ioloop = tornado.ioloop.IOLoop.current()
+ self.local = salt.client.get_local_client(parent_opts["conf_file"])
+ if "gather_job_timeout" in clear_load["kwargs"]:
+ clear_load["gather_job_timeout"] = clear_load["kwargs"].pop(
+ "gather_job_timeout"
+ )
+ else:
+ clear_load["gather_job_timeout"] = self.local.opts["gather_job_timeout"]
+ self.batch_presence_ping_timeout = clear_load["kwargs"].get(
+ "batch_presence_ping_timeout", None
+ )
+ self.batch_delay = clear_load["kwargs"].get("batch_delay", 1)
+ self.opts = batch_get_opts(
+ clear_load.pop("tgt"),
+ clear_load.pop("fun"),
+ clear_load["kwargs"].pop("batch"),
+ self.local.opts,
+ **clear_load
+ )
+ self.eauth = batch_get_eauth(clear_load["kwargs"])
+ self.metadata = clear_load["kwargs"].get("metadata", {})
+ self.minions = set()
+ self.down_minions = set()
+ self.timedout_minions = set()
+ self.done_minions = set()
+ self.active = set()
+ self.initialized = False
+ self.ping_jid = jid_gen()
+ self.batch_jid = jid_gen()
+ self.find_job_jid = jid_gen()
+ self.find_job_returned = set()
+ self.event = salt.utils.event.get_event(
+ "master",
+ self.opts["sock_dir"],
+ self.opts["transport"],
+ opts=self.opts,
+ listen=True,
+ io_loop=ioloop,
+ keep_loop=True,
+ )
+
+ def __set_event_handler(self):
+ ping_return_pattern = "salt/job/{}/ret/*".format(self.ping_jid)
+ batch_return_pattern = "salt/job/{}/ret/*".format(self.batch_jid)
+ find_job_return_pattern = "salt/job/{}/ret/*".format(self.find_job_jid)
+ self.event.subscribe(ping_return_pattern, match_type="glob")
+ self.event.subscribe(batch_return_pattern, match_type="glob")
+ self.event.subscribe(find_job_return_pattern, match_type="glob")
+ self.event.patterns = {
+ (ping_return_pattern, "ping_return"),
+ (batch_return_pattern, "batch_run"),
+ (find_job_return_pattern, "find_job_return"),
+ }
+ self.event.set_event_handler(self.__event_handler)
+
+ def __event_handler(self, raw):
+ if not self.event:
+ return
+ mtag, data = self.event.unpack(raw, self.event.serial)
+ for (pattern, op) in self.event.patterns:
+ if fnmatch.fnmatch(mtag, pattern):
+ minion = data["id"]
+ if op == "ping_return":
+ self.minions.add(minion)
+ self.down_minions.remove(minion)
+ if not self.down_minions:
+ self.event.io_loop.spawn_callback(self.start_batch)
+ elif op == "find_job_return":
+ self.find_job_returned.add(minion)
+ elif op == "batch_run":
+ if minion in self.active:
+ self.active.remove(minion)
+ self.done_minions.add(minion)
+ # call later so that we maybe gather more returns
+ self.event.io_loop.call_later(
+ self.batch_delay, self.schedule_next
+ )
+
+ if self.initialized and self.done_minions == self.minions.difference(
+ self.timedout_minions
+ ):
+ self.end_batch()
+
+ def _get_next(self):
+ to_run = (
+ self.minions.difference(self.done_minions)
+ .difference(self.active)
+ .difference(self.timedout_minions)
+ )
+ next_batch_size = min(
+ len(to_run), # partial batch (all left)
+ self.batch_size - len(self.active), # full batch or available slots
+ )
+ return set(list(to_run)[:next_batch_size])
+
+ @tornado.gen.coroutine
+ def check_find_job(self, minions):
+ did_not_return = minions.difference(self.find_job_returned)
+ if did_not_return:
+ for minion in did_not_return:
+ if minion in self.find_job_returned:
+ self.find_job_returned.remove(minion)
+ if minion in self.active:
+ self.active.remove(minion)
+ self.timedout_minions.add(minion)
+ running = (
+ minions.difference(did_not_return)
+ .difference(self.done_minions)
+ .difference(self.timedout_minions)
+ )
+ if running:
+ self.event.io_loop.add_callback(self.find_job, running)
+
+ @tornado.gen.coroutine
+ def find_job(self, minions):
+ not_done = minions.difference(self.done_minions)
+ ping_return = yield self.local.run_job_async(
+ not_done,
+ "saltutil.find_job",
+ [self.batch_jid],
+ "list",
+ gather_job_timeout=self.opts["gather_job_timeout"],
+ jid=self.find_job_jid,
+ **self.eauth
+ )
+ self.event.io_loop.call_later(
+ self.opts["gather_job_timeout"], self.check_find_job, not_done
+ )
+
+ @tornado.gen.coroutine
+ def start(self):
+ self.__set_event_handler()
+ # start batching even if not all minions respond to ping
+ self.event.io_loop.call_later(
+ self.batch_presence_ping_timeout or self.opts["gather_job_timeout"],
+ self.start_batch,
+ )
+ ping_return = yield self.local.run_job_async(
+ self.opts["tgt"],
+ "test.ping",
+ [],
+ self.opts.get("selected_target_option", self.opts.get("tgt_type", "glob")),
+ gather_job_timeout=self.opts["gather_job_timeout"],
+ jid=self.ping_jid,
+ metadata=self.metadata,
+ **self.eauth
+ )
+ self.down_minions = set(ping_return["minions"])
+
+ @tornado.gen.coroutine
+ def start_batch(self):
+ if not self.initialized:
+ self.batch_size = get_bnum(self.opts, self.minions, True)
+ self.initialized = True
+ data = {
+ "available_minions": self.minions,
+ "down_minions": self.down_minions,
+ "metadata": self.metadata,
+ }
+ self.event.fire_event(data, "salt/batch/{}/start".format(self.batch_jid))
+ yield self.schedule_next()
+
+ def end_batch(self):
+ data = {
+ "available_minions": self.minions,
+ "down_minions": self.down_minions,
+ "done_minions": self.done_minions,
+ "timedout_minions": self.timedout_minions,
+ "metadata": self.metadata,
+ }
+ self.event.fire_event(data, "salt/batch/{}/done".format(self.batch_jid))
+ self.event.remove_event_handler(self.__event_handler)
+
+ @tornado.gen.coroutine
+ def schedule_next(self):
+ next_batch = self._get_next()
+ if next_batch:
+ yield self.local.run_job_async(
+ next_batch,
+ self.opts["fun"],
+ self.opts["arg"],
+ "list",
+ raw=self.opts.get("raw", False),
+ ret=self.opts.get("return", ""),
+ gather_job_timeout=self.opts["gather_job_timeout"],
+ jid=self.batch_jid,
+ metadata=self.metadata,
+ )
+ self.event.io_loop.call_later(
+ self.opts["timeout"], self.find_job, set(next_batch)
+ )
+ self.active = self.active.union(next_batch)
diff --git a/salt/client/__init__.py b/salt/client/__init__.py
index 6fab45fcbf..1e9f11df4c 100644
--- a/salt/client/__init__.py
+++ b/salt/client/__init__.py
@@ -543,6 +543,20 @@ class LocalClient:
# Late import - not used anywhere else in this file
import salt.cli.batch
+ opts = salt.cli.batch.batch_get_opts(
+ tgt,
+ fun,
+ batch,
+ self.opts,
+ arg=arg,
+ tgt_type=tgt_type,
+ ret=ret,
+ kwarg=kwarg,
+ **kwargs
+ )
+
+ eauth = salt.cli.batch.batch_get_eauth(kwargs)
+
arg = salt.utils.args.condition_input(arg, kwarg)
opts = {
"tgt": tgt,
diff --git a/salt/master.py b/salt/master.py
index 1c91c28209..b9bc1a7a67 100644
--- a/salt/master.py
+++ b/salt/master.py
@@ -3,7 +3,6 @@ This module contains all of the routines needed to set up a master server, this
involves preparing the three listeners and the workers needed by the master.
"""
-# Import python libs
import collections
import copy
@@ -21,10 +20,9 @@ import time
import salt.acl
import salt.auth
+import salt.cli.batch_async
import salt.client
import salt.client.ssh.client
-
-# Import salt libs
import salt.crypt
import salt.daemons.masterapi
import salt.defaults.exitcodes
@@ -89,7 +87,6 @@ except ImportError:
# resource is not available on windows
HAS_RESOURCE = False
-# Import halite libs
try:
import halite # pylint: disable=import-error
@@ -2232,6 +2229,24 @@ class ClearFuncs(TransportMethods):
return False
return self.loadauth.get_tok(clear_load["token"])
+ def publish_batch(self, clear_load, minions, missing):
+ batch_load = {}
+ batch_load.update(clear_load)
+ import salt.cli.batch_async
+
+ batch = salt.cli.batch_async.BatchAsync(
+ self.local.opts,
+ functools.partial(self._prep_jid, clear_load, {}),
+ batch_load,
+ )
+ ioloop = tornado.ioloop.IOLoop.current()
+ ioloop.add_callback(batch.start)
+
+ return {
+ "enc": "clear",
+ "load": {"jid": batch.batch_jid, "minions": minions, "missing": missing},
+ }
+
def publish(self, clear_load):
"""
This method sends out publications to the minions, it can only be used
@@ -2349,6 +2364,9 @@ class ClearFuncs(TransportMethods):
),
},
}
+ if extra.get("batch", None):
+ return self.publish_batch(clear_load, minions, missing)
+
jid = self._prep_jid(clear_load, extra)
if jid is None:
return {"enc": "clear", "load": {"error": "Master failed to assign jid"}}
diff --git a/salt/netapi/__init__.py b/salt/netapi/__init__.py
index 96f57f6c79..dec19b37ef 100644
--- a/salt/netapi/__init__.py
+++ b/salt/netapi/__init__.py
@@ -151,7 +151,8 @@ class NetapiClient:
:return: job ID
"""
local = salt.client.get_local_client(mopts=self.opts)
- return local.run_job(*args, **kwargs)
+ ret = local.run_job(*args, **kwargs)
+ return ret
def local(self, *args, **kwargs):
"""
diff --git a/salt/transport/ipc.py b/salt/transport/ipc.py
index 041718d058..f411907da2 100644
--- a/salt/transport/ipc.py
+++ b/salt/transport/ipc.py
@@ -1,10 +1,7 @@
-# -*- coding: utf-8 -*-
"""
IPC transport classes
"""
-# Import Python libs
-from __future__ import absolute_import, print_function, unicode_literals
import errno
import logging
@@ -12,15 +9,12 @@ import socket
import sys
import time
-# Import Tornado libs
import salt.ext.tornado
import salt.ext.tornado.concurrent
import salt.ext.tornado.gen
import salt.ext.tornado.netutil
import salt.transport.client
import salt.transport.frame
-
-# Import Salt libs
import salt.utils.msgpack
from salt.ext import six
from salt.ext.tornado.ioloop import IOLoop
@@ -42,7 +36,7 @@ def future_with_timeout_callback(future):
class FutureWithTimeout(salt.ext.tornado.concurrent.Future):
def __init__(self, io_loop, future, timeout):
- super(FutureWithTimeout, self).__init__()
+ super().__init__()
self.io_loop = io_loop
self._future = future
if timeout is not None:
@@ -85,7 +79,7 @@ class FutureWithTimeout(salt.ext.tornado.concurrent.Future):
self.set_exception(exc)
-class IPCServer(object):
+class IPCServer:
"""
A Tornado IPC server very similar to Tornado's TCPServer class
but using either UNIX domain sockets or TCP sockets
@@ -181,10 +175,7 @@ class IPCServer(object):
# Under Py2 we still want raw to be set to True
msgpack_kwargs = {"raw": six.PY2}
else:
- if six.PY2:
- msgpack_kwargs = {"encoding": None}
- else:
- msgpack_kwargs = {"encoding": "utf-8"}
+ msgpack_kwargs = {"encoding": "utf-8"}
unpacker = salt.utils.msgpack.Unpacker(**msgpack_kwargs)
while not stream.closed():
try:
@@ -200,7 +191,7 @@ class IPCServer(object):
except StreamClosedError:
log.trace("Client disconnected from IPC %s", self.socket_path)
break
- except socket.error as exc:
+ except OSError as exc:
# On occasion an exception will occur with
# an error code of 0, it's a spurious exception.
if exc.errno == 0:
@@ -247,7 +238,7 @@ class IPCServer(object):
# pylint: enable=W1701
-class IPCClient(object):
+class IPCClient:
"""
A Tornado IPC client very similar to Tornado's TCPClient class
but using either UNIX domain sockets or TCP sockets
@@ -282,10 +273,7 @@ class IPCClient(object):
# Under Py2 we still want raw to be set to True
msgpack_kwargs = {"raw": six.PY2}
else:
- if six.PY2:
- msgpack_kwargs = {"encoding": None}
- else:
- msgpack_kwargs = {"encoding": "utf-8"}
+ msgpack_kwargs = {"encoding": "utf-8"}
self.unpacker = salt.utils.msgpack.Unpacker(**msgpack_kwargs)
def connected(self):
@@ -385,10 +373,10 @@ class IPCClient(object):
if self.stream is not None and not self.stream.closed():
try:
self.stream.close()
- except socket.error as exc:
+ except OSError as exc:
if exc.errno != errno.EBADF:
# If its not a bad file descriptor error, raise
- six.reraise(*sys.exc_info())
+ raise
class IPCMessageClient(IPCClient):
@@ -483,7 +471,7 @@ class IPCMessageServer(IPCServer):
"""
-class IPCMessagePublisher(object):
+class IPCMessagePublisher:
"""
A Tornado IPC Publisher similar to Tornado's TCPServer class
but using either UNIX domain sockets or TCP sockets
@@ -645,10 +633,11 @@ class IPCMessageSubscriber(IPCClient):
"""
def __init__(self, socket_path, io_loop=None):
- super(IPCMessageSubscriber, self).__init__(socket_path, io_loop=io_loop)
+ super().__init__(socket_path, io_loop=io_loop)
self._read_stream_future = None
self._saved_data = []
self._read_in_progress = Lock()
+ self.callbacks = set()
@salt.ext.tornado.gen.coroutine
def _read(self, timeout, callback=None):
@@ -725,8 +714,12 @@ class IPCMessageSubscriber(IPCClient):
return self._saved_data.pop(0)
return self.io_loop.run_sync(lambda: self._read(timeout))
+ def __run_callbacks(self, raw):
+ for callback in self.callbacks:
+ self.io_loop.spawn_callback(callback, raw)
+
@salt.ext.tornado.gen.coroutine
- def read_async(self, callback):
+ def read_async(self):
"""
Asynchronously read messages and invoke a callback when they are ready.
@@ -744,7 +737,7 @@ class IPCMessageSubscriber(IPCClient):
except Exception as exc: # pylint: disable=broad-except
log.error("Exception occurred while Subscriber connecting: %s", exc)
yield salt.ext.tornado.gen.sleep(1)
- yield self._read(None, callback)
+ yield self._read(None, self.__run_callbacks)
def close(self):
"""
@@ -754,7 +747,7 @@ class IPCMessageSubscriber(IPCClient):
"""
if self._closing:
return
- super(IPCMessageSubscriber, self).close()
+ super().close()
# This will prevent this message from showing up:
# '[ERROR ] Future exception was never retrieved:
# StreamClosedError'
diff --git a/salt/utils/event.py b/salt/utils/event.py
index 6f7edef4e5..ae200f9dfa 100644
--- a/salt/utils/event.py
+++ b/salt/utils/event.py
@@ -867,6 +867,10 @@ class SaltEvent:
# Minion fired a bad retcode, fire an event
self._fire_ret_load_specific_fun(load)
+ def remove_event_handler(self, event_handler):
+ if event_handler in self.subscriber.callbacks:
+ self.subscriber.callbacks.remove(event_handler)
+
def set_event_handler(self, event_handler):
"""
Invoke the event_handler callback each time an event arrives.
@@ -875,8 +879,10 @@ class SaltEvent:
if not self.cpub:
self.connect_pub()
+
+ self.subscriber.callbacks.add(event_handler)
# This will handle reconnects
- return self.subscriber.read_async(event_handler)
+ return self.subscriber.read_async()
# pylint: disable=W1701
def __del__(self):
diff --git a/tests/unit/cli/test_batch_async.py b/tests/unit/cli/test_batch_async.py
new file mode 100644
index 0000000000..3f8626a2dd
--- /dev/null
+++ b/tests/unit/cli/test_batch_async.py
@@ -0,0 +1,357 @@
+import tornado
+from salt.cli.batch_async import BatchAsync
+from tests.support.mock import NO_MOCK, NO_MOCK_REASON, MagicMock, patch
+from tests.support.unit import TestCase, skipIf
+from tornado.testing import AsyncTestCase
+
+
+@skipIf(NO_MOCK, NO_MOCK_REASON)
+class AsyncBatchTestCase(AsyncTestCase, TestCase):
+ def setUp(self):
+ self.io_loop = self.get_new_ioloop()
+ opts = {
+ "batch": "1",
+ "conf_file": {},
+ "tgt": "*",
+ "timeout": 5,
+ "gather_job_timeout": 5,
+ "batch_presence_ping_timeout": 1,
+ "transport": None,
+ "sock_dir": "",
+ }
+
+ with patch("salt.client.get_local_client", MagicMock(return_value=MagicMock())):
+ with patch(
+ "salt.cli.batch_async.batch_get_opts", MagicMock(return_value=opts)
+ ):
+ self.batch = BatchAsync(
+ opts,
+ MagicMock(side_effect=["1234", "1235", "1236"]),
+ {
+ "tgt": "",
+ "fun": "",
+ "kwargs": {"batch": "", "batch_presence_ping_timeout": 1},
+ },
+ )
+
+ def test_ping_jid(self):
+ self.assertEqual(self.batch.ping_jid, "1234")
+
+ def test_batch_jid(self):
+ self.assertEqual(self.batch.batch_jid, "1235")
+
+ def test_find_job_jid(self):
+ self.assertEqual(self.batch.find_job_jid, "1236")
+
+ def test_batch_size(self):
+ """
+ Tests passing batch value as a number
+ """
+ self.batch.opts = {"batch": "2", "timeout": 5}
+ self.batch.minions = {"foo", "bar"}
+ self.batch.start_batch()
+ self.assertEqual(self.batch.batch_size, 2)
+
+ @tornado.testing.gen_test
+ def test_batch_start_on_batch_presence_ping_timeout(self):
+ self.batch.event = MagicMock()
+ future = tornado.gen.Future()
+ future.set_result({"minions": ["foo", "bar"]})
+ self.batch.local.run_job_async.return_value = future
+ ret = self.batch.start()
+ # assert start_batch is called later with batch_presence_ping_timeout as param
+ self.assertEqual(
+ self.batch.event.io_loop.call_later.call_args[0],
+ (self.batch.batch_presence_ping_timeout, self.batch.start_batch),
+ )
+ # assert test.ping called
+ self.assertEqual(
+ self.batch.local.run_job_async.call_args[0], ("*", "test.ping", [], "glob")
+ )
+ # assert down_minions == all minions matched by tgt
+ self.assertEqual(self.batch.down_minions, {"foo", "bar"})
+
+ @tornado.testing.gen_test
+ def test_batch_start_on_gather_job_timeout(self):
+ self.batch.event = MagicMock()
+ future = tornado.gen.Future()
+ future.set_result({"minions": ["foo", "bar"]})
+ self.batch.local.run_job_async.return_value = future
+ self.batch.batch_presence_ping_timeout = None
+ ret = self.batch.start()
+ # assert start_batch is called later with gather_job_timeout as param
+ self.assertEqual(
+ self.batch.event.io_loop.call_later.call_args[0],
+ (self.batch.opts["gather_job_timeout"], self.batch.start_batch),
+ )
+
+ def test_batch_fire_start_event(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.opts = {"batch": "2", "timeout": 5}
+ self.batch.event = MagicMock()
+ self.batch.metadata = {"mykey": "myvalue"}
+ self.batch.start_batch()
+ self.assertEqual(
+ self.batch.event.fire_event.call_args[0],
+ (
+ {
+ "available_minions": {"foo", "bar"},
+ "down_minions": set(),
+ "metadata": self.batch.metadata,
+ },
+ "salt/batch/1235/start",
+ ),
+ )
+
+ @tornado.testing.gen_test
+ def test_start_batch_calls_next(self):
+ self.batch.schedule_next = MagicMock(return_value=MagicMock())
+ self.batch.event = MagicMock()
+ future = tornado.gen.Future()
+ future.set_result(None)
+ self.batch.schedule_next = MagicMock(return_value=future)
+ self.batch.start_batch()
+ self.assertEqual(self.batch.initialized, True)
+ self.assertEqual(len(self.batch.schedule_next.mock_calls), 1)
+
+ def test_batch_fire_done_event(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.event = MagicMock()
+ self.batch.metadata = {"mykey": "myvalue"}
+ self.batch.end_batch()
+ self.assertEqual(
+ self.batch.event.fire_event.call_args[0],
+ (
+ {
+ "available_minions": {"foo", "bar"},
+ "done_minions": set(),
+ "down_minions": set(),
+ "timedout_minions": set(),
+ "metadata": self.batch.metadata,
+ },
+ "salt/batch/1235/done",
+ ),
+ )
+ self.assertEqual(len(self.batch.event.remove_event_handler.mock_calls), 1)
+
+ @tornado.testing.gen_test
+ def test_batch_next(self):
+ self.batch.event = MagicMock()
+ self.batch.opts["fun"] = "my.fun"
+ self.batch.opts["arg"] = []
+ self.batch._get_next = MagicMock(return_value={"foo", "bar"})
+ self.batch.batch_size = 2
+ future = tornado.gen.Future()
+ future.set_result({"minions": ["foo", "bar"]})
+ self.batch.local.run_job_async.return_value = future
+ ret = self.batch.schedule_next().result()
+ self.assertEqual(
+ self.batch.local.run_job_async.call_args[0],
+ ({"foo", "bar"}, "my.fun", [], "list"),
+ )
+ self.assertEqual(
+ self.batch.event.io_loop.call_later.call_args[0],
+ (self.batch.opts["timeout"], self.batch.find_job, {"foo", "bar"}),
+ )
+ self.assertEqual(self.batch.active, {"bar", "foo"})
+
+ def test_next_batch(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.batch_size = 2
+ self.assertEqual(self.batch._get_next(), {"foo", "bar"})
+
+ def test_next_batch_one_done(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.done_minions = {"bar"}
+ self.batch.batch_size = 2
+ self.assertEqual(self.batch._get_next(), {"foo"})
+
+ def test_next_batch_one_done_one_active(self):
+ self.batch.minions = {"foo", "bar", "baz"}
+ self.batch.done_minions = {"bar"}
+ self.batch.active = {"baz"}
+ self.batch.batch_size = 2
+ self.assertEqual(self.batch._get_next(), {"foo"})
+
+ def test_next_batch_one_done_one_active_one_timedout(self):
+ self.batch.minions = {"foo", "bar", "baz", "faz"}
+ self.batch.done_minions = {"bar"}
+ self.batch.active = {"baz"}
+ self.batch.timedout_minions = {"faz"}
+ self.batch.batch_size = 2
+ self.assertEqual(self.batch._get_next(), {"foo"})
+
+ def test_next_batch_bigger_size(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.batch_size = 3
+ self.assertEqual(self.batch._get_next(), {"foo", "bar"})
+
+ def test_next_batch_all_done(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.done_minions = {"foo", "bar"}
+ self.batch.batch_size = 2
+ self.assertEqual(self.batch._get_next(), set())
+
+ def test_next_batch_all_active(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.active = {"foo", "bar"}
+ self.batch.batch_size = 2
+ self.assertEqual(self.batch._get_next(), set())
+
+ def test_next_batch_all_timedout(self):
+ self.batch.minions = {"foo", "bar"}
+ self.batch.timedout_minions = {"foo", "bar"}
+ self.batch.batch_size = 2
+ self.assertEqual(self.batch._get_next(), set())
+
+ def test_batch__event_handler_ping_return(self):
+ self.batch.down_minions = {"foo"}
+ self.batch.event = MagicMock(
+ unpack=MagicMock(return_value=("salt/job/1234/ret/foo", {"id": "foo"}))
+ )
+ self.batch.start()
+ self.assertEqual(self.batch.minions, set())
+ self.batch._BatchAsync__event_handler(MagicMock())
+ self.assertEqual(self.batch.minions, {"foo"})
+ self.assertEqual(self.batch.done_minions, set())
+
+ def test_batch__event_handler_call_start_batch_when_all_pings_return(self):
+ self.batch.down_minions = {"foo"}
+ self.batch.event = MagicMock(
+ unpack=MagicMock(return_value=("salt/job/1234/ret/foo", {"id": "foo"}))
+ )
+ self.batch.start()
+ self.batch._BatchAsync__event_handler(MagicMock())
+ self.assertEqual(
+ self.batch.event.io_loop.spawn_callback.call_args[0],
+ (self.batch.start_batch,),
+ )
+
+ def test_batch__event_handler_not_call_start_batch_when_not_all_pings_return(self):
+ self.batch.down_minions = {"foo", "bar"}
+ self.batch.event = MagicMock(
+ unpack=MagicMock(return_value=("salt/job/1234/ret/foo", {"id": "foo"}))
+ )
+ self.batch.start()
+ self.batch._BatchAsync__event_handler(MagicMock())
+ self.assertEqual(len(self.batch.event.io_loop.spawn_callback.mock_calls), 0)
+
+ def test_batch__event_handler_batch_run_return(self):
+ self.batch.event = MagicMock(
+ unpack=MagicMock(return_value=("salt/job/1235/ret/foo", {"id": "foo"}))
+ )
+ self.batch.start()
+ self.batch.active = {"foo"}
+ self.batch._BatchAsync__event_handler(MagicMock())
+ self.assertEqual(self.batch.active, set())
+ self.assertEqual(self.batch.done_minions, {"foo"})
+ self.assertEqual(
+ self.batch.event.io_loop.call_later.call_args[0],
+ (self.batch.batch_delay, self.batch.schedule_next),
+ )
+
+ def test_batch__event_handler_find_job_return(self):
+ self.batch.event = MagicMock(
+ unpack=MagicMock(return_value=("salt/job/1236/ret/foo", {"id": "foo"}))
+ )
+ self.batch.start()
+ self.batch._BatchAsync__event_handler(MagicMock())
+ self.assertEqual(self.batch.find_job_returned, {"foo"})
+
+ @tornado.testing.gen_test
+ def test_batch__event_handler_end_batch(self):
+ self.batch.event = MagicMock(
+ unpack=MagicMock(
+ return_value=("salt/job/not-my-jid/ret/foo", {"id": "foo"})
+ )
+ )
+ future = tornado.gen.Future()
+ future.set_result({"minions": ["foo", "bar", "baz"]})
+ self.batch.local.run_job_async.return_value = future
+ self.batch.start()
+ self.batch.initialized = True
+ self.assertEqual(self.batch.down_minions, {"foo", "bar", "baz"})
+ self.batch.end_batch = MagicMock()
+ self.batch.minions = {"foo", "bar", "baz"}
+ self.batch.done_minions = {"foo", "bar"}
+ self.batch.timedout_minions = {"baz"}
+ self.batch._BatchAsync__event_handler(MagicMock())
+ self.assertEqual(len(self.batch.end_batch.mock_calls), 1)
+
+ @tornado.testing.gen_test
+ def test_batch_find_job(self):
+ self.batch.event = MagicMock()
+ future = tornado.gen.Future()
+ future.set_result({})
+ self.batch.local.run_job_async.return_value = future
+ self.batch.find_job({"foo", "bar"})
+ self.assertEqual(
+ self.batch.event.io_loop.call_later.call_args[0],
+ (
+ self.batch.opts["gather_job_timeout"],
+ self.batch.check_find_job,
+ {"foo", "bar"},
+ ),
+ )
+
+ @tornado.testing.gen_test
+ def test_batch_find_job_with_done_minions(self):
+ self.batch.done_minions = {"bar"}
+ self.batch.event = MagicMock()
+ future = tornado.gen.Future()
+ future.set_result({})
+ self.batch.local.run_job_async.return_value = future
+ self.batch.find_job({"foo", "bar"})
+ self.assertEqual(
+ self.batch.event.io_loop.call_later.call_args[0],
+ (self.batch.opts["gather_job_timeout"], self.batch.check_find_job, {"foo"}),
+ )
+
+ def test_batch_check_find_job_did_not_return(self):
+ self.batch.event = MagicMock()
+ self.batch.active = {"foo"}
+ self.batch.find_job_returned = set()
+ self.batch.check_find_job({"foo"})
+ self.assertEqual(self.batch.find_job_returned, set())
+ self.assertEqual(self.batch.active, set())
+ self.assertEqual(len(self.batch.event.io_loop.add_callback.mock_calls), 0)
+
+ def test_batch_check_find_job_did_return(self):
+ self.batch.event = MagicMock()
+ self.batch.find_job_returned = {"foo"}
+ self.batch.check_find_job({"foo"})
+ self.assertEqual(
+ self.batch.event.io_loop.add_callback.call_args[0],
+ (self.batch.find_job, {"foo"}),
+ )
+
+ def test_batch_check_find_job_multiple_states(self):
+ self.batch.event = MagicMock()
+ # currently running minions
+ self.batch.active = {"foo", "bar"}
+
+ # minion is running and find_job returns
+ self.batch.find_job_returned = {"foo"}
+
+ # minion started running but find_job did not return
+ self.batch.timedout_minions = {"faz"}
+
+ # minion finished
+ self.batch.done_minions = {"baz"}
+
+ # both not yet done but only 'foo' responded to find_job
+ not_done = {"foo", "bar"}
+
+ self.batch.check_find_job(not_done)
+
+ # assert 'bar' removed from active
+ self.assertEqual(self.batch.active, {"foo"})
+
+ # assert 'bar' added to timedout_minions
+ self.assertEqual(self.batch.timedout_minions, {"bar", "faz"})
+
+ # assert 'find_job' schedueled again only for 'foo'
+ self.assertEqual(
+ self.batch.event.io_loop.add_callback.call_args[0],
+ (self.batch.find_job, {"foo"}),
+ )
--
2.29.2