Accepting request 733437 from systemsmanagement:saltstack

- Improve batch_async to release consumed memory (bsc#1140912)
- Fix memory leak produced by batch async find_jobs mechanism (bsc#1140912)
- Grant read and execute permission to others (bsc#1150447)
- Added:
  * improve-batch_async-to-release-consumed-memory-bsc-1.patch
  * fix-memory-leak-produced-by-batch-async-find_jobs-me.patch

OBS-URL: https://build.opensuse.org/request/show/733437
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/salt?expand=0&rev=93
This commit is contained in:
Dominique Leuenberger 2019-10-14 10:30:50 +00:00 committed by Git OBS Bridge
commit 32b7ac93c9
5 changed files with 403 additions and 14 deletions

View File

@ -1 +1 @@
24e59963d380d183a48f6ddd4d66dbf6a8fa4210
6fdab5ecba96a2695eadd798abc2dcfe6ff2b99b

View File

@ -0,0 +1,182 @@
From 8941de5a64b6330c6a814059e6e337f7ad3aa6cd Mon Sep 17 00:00:00 2001
From: Mihai Dinca <mdinca@suse.de>
Date: Mon, 16 Sep 2019 11:27:30 +0200
Subject: [PATCH] Fix memory leak produced by batch async find_jobs
mechanism (bsc#1140912)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Multiple fixes:
- use different JIDs per find_job
- fix bug in detection of find_job returns
- fix timeout passed from request payload
- better cleanup at the end of batching
Co-authored-by: Pablo Suárez Hernández <psuarezhernandez@suse.com>
---
salt/cli/batch_async.py | 60 +++++++++++++++++++++++++++--------------
salt/client/__init__.py | 1 +
salt/master.py | 1 -
3 files changed, 41 insertions(+), 21 deletions(-)
diff --git a/salt/cli/batch_async.py b/salt/cli/batch_async.py
index 8c8f481e34..8a67331102 100644
--- a/salt/cli/batch_async.py
+++ b/salt/cli/batch_async.py
@@ -72,6 +72,7 @@ class BatchAsync(object):
self.done_minions = set()
self.active = set()
self.initialized = False
+ self.jid_gen = jid_gen
self.ping_jid = jid_gen()
self.batch_jid = jid_gen()
self.find_job_jid = jid_gen()
@@ -89,14 +90,11 @@ class BatchAsync(object):
def __set_event_handler(self):
ping_return_pattern = 'salt/job/{0}/ret/*'.format(self.ping_jid)
batch_return_pattern = 'salt/job/{0}/ret/*'.format(self.batch_jid)
- find_job_return_pattern = 'salt/job/{0}/ret/*'.format(self.find_job_jid)
self.event.subscribe(ping_return_pattern, match_type='glob')
self.event.subscribe(batch_return_pattern, match_type='glob')
- self.event.subscribe(find_job_return_pattern, match_type='glob')
- self.event.patterns = {
+ self.patterns = {
(ping_return_pattern, 'ping_return'),
(batch_return_pattern, 'batch_run'),
- (find_job_return_pattern, 'find_job_return')
}
self.event.set_event_handler(self.__event_handler)
@@ -104,7 +102,7 @@ class BatchAsync(object):
if not self.event:
return
mtag, data = self.event.unpack(raw, self.event.serial)
- for (pattern, op) in self.event.patterns:
+ for (pattern, op) in self.patterns:
if fnmatch.fnmatch(mtag, pattern):
minion = data['id']
if op == 'ping_return':
@@ -112,7 +110,8 @@ class BatchAsync(object):
if self.targeted_minions == self.minions:
self.event.io_loop.spawn_callback(self.start_batch)
elif op == 'find_job_return':
- self.find_job_returned.add(minion)
+ if data.get("return", None):
+ self.find_job_returned.add(minion)
elif op == 'batch_run':
if minion in self.active:
self.active.remove(minion)
@@ -131,31 +130,46 @@ class BatchAsync(object):
return set(list(to_run)[:next_batch_size])
@tornado.gen.coroutine
- def check_find_job(self, batch_minions):
+ def check_find_job(self, batch_minions, jid):
+ find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid)
+ self.event.unsubscribe(find_job_return_pattern, match_type='glob')
+ self.patterns.remove((find_job_return_pattern, "find_job_return"))
+
timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions)
self.timedout_minions = self.timedout_minions.union(timedout_minions)
self.active = self.active.difference(self.timedout_minions)
running = batch_minions.difference(self.done_minions).difference(self.timedout_minions)
+
if timedout_minions:
self.schedule_next()
+
if running:
+ self.find_job_returned = self.find_job_returned.difference(running)
self.event.io_loop.add_callback(self.find_job, running)
@tornado.gen.coroutine
def find_job(self, minions):
- not_done = minions.difference(self.done_minions)
- ping_return = yield self.local.run_job_async(
- not_done,
- 'saltutil.find_job',
- [self.batch_jid],
- 'list',
- gather_job_timeout=self.opts['gather_job_timeout'],
- jid=self.find_job_jid,
- **self.eauth)
- self.event.io_loop.call_later(
- self.opts['gather_job_timeout'],
- self.check_find_job,
- not_done)
+ not_done = minions.difference(self.done_minions).difference(self.timedout_minions)
+
+ if not_done:
+ jid = self.jid_gen()
+ find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid)
+ self.patterns.add((find_job_return_pattern, "find_job_return"))
+ self.event.subscribe(find_job_return_pattern, match_type='glob')
+
+ ret = yield self.local.run_job_async(
+ not_done,
+ 'saltutil.find_job',
+ [self.batch_jid],
+ 'list',
+ gather_job_timeout=self.opts['gather_job_timeout'],
+ jid=jid,
+ **self.eauth)
+ self.event.io_loop.call_later(
+ self.opts['gather_job_timeout'],
+ self.check_find_job,
+ not_done,
+ jid)
@tornado.gen.coroutine
def start(self):
@@ -203,6 +217,9 @@ class BatchAsync(object):
}
self.event.fire_event(data, "salt/batch/{0}/done".format(self.batch_jid))
self.event.remove_event_handler(self.__event_handler)
+ for (pattern, label) in self.patterns:
+ if label in ["ping_return", "batch_run"]:
+ self.event.unsubscribe(pattern, match_type='glob')
def schedule_next(self):
if not self.scheduled:
@@ -226,9 +243,12 @@ class BatchAsync(object):
gather_job_timeout=self.opts['gather_job_timeout'],
jid=self.batch_jid,
metadata=self.metadata)
+
self.event.io_loop.call_later(self.opts['timeout'], self.find_job, set(next_batch))
except Exception as ex:
+ log.error("Error in scheduling next batch: %s", ex)
self.active = self.active.difference(next_batch)
else:
self.end_batch()
self.scheduled = False
+ yield
diff --git a/salt/client/__init__.py b/salt/client/__init__.py
index aff354a021..0bb6d2b111 100644
--- a/salt/client/__init__.py
+++ b/salt/client/__init__.py
@@ -1624,6 +1624,7 @@ class LocalClient(object):
'key': self.key,
'tgt_type': tgt_type,
'ret': ret,
+ 'timeout': timeout,
'jid': jid}
# if kwargs are passed, pack them.
diff --git a/salt/master.py b/salt/master.py
index f08c126280..0e4bba0505 100644
--- a/salt/master.py
+++ b/salt/master.py
@@ -2043,7 +2043,6 @@ class ClearFuncs(object):
def publish_batch(self, clear_load, minions, missing):
batch_load = {}
batch_load.update(clear_load)
- import salt.cli.batch_async
batch = salt.cli.batch_async.BatchAsync(
self.local.opts,
functools.partial(self._prep_jid, clear_load, {}),
--
2.23.0

View File

@ -0,0 +1,190 @@
From 002543df392f65d95dbc127dc058ac897f2035ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pablo=20Su=C3=A1rez=20Hern=C3=A1ndez?=
<psuarezhernandez@suse.com>
Date: Thu, 26 Sep 2019 10:41:06 +0100
Subject: [PATCH] Improve batch_async to release consumed memory
(bsc#1140912)
---
salt/cli/batch_async.py | 73 +++++++++++++++++++++++++----------------
1 file changed, 45 insertions(+), 28 deletions(-)
diff --git a/salt/cli/batch_async.py b/salt/cli/batch_async.py
index 8a67331102..2bb50459c8 100644
--- a/salt/cli/batch_async.py
+++ b/salt/cli/batch_async.py
@@ -5,6 +5,7 @@ Execute a job on the targeted minions by using a moving window of fixed size `ba
# Import python libs
from __future__ import absolute_import, print_function, unicode_literals
+import gc
import tornado
# Import salt libs
@@ -77,6 +78,7 @@ class BatchAsync(object):
self.batch_jid = jid_gen()
self.find_job_jid = jid_gen()
self.find_job_returned = set()
+ self.ended = False
self.event = salt.utils.event.get_event(
'master',
self.opts['sock_dir'],
@@ -86,6 +88,7 @@ class BatchAsync(object):
io_loop=ioloop,
keep_loop=True)
self.scheduled = False
+ self.patterns = {}
def __set_event_handler(self):
ping_return_pattern = 'salt/job/{0}/ret/*'.format(self.ping_jid)
@@ -116,7 +119,7 @@ class BatchAsync(object):
if minion in self.active:
self.active.remove(minion)
self.done_minions.add(minion)
- self.schedule_next()
+ self.event.io_loop.spawn_callback(self.schedule_next)
def _get_next(self):
to_run = self.minions.difference(
@@ -129,23 +132,23 @@ class BatchAsync(object):
)
return set(list(to_run)[:next_batch_size])
- @tornado.gen.coroutine
def check_find_job(self, batch_minions, jid):
- find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid)
- self.event.unsubscribe(find_job_return_pattern, match_type='glob')
- self.patterns.remove((find_job_return_pattern, "find_job_return"))
+ if self.event:
+ find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid)
+ self.event.unsubscribe(find_job_return_pattern, match_type='glob')
+ self.patterns.remove((find_job_return_pattern, "find_job_return"))
- timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions)
- self.timedout_minions = self.timedout_minions.union(timedout_minions)
- self.active = self.active.difference(self.timedout_minions)
- running = batch_minions.difference(self.done_minions).difference(self.timedout_minions)
+ timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions)
+ self.timedout_minions = self.timedout_minions.union(timedout_minions)
+ self.active = self.active.difference(self.timedout_minions)
+ running = batch_minions.difference(self.done_minions).difference(self.timedout_minions)
- if timedout_minions:
- self.schedule_next()
+ if timedout_minions:
+ self.schedule_next()
- if running:
- self.find_job_returned = self.find_job_returned.difference(running)
- self.event.io_loop.add_callback(self.find_job, running)
+ if running:
+ self.find_job_returned = self.find_job_returned.difference(running)
+ self.event.io_loop.spawn_callback(self.find_job, running)
@tornado.gen.coroutine
def find_job(self, minions):
@@ -165,8 +168,8 @@ class BatchAsync(object):
gather_job_timeout=self.opts['gather_job_timeout'],
jid=jid,
**self.eauth)
- self.event.io_loop.call_later(
- self.opts['gather_job_timeout'],
+ yield tornado.gen.sleep(self.opts['gather_job_timeout'])
+ self.event.io_loop.spawn_callback(
self.check_find_job,
not_done,
jid)
@@ -174,10 +177,6 @@ class BatchAsync(object):
@tornado.gen.coroutine
def start(self):
self.__set_event_handler()
- #start batching even if not all minions respond to ping
- self.event.io_loop.call_later(
- self.batch_presence_ping_timeout or self.opts['gather_job_timeout'],
- self.start_batch)
ping_return = yield self.local.run_job_async(
self.opts['tgt'],
'test.ping',
@@ -191,6 +190,10 @@ class BatchAsync(object):
metadata=self.metadata,
**self.eauth)
self.targeted_minions = set(ping_return['minions'])
+ #start batching even if not all minions respond to ping
+ yield tornado.gen.sleep(self.batch_presence_ping_timeout or self.opts['gather_job_timeout'])
+ self.event.io_loop.spawn_callback(self.start_batch)
+
@tornado.gen.coroutine
def start_batch(self):
@@ -202,12 +205,14 @@ class BatchAsync(object):
"down_minions": self.targeted_minions.difference(self.minions),
"metadata": self.metadata
}
- self.event.fire_event(data, "salt/batch/{0}/start".format(self.batch_jid))
- yield self.run_next()
+ ret = self.event.fire_event(data, "salt/batch/{0}/start".format(self.batch_jid))
+ self.event.io_loop.spawn_callback(self.run_next)
+ @tornado.gen.coroutine
def end_batch(self):
left = self.minions.symmetric_difference(self.done_minions.union(self.timedout_minions))
- if not left:
+ if not left and not self.ended:
+ self.ended = True
data = {
"available_minions": self.minions,
"down_minions": self.targeted_minions.difference(self.minions),
@@ -220,20 +225,26 @@ class BatchAsync(object):
for (pattern, label) in self.patterns:
if label in ["ping_return", "batch_run"]:
self.event.unsubscribe(pattern, match_type='glob')
+ del self
+ gc.collect()
+ yield
+ @tornado.gen.coroutine
def schedule_next(self):
if not self.scheduled:
self.scheduled = True
# call later so that we maybe gather more returns
- self.event.io_loop.call_later(self.batch_delay, self.run_next)
+ yield tornado.gen.sleep(self.batch_delay)
+ self.event.io_loop.spawn_callback(self.run_next)
@tornado.gen.coroutine
def run_next(self):
+ self.scheduled = False
next_batch = self._get_next()
if next_batch:
self.active = self.active.union(next_batch)
try:
- yield self.local.run_job_async(
+ ret = yield self.local.run_job_async(
next_batch,
self.opts['fun'],
self.opts['arg'],
@@ -244,11 +255,17 @@ class BatchAsync(object):
jid=self.batch_jid,
metadata=self.metadata)
- self.event.io_loop.call_later(self.opts['timeout'], self.find_job, set(next_batch))
+ yield tornado.gen.sleep(self.opts['timeout'])
+ self.event.io_loop.spawn_callback(self.find_job, set(next_batch))
except Exception as ex:
log.error("Error in scheduling next batch: %s", ex)
self.active = self.active.difference(next_batch)
else:
- self.end_batch()
- self.scheduled = False
+ yield self.end_batch()
+ gc.collect()
yield
+
+ def __del__(self):
+ self.event = None
+ self.ioloop = None
+ gc.collect()
--
2.22.0

View File

@ -1,3 +1,14 @@
-------------------------------------------------------------------
Thu Sep 26 10:23:39 UTC 2019 - Pablo Suárez Hernández <pablo.suarezhernandez@suse.com>
- Improve batch_async to release consumed memory (bsc#1140912)
- Fix memory leak produced by batch async find_jobs mechanism (bsc#1140912)
- Grant read and execute permission to others (bsc#1150447)
- Added:
* improve-batch_async-to-release-consumed-memory-bsc-1.patch
* fix-memory-leak-produced-by-batch-async-find_jobs-me.patch
-------------------------------------------------------------------
Thu Sep 5 17:45:50 UTC 2019 - Jochen Breuer <jbreuer@suse.de>

View File

@ -220,25 +220,29 @@ Patch72: avoid-traceback-when-http.query-request-cannot-be-pe.patch
# https://github.com/saltstack/salt/pull/54022
# https://github.com/saltstack/salt/pull/54024
Patch73: accumulated-changes-required-for-yomi-165.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/177
Patch74: restore-default-behaviour-of-pkg-list-return.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/commit/6af07030a502c427781991fc9a2b994fa04ef32e
Patch75: fix-memory-leak-produced-by-batch-async-find_jobs-me.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/159
Patch74: move-server_id-deprecation-warning-to-reduce-log-spa.patch
Patch76: move-server_id-deprecation-warning-to-reduce-log-spa.patch
# PATCH_FIX_UPSTREAM: https://github.com/saltstack/salt/pull/54077
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/166
Patch75: fix-aptpkg-systemd-call-bsc-1143301.patch
Patch77: fix-aptpkg-systemd-call-bsc-1143301.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/170
Patch76: strip-trailing-from-repo.uri-when-comparing-repos-in.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/177
Patch77: restore-default-behaviour-of-pkg-list-return.patch
Patch78: strip-trailing-from-repo.uri-when-comparing-repos-in.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/172
Patch78: implement-network.fqdns-module-function-bsc-1134860-.patch
Patch79: implement-network.fqdns-module-function-bsc-1134860-.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/173
Patch79: 2019.2.0-pr-54196-backport-173.patch
Patch80: 2019.2.0-pr-54196-backport-173.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/174
Patch80: virt.volume_infos-needs-to-ignore-inactive-pools-174.patch
Patch81: virt.volume_infos-needs-to-ignore-inactive-pools-174.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/175
Patch81: virt.volume_infos-silence-libvirt-error-message-175.patch
Patch82: virt.volume_infos-silence-libvirt-error-message-175.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/176
Patch82: fix-virt.full_info-176.patch
Patch83: fix-virt.full_info-176.patch
# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/commit/002543df392f65d95dbc127dc058ac897f2035ed
Patch84: improve-batch_async-to-release-consumed-memory-bsc-1.patch
# BuildRoot: %{_tmppath}/%{name}-%{version}-build
BuildRoot: %{_tmppath}/%{name}-%{version}-build
@ -799,6 +803,8 @@ cp %{S:5} ./.travis.yml
%patch80 -p1
%patch81 -p1
%patch82 -p1
%patch83 -p1
%patch84 -p1
%build
%if 0%{?build_py2}
@ -1519,9 +1525,9 @@ rm -f %{_localstatedir}/cache/salt/minion/thin/version
%files standalone-formulas-configuration
%defattr(-,root,root)
%config(noreplace) %attr(0640, root, salt) %{_sysconfdir}/salt/master.d/standalone-formulas-configuration.conf
%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/
%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/states/
%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/metadata/
%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/
%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/states/
%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/metadata/
%changelog