diff --git a/_lastrevision b/_lastrevision index 2092378..2975e7d 100644 --- a/_lastrevision +++ b/_lastrevision @@ -1 +1 @@ -24e59963d380d183a48f6ddd4d66dbf6a8fa4210 \ No newline at end of file +6fdab5ecba96a2695eadd798abc2dcfe6ff2b99b \ No newline at end of file diff --git a/fix-memory-leak-produced-by-batch-async-find_jobs-me.patch b/fix-memory-leak-produced-by-batch-async-find_jobs-me.patch new file mode 100644 index 0000000..608f5c3 --- /dev/null +++ b/fix-memory-leak-produced-by-batch-async-find_jobs-me.patch @@ -0,0 +1,182 @@ +From 8941de5a64b6330c6a814059e6e337f7ad3aa6cd Mon Sep 17 00:00:00 2001 +From: Mihai Dinca +Date: Mon, 16 Sep 2019 11:27:30 +0200 +Subject: [PATCH] Fix memory leak produced by batch async find_jobs + mechanism (bsc#1140912) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Multiple fixes: + +- use different JIDs per find_job +- fix bug in detection of find_job returns +- fix timeout passed from request payload +- better cleanup at the end of batching + +Co-authored-by: Pablo Suárez Hernández +--- + salt/cli/batch_async.py | 60 +++++++++++++++++++++++++++-------------- + salt/client/__init__.py | 1 + + salt/master.py | 1 - + 3 files changed, 41 insertions(+), 21 deletions(-) + +diff --git a/salt/cli/batch_async.py b/salt/cli/batch_async.py +index 8c8f481e34..8a67331102 100644 +--- a/salt/cli/batch_async.py ++++ b/salt/cli/batch_async.py +@@ -72,6 +72,7 @@ class BatchAsync(object): + self.done_minions = set() + self.active = set() + self.initialized = False ++ self.jid_gen = jid_gen + self.ping_jid = jid_gen() + self.batch_jid = jid_gen() + self.find_job_jid = jid_gen() +@@ -89,14 +90,11 @@ class BatchAsync(object): + def __set_event_handler(self): + ping_return_pattern = 'salt/job/{0}/ret/*'.format(self.ping_jid) + batch_return_pattern = 'salt/job/{0}/ret/*'.format(self.batch_jid) +- find_job_return_pattern = 'salt/job/{0}/ret/*'.format(self.find_job_jid) + self.event.subscribe(ping_return_pattern, match_type='glob') + self.event.subscribe(batch_return_pattern, match_type='glob') +- self.event.subscribe(find_job_return_pattern, match_type='glob') +- self.event.patterns = { ++ self.patterns = { + (ping_return_pattern, 'ping_return'), + (batch_return_pattern, 'batch_run'), +- (find_job_return_pattern, 'find_job_return') + } + self.event.set_event_handler(self.__event_handler) + +@@ -104,7 +102,7 @@ class BatchAsync(object): + if not self.event: + return + mtag, data = self.event.unpack(raw, self.event.serial) +- for (pattern, op) in self.event.patterns: ++ for (pattern, op) in self.patterns: + if fnmatch.fnmatch(mtag, pattern): + minion = data['id'] + if op == 'ping_return': +@@ -112,7 +110,8 @@ class BatchAsync(object): + if self.targeted_minions == self.minions: + self.event.io_loop.spawn_callback(self.start_batch) + elif op == 'find_job_return': +- self.find_job_returned.add(minion) ++ if data.get("return", None): ++ self.find_job_returned.add(minion) + elif op == 'batch_run': + if minion in self.active: + self.active.remove(minion) +@@ -131,31 +130,46 @@ class BatchAsync(object): + return set(list(to_run)[:next_batch_size]) + + @tornado.gen.coroutine +- def check_find_job(self, batch_minions): ++ def check_find_job(self, batch_minions, jid): ++ find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) ++ self.event.unsubscribe(find_job_return_pattern, match_type='glob') ++ self.patterns.remove((find_job_return_pattern, "find_job_return")) ++ + timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions) + self.timedout_minions = self.timedout_minions.union(timedout_minions) + self.active = self.active.difference(self.timedout_minions) + running = batch_minions.difference(self.done_minions).difference(self.timedout_minions) ++ + if timedout_minions: + self.schedule_next() ++ + if running: ++ self.find_job_returned = self.find_job_returned.difference(running) + self.event.io_loop.add_callback(self.find_job, running) + + @tornado.gen.coroutine + def find_job(self, minions): +- not_done = minions.difference(self.done_minions) +- ping_return = yield self.local.run_job_async( +- not_done, +- 'saltutil.find_job', +- [self.batch_jid], +- 'list', +- gather_job_timeout=self.opts['gather_job_timeout'], +- jid=self.find_job_jid, +- **self.eauth) +- self.event.io_loop.call_later( +- self.opts['gather_job_timeout'], +- self.check_find_job, +- not_done) ++ not_done = minions.difference(self.done_minions).difference(self.timedout_minions) ++ ++ if not_done: ++ jid = self.jid_gen() ++ find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) ++ self.patterns.add((find_job_return_pattern, "find_job_return")) ++ self.event.subscribe(find_job_return_pattern, match_type='glob') ++ ++ ret = yield self.local.run_job_async( ++ not_done, ++ 'saltutil.find_job', ++ [self.batch_jid], ++ 'list', ++ gather_job_timeout=self.opts['gather_job_timeout'], ++ jid=jid, ++ **self.eauth) ++ self.event.io_loop.call_later( ++ self.opts['gather_job_timeout'], ++ self.check_find_job, ++ not_done, ++ jid) + + @tornado.gen.coroutine + def start(self): +@@ -203,6 +217,9 @@ class BatchAsync(object): + } + self.event.fire_event(data, "salt/batch/{0}/done".format(self.batch_jid)) + self.event.remove_event_handler(self.__event_handler) ++ for (pattern, label) in self.patterns: ++ if label in ["ping_return", "batch_run"]: ++ self.event.unsubscribe(pattern, match_type='glob') + + def schedule_next(self): + if not self.scheduled: +@@ -226,9 +243,12 @@ class BatchAsync(object): + gather_job_timeout=self.opts['gather_job_timeout'], + jid=self.batch_jid, + metadata=self.metadata) ++ + self.event.io_loop.call_later(self.opts['timeout'], self.find_job, set(next_batch)) + except Exception as ex: ++ log.error("Error in scheduling next batch: %s", ex) + self.active = self.active.difference(next_batch) + else: + self.end_batch() + self.scheduled = False ++ yield +diff --git a/salt/client/__init__.py b/salt/client/__init__.py +index aff354a021..0bb6d2b111 100644 +--- a/salt/client/__init__.py ++++ b/salt/client/__init__.py +@@ -1624,6 +1624,7 @@ class LocalClient(object): + 'key': self.key, + 'tgt_type': tgt_type, + 'ret': ret, ++ 'timeout': timeout, + 'jid': jid} + + # if kwargs are passed, pack them. +diff --git a/salt/master.py b/salt/master.py +index f08c126280..0e4bba0505 100644 +--- a/salt/master.py ++++ b/salt/master.py +@@ -2043,7 +2043,6 @@ class ClearFuncs(object): + def publish_batch(self, clear_load, minions, missing): + batch_load = {} + batch_load.update(clear_load) +- import salt.cli.batch_async + batch = salt.cli.batch_async.BatchAsync( + self.local.opts, + functools.partial(self._prep_jid, clear_load, {}), +-- +2.23.0 + + diff --git a/improve-batch_async-to-release-consumed-memory-bsc-1.patch b/improve-batch_async-to-release-consumed-memory-bsc-1.patch new file mode 100644 index 0000000..d8bf586 --- /dev/null +++ b/improve-batch_async-to-release-consumed-memory-bsc-1.patch @@ -0,0 +1,190 @@ +From 002543df392f65d95dbc127dc058ac897f2035ed Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Pablo=20Su=C3=A1rez=20Hern=C3=A1ndez?= + +Date: Thu, 26 Sep 2019 10:41:06 +0100 +Subject: [PATCH] Improve batch_async to release consumed memory + (bsc#1140912) + +--- + salt/cli/batch_async.py | 73 +++++++++++++++++++++++++---------------- + 1 file changed, 45 insertions(+), 28 deletions(-) + +diff --git a/salt/cli/batch_async.py b/salt/cli/batch_async.py +index 8a67331102..2bb50459c8 100644 +--- a/salt/cli/batch_async.py ++++ b/salt/cli/batch_async.py +@@ -5,6 +5,7 @@ Execute a job on the targeted minions by using a moving window of fixed size `ba + + # Import python libs + from __future__ import absolute_import, print_function, unicode_literals ++import gc + import tornado + + # Import salt libs +@@ -77,6 +78,7 @@ class BatchAsync(object): + self.batch_jid = jid_gen() + self.find_job_jid = jid_gen() + self.find_job_returned = set() ++ self.ended = False + self.event = salt.utils.event.get_event( + 'master', + self.opts['sock_dir'], +@@ -86,6 +88,7 @@ class BatchAsync(object): + io_loop=ioloop, + keep_loop=True) + self.scheduled = False ++ self.patterns = {} + + def __set_event_handler(self): + ping_return_pattern = 'salt/job/{0}/ret/*'.format(self.ping_jid) +@@ -116,7 +119,7 @@ class BatchAsync(object): + if minion in self.active: + self.active.remove(minion) + self.done_minions.add(minion) +- self.schedule_next() ++ self.event.io_loop.spawn_callback(self.schedule_next) + + def _get_next(self): + to_run = self.minions.difference( +@@ -129,23 +132,23 @@ class BatchAsync(object): + ) + return set(list(to_run)[:next_batch_size]) + +- @tornado.gen.coroutine + def check_find_job(self, batch_minions, jid): +- find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) +- self.event.unsubscribe(find_job_return_pattern, match_type='glob') +- self.patterns.remove((find_job_return_pattern, "find_job_return")) ++ if self.event: ++ find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) ++ self.event.unsubscribe(find_job_return_pattern, match_type='glob') ++ self.patterns.remove((find_job_return_pattern, "find_job_return")) + +- timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions) +- self.timedout_minions = self.timedout_minions.union(timedout_minions) +- self.active = self.active.difference(self.timedout_minions) +- running = batch_minions.difference(self.done_minions).difference(self.timedout_minions) ++ timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions) ++ self.timedout_minions = self.timedout_minions.union(timedout_minions) ++ self.active = self.active.difference(self.timedout_minions) ++ running = batch_minions.difference(self.done_minions).difference(self.timedout_minions) + +- if timedout_minions: +- self.schedule_next() ++ if timedout_minions: ++ self.schedule_next() + +- if running: +- self.find_job_returned = self.find_job_returned.difference(running) +- self.event.io_loop.add_callback(self.find_job, running) ++ if running: ++ self.find_job_returned = self.find_job_returned.difference(running) ++ self.event.io_loop.spawn_callback(self.find_job, running) + + @tornado.gen.coroutine + def find_job(self, minions): +@@ -165,8 +168,8 @@ class BatchAsync(object): + gather_job_timeout=self.opts['gather_job_timeout'], + jid=jid, + **self.eauth) +- self.event.io_loop.call_later( +- self.opts['gather_job_timeout'], ++ yield tornado.gen.sleep(self.opts['gather_job_timeout']) ++ self.event.io_loop.spawn_callback( + self.check_find_job, + not_done, + jid) +@@ -174,10 +177,6 @@ class BatchAsync(object): + @tornado.gen.coroutine + def start(self): + self.__set_event_handler() +- #start batching even if not all minions respond to ping +- self.event.io_loop.call_later( +- self.batch_presence_ping_timeout or self.opts['gather_job_timeout'], +- self.start_batch) + ping_return = yield self.local.run_job_async( + self.opts['tgt'], + 'test.ping', +@@ -191,6 +190,10 @@ class BatchAsync(object): + metadata=self.metadata, + **self.eauth) + self.targeted_minions = set(ping_return['minions']) ++ #start batching even if not all minions respond to ping ++ yield tornado.gen.sleep(self.batch_presence_ping_timeout or self.opts['gather_job_timeout']) ++ self.event.io_loop.spawn_callback(self.start_batch) ++ + + @tornado.gen.coroutine + def start_batch(self): +@@ -202,12 +205,14 @@ class BatchAsync(object): + "down_minions": self.targeted_minions.difference(self.minions), + "metadata": self.metadata + } +- self.event.fire_event(data, "salt/batch/{0}/start".format(self.batch_jid)) +- yield self.run_next() ++ ret = self.event.fire_event(data, "salt/batch/{0}/start".format(self.batch_jid)) ++ self.event.io_loop.spawn_callback(self.run_next) + ++ @tornado.gen.coroutine + def end_batch(self): + left = self.minions.symmetric_difference(self.done_minions.union(self.timedout_minions)) +- if not left: ++ if not left and not self.ended: ++ self.ended = True + data = { + "available_minions": self.minions, + "down_minions": self.targeted_minions.difference(self.minions), +@@ -220,20 +225,26 @@ class BatchAsync(object): + for (pattern, label) in self.patterns: + if label in ["ping_return", "batch_run"]: + self.event.unsubscribe(pattern, match_type='glob') ++ del self ++ gc.collect() ++ yield + ++ @tornado.gen.coroutine + def schedule_next(self): + if not self.scheduled: + self.scheduled = True + # call later so that we maybe gather more returns +- self.event.io_loop.call_later(self.batch_delay, self.run_next) ++ yield tornado.gen.sleep(self.batch_delay) ++ self.event.io_loop.spawn_callback(self.run_next) + + @tornado.gen.coroutine + def run_next(self): ++ self.scheduled = False + next_batch = self._get_next() + if next_batch: + self.active = self.active.union(next_batch) + try: +- yield self.local.run_job_async( ++ ret = yield self.local.run_job_async( + next_batch, + self.opts['fun'], + self.opts['arg'], +@@ -244,11 +255,17 @@ class BatchAsync(object): + jid=self.batch_jid, + metadata=self.metadata) + +- self.event.io_loop.call_later(self.opts['timeout'], self.find_job, set(next_batch)) ++ yield tornado.gen.sleep(self.opts['timeout']) ++ self.event.io_loop.spawn_callback(self.find_job, set(next_batch)) + except Exception as ex: + log.error("Error in scheduling next batch: %s", ex) + self.active = self.active.difference(next_batch) + else: +- self.end_batch() +- self.scheduled = False ++ yield self.end_batch() ++ gc.collect() + yield ++ ++ def __del__(self): ++ self.event = None ++ self.ioloop = None ++ gc.collect() +-- +2.22.0 + + diff --git a/salt.changes b/salt.changes index 862d38f..2c9018b 100644 --- a/salt.changes +++ b/salt.changes @@ -1,3 +1,14 @@ +------------------------------------------------------------------- +Thu Sep 26 10:23:39 UTC 2019 - Pablo Suárez Hernández + +- Improve batch_async to release consumed memory (bsc#1140912) +- Fix memory leak produced by batch async find_jobs mechanism (bsc#1140912) +- Grant read and execute permission to others (bsc#1150447) + +- Added: + * improve-batch_async-to-release-consumed-memory-bsc-1.patch + * fix-memory-leak-produced-by-batch-async-find_jobs-me.patch + ------------------------------------------------------------------- Thu Sep 5 17:45:50 UTC 2019 - Jochen Breuer diff --git a/salt.spec b/salt.spec index ca2afe5..7935a33 100644 --- a/salt.spec +++ b/salt.spec @@ -220,25 +220,29 @@ Patch72: avoid-traceback-when-http.query-request-cannot-be-pe.patch # https://github.com/saltstack/salt/pull/54022 # https://github.com/saltstack/salt/pull/54024 Patch73: accumulated-changes-required-for-yomi-165.patch +# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/177 +Patch74: restore-default-behaviour-of-pkg-list-return.patch +# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/commit/6af07030a502c427781991fc9a2b994fa04ef32e +Patch75: fix-memory-leak-produced-by-batch-async-find_jobs-me.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/159 -Patch74: move-server_id-deprecation-warning-to-reduce-log-spa.patch +Patch76: move-server_id-deprecation-warning-to-reduce-log-spa.patch # PATCH_FIX_UPSTREAM: https://github.com/saltstack/salt/pull/54077 # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/166 -Patch75: fix-aptpkg-systemd-call-bsc-1143301.patch +Patch77: fix-aptpkg-systemd-call-bsc-1143301.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/170 -Patch76: strip-trailing-from-repo.uri-when-comparing-repos-in.patch -# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/177 -Patch77: restore-default-behaviour-of-pkg-list-return.patch +Patch78: strip-trailing-from-repo.uri-when-comparing-repos-in.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/172 -Patch78: implement-network.fqdns-module-function-bsc-1134860-.patch +Patch79: implement-network.fqdns-module-function-bsc-1134860-.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/173 -Patch79: 2019.2.0-pr-54196-backport-173.patch +Patch80: 2019.2.0-pr-54196-backport-173.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/174 -Patch80: virt.volume_infos-needs-to-ignore-inactive-pools-174.patch +Patch81: virt.volume_infos-needs-to-ignore-inactive-pools-174.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/175 -Patch81: virt.volume_infos-silence-libvirt-error-message-175.patch +Patch82: virt.volume_infos-silence-libvirt-error-message-175.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/176 -Patch82: fix-virt.full_info-176.patch +Patch83: fix-virt.full_info-176.patch +# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/commit/002543df392f65d95dbc127dc058ac897f2035ed +Patch84: improve-batch_async-to-release-consumed-memory-bsc-1.patch # BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -799,6 +803,8 @@ cp %{S:5} ./.travis.yml %patch80 -p1 %patch81 -p1 %patch82 -p1 +%patch83 -p1 +%patch84 -p1 %build %if 0%{?build_py2} @@ -1519,9 +1525,9 @@ rm -f %{_localstatedir}/cache/salt/minion/thin/version %files standalone-formulas-configuration %defattr(-,root,root) %config(noreplace) %attr(0640, root, salt) %{_sysconfdir}/salt/master.d/standalone-formulas-configuration.conf -%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/ -%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/states/ -%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/metadata/ +%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/ +%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/states/ +%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/metadata/ %changelog