Merge pull request #54806 from Oloremo/failhard-batch-fix-2019.2.1

[Regression] Batch with failhard fix
2025-04-17 10:10:20 +00:00 · 2019-10-01 07:51:46 -07:00 · 2019-10-01 07:51:46 -07:00 · 433b6fa681
commit 433b6fa681
parent ed94aa53eb 6684793bf1
6 changed files with 115 additions and 1 deletions
--- a/salt/cli/batch.py
+++ b/salt/cli/batch.py
@ -254,6 +254,9 @@ class Batch(object):
                    data['ret']['retcode'] = data['retcode']
                    if self.opts.get('failhard') and data['ret']['retcode'] > 0:
                        failhard = True
+                else:
+                    if self.opts.get('failhard') and data['retcode'] > 0:
+                        failhard = True

                if self.opts.get('raw'):
                    ret[minion] = data
--- a/salt/client/init.py
+++ b/salt/client/init.py
@ -547,7 +547,7 @@ class LocalClient(object):
                'tgt_type': tgt_type,
                'ret': ret,
                'batch': batch,
-                'failhard': kwargs.get('failhard', False),
+                'failhard': kwargs.get('failhard', self.opts.get('failhard', False)),
                'raw': kwargs.get('raw', False)}

        if 'timeout' in kwargs:
--- a/salt/states/saltmod.py
+++ b/salt/states/saltmod.py
@ -128,6 +128,7 @@ def state(name,
        queue=False,
        subset=None,
        orchestration_jid=None,
+        failhard=None,
        **kwargs):
    '''
    Invoke a state run on a given target
@ -221,6 +222,11 @@ def state(name,

        .. versionadded:: 2017.7.0

+    failhard
+        pass failhard down to the executing state
+
+        .. versionadded:: 2019.2.2
+
    Examples:

    Run a list of sls files via :py:func:`state.sls <salt.state.sls>` on target
@ -311,9 +317,13 @@ def state(name,

    if batch is not None:
        cmd_kw['batch'] = six.text_type(batch)
+
    if subset is not None:
        cmd_kw['subset'] = subset

+    if failhard is True or __opts__.get('failhard'):
+        cmd_kw['failhard'] = True
+
    masterless = __opts__['__role'] == 'minion' and \
                 __opts__['file_client'] == 'local'
    if not masterless:
@ -428,6 +438,7 @@ def function(
        timeout=None,
        batch=None,
        subset=None,
+        failhard=None,
        **kwargs):  # pylint: disable=unused-argument
    '''
    Execute a single module function on a remote minion via salt or salt-ssh
@ -477,6 +488,11 @@ def function(

        .. versionadded:: 2017.7.0

+    failhard
+        pass failhard down to the executing function
+
+        .. versionadded:: 2019.2.2
+
    '''
    func_ret = {'name': name,
                'changes': {},
@ -502,6 +518,9 @@ def function(
    cmd_kw['expect_minions'] = expect_minions
    cmd_kw['_cmd_meta'] = True

+    if failhard is True or __opts__.get('failhard'):
+        cmd_kw['failhard'] = True
+
    if ret_config:
        cmd_kw['ret_config'] = ret_config

--- a/tests/integration/cli/test_batch.py
+++ b/tests/integration/cli/test_batch.py
@ -75,3 +75,63 @@ class BatchTest(ShellCase):
            timeout=self.run_timeout,
        )
        self.assertEqual(cmd[-1], 2)
+
+# Tests for failhard + batch. Ideally these tests would simply use
+# assertRaises(StopIteration),
+# but that is impossible because the tests execute salt in a separate process via fork().
+
+    def test_batch_module_stopping_after_error(self):
+        '''
+        Test that a failed command stops the batch run
+        '''
+
+        minions_list = []
+        retcode = None
+
+        # Executing salt with batch: 1 and with failhard. It should stop after the first error.
+        cmd = self.run_salt(
+            '"*minion" test.retcode 42 -b 1 --out=yaml --failhard',
+            timeout=self.run_timeout,
+        )
+
+        # Parse the output to collect the batch announcements and the reported retcode (kept as text).
+        # retcode may be overwritten if failhard is broken, but the minion count check would still fail.
+        for line in cmd:
+            if line.startswith('Executing run on'):
+                minions_list.append(line)
+            if line.startswith('retcode'):
+                retcode = line.split(':', 1)[-1].strip()
+        # We expect to have only one minion to be run
+        self.assertEqual(1, len(minions_list))
+        # We expect to find a retcode in the output
+        self.assertIsNot(None, retcode)
+        # We expect retcode to be non-zero (compared as text; int 0 vs str could never be equal)
+        self.assertNotEqual('0', retcode)
+
+    def test_batch_state_stopping_after_error(self):
+        '''
+        Test that a failed state stops the batch run
+        '''
+
+        minions_list = []
+        retcode = None
+
+        # Executing salt with batch: 1 and with failhard. It should stop after the first error.
+        cmd = self.run_salt(
+            '"*minion" state.single test.fail_without_changes name=test_me -b 1 --out=yaml --failhard',
+            timeout=self.run_timeout,
+        )
+
+        # Parse the output to collect the batch announcements and the reported retcode (kept as text).
+        # retcode may be overwritten if failhard is broken, but the minion count check would still fail.
+        for line in cmd:
+            if line.startswith('Executing run on'):
+                minions_list.append(line)
+            if line.startswith('retcode'):
+                retcode = line.split(':', 1)[-1].strip()
+        # We expect to have only one minion to be run
+        self.assertEqual(1, len(minions_list))
+        # We expect to find a retcode in the output
+        self.assertIsNot(None, retcode)
+        # We expect retcode to be non-zero (compared as text; int 0 vs str could never be equal)
+        self.assertNotEqual('0', retcode)
--- a/tests/integration/files/file/base/orch/batch.sls
+++ b/tests/integration/files/file/base/orch/batch.sls
@ -0,0 +1,6 @@
+call_fail_state:
+  salt.state:
+    - tgt: '*minion'
+    - batch: 1
+    - failhard: True
+    - sls: fail
--- a/tests/integration/runners/test_state.py
+++ b/tests/integration/runners/test_state.py
@ -242,6 +242,32 @@ class StateRunnerTest(ShellCase):
            for item in out:
                assert item in ret

+    def test_orchestrate_batch_with_failhard_error(self):
+        '''
+        Ensure an orchestration run with batch and failhard halts on the first failure.
+        '''
+        output = self.run_run('state.orchestrate orch.batch --out=json -l critical')
+        parsed = salt.utils.json.loads('\n'.join(output))
+        retcode = parsed['retcode']
+        state_ret = parsed['data']['master']['salt_|-call_fail_state_|-call_fail_state_|-state']
+        result = state_ret['result']
+        changes = state_ret['changes']
+
+        # The changes dict has been seen empty on macOS (possibly a bug), so the
+        # per-minion returns are only looked up when it is populated.
+        changes_ret = changes['ret'] if changes else None
+
+        # Debug
+        print('Retcode: {}'.format(retcode))
+        print('Changes: {}'.format(changes))
+        print('Result: {}'.format(result))
+
+        assert retcode != 0
+        assert result is False
+        # Execution should stop after the first error, so only one minion may have returned.
+        if changes:
+            assert len(changes_ret) == 1
+
    def test_state_event(self):
        '''
        test to ensure state.event