Docs and changelogs for timeout handling

This commit is contained in:
Daniel A. Wozniak 2023-07-31 18:34:16 -07:00 committed by Pedro Algarvio
parent 179583e93f
commit f7ca58e0af
6 changed files with 36 additions and 3 deletions

1
changelog/63824.fixed.md Normal file
View file

@ -0,0 +1 @@
Allow long-running pillar and file client requests to finish by using the request_channel_timeout and request_channel_tries minion config options.

1
changelog/64651.fixed.md Normal file
View file

@ -0,0 +1 @@
Show a user-friendly message when pillars time out

1
changelog/64653.fixed.md Normal file
View file

@ -0,0 +1 @@
File client timeouts during jobs show user-friendly errors instead of tracebacks

1
changelog/64729.fixed.md Normal file
View file

@ -0,0 +1 @@
SaltClientError does not log a traceback on minions; these errors are expected to happen, so a user-friendly log message is shown instead.

View file

@ -1305,6 +1305,36 @@ restart.
auth_safemode: False
.. conf_minion:: request_channel_timeout
``request_channel_timeout``
---------------------------
.. versionadded:: 3006.2
Default: ``30``
The default timeout for request channel requests. This setting can be used to tune minions to better handle long-running pillar and file client requests.
.. code-block:: yaml
request_channel_timeout: 30
``request_channel_tries``
-------------------------
.. versionadded:: 3006.2
Default: ``3``
The default number of times the minion will try request channel requests. This
setting can be used to tune minions to better handle long running pillar and
file client requests by retrying them after a timeout happens.
.. code-block:: yaml
request_channel_tries: 3
.. conf_minion:: ping_interval
``ping_interval``

View file

@ -30,7 +30,7 @@ def salt_minion_retry(salt_master_factory, salt_minion_id):
@pytest.mark.slow_test
def test_publish_retry(salt_master, salt_minion_retry, salt_cli, salt_run_cli):
# run job that takes some time for warmup
rtn = salt_cli.run("test.sleep", "5", "--async", minion_tgt=salt_minion_retry.id)
rtn = salt_cli.run("test.sleep", "4.9", "--async", minion_tgt=salt_minion_retry.id)
# obtain JID
jid = rtn.stdout.strip().split(" ")[-1]
@ -110,13 +110,12 @@ def test_pillar_timeout(salt_master_factory):
with master.started(), minion1.started(), minion2.started(), minion3.started(), minion4.started(), sls_tempfile:
proc = cli.run("state.sls", sls_name, minion_tgt="*")
# At least one minion should have a Pillar timeout
print(proc)
assert proc.returncode == 1
minion_timed_out = False
# Find the minion that has a Pillar timeout
for key in proc.data:
if isinstance(proc.data[key], str):
if proc.data[key].find("Pillar timed out") != -1:
if "Pillar timed out" in proc.data[key]:
minion_timed_out = True
break
assert minion_timed_out is True