Reap child processes after each of the ssh tests in case of timeouts

This commit is contained in:
Pedro Algarvio 2024-03-22 11:49:22 +00:00
parent bfacdbf98f
commit 37e56ba0a6
3 changed files with 70 additions and 43 deletions

View file

@ -5,7 +5,6 @@
import logging
import os
import pathlib
import pprint
import re
import shutil
import stat
@ -16,7 +15,6 @@ from unittest import TestCase # pylint: disable=blacklisted-module
import _pytest.logging
import _pytest.skipping
import more_itertools
import psutil
import pytest
import salt
@ -1299,7 +1297,6 @@ def salt_call_cli(salt_minion_factory):
@pytest.fixture(scope="session", autouse=True)
def bridge_pytest_and_runtests(
reap_stray_processes,
salt_factories,
salt_syndic_master_factory,
salt_syndic_factory,
@ -1336,6 +1333,8 @@ def bridge_pytest_and_runtests(
salt_syndic_factory.config["conf_file"]
)
RUNTIME_VARS.TMP_SSH_CONF_DIR = str(sshd_config_dir)
with reap_stray_processes():
yield
@pytest.fixture(scope="session")
@ -1635,46 +1634,6 @@ def from_filenames_collection_modifyitems(config, items):
# ----- Custom Fixtures --------------------------------------------------------------------------------------------->
@pytest.fixture(scope="session")
def reap_stray_processes():
    """
    Session-scoped fixture which, on teardown, looks for child processes of
    the current process that are still around and tries to get rid of them.

    The teardown waits (``timeout=3``) for the leftover processes to exit on
    their own, calls ``kill()`` on those still alive, waits once more, and
    finally logs a warning for every process that survived.
    """
    # Hand control back to pytest; everything below runs at fixture teardown
    yield

    leftovers = psutil.Process(os.getpid()).children(recursive=True)
    if not leftovers:
        log.info("No astray processes found")
        return

    def _log_exit(proc):
        # Invoked by psutil.wait_procs() for each process that terminates
        log.debug("Process %s terminated with exit code %s", proc, proc.returncode)

    # Reverse the order, siblings first, parents after
    leftovers.reverse()
    log.warning(
        "Test suite left %d astray processes running. Killing those processes:\n%s",
        len(leftovers),
        pprint.pformat(leftovers),
    )

    # First pass: give the processes a chance to exit by themselves
    _, still_running = psutil.wait_procs(leftovers, timeout=3, callback=_log_exit)
    for proc in still_running:
        try:
            proc.kill()
        except psutil.NoSuchProcess:
            # Already gone between the wait and the kill, nothing to do
            pass

    # Second pass: wait for the killed processes to actually go away
    _, survivors = psutil.wait_procs(still_running, timeout=3, callback=_log_exit)
    for proc in survivors:
        # Give up
        log.warning(
            "Process %s survived SIGKILL, giving up:\n%s",
            proc,
            pprint.pformat(proc.as_dict()),
        )
@pytest.fixture(scope="session")
def sminion():
    """
    Session-scoped fixture returning whatever ``create_sminion()`` builds.
    """
    minion = create_sminion()
    return minion

View file

@ -1,5 +1,7 @@
import pytest
from tests.support.pytest.helpers import reap_stray_processes
@pytest.fixture(scope="package", autouse=True)
def _auto_skip_on_fedora_39(grains):
@ -11,3 +13,12 @@ def _auto_skip_on_fedora_39(grains):
# has been deprecated since Python 3.7, so, the logic goes into trying to import
# backports.ssl-match-hostname which is not installed on the system.
)
@pytest.fixture(autouse=True)
def _reap_stray_processes(grains):
    """
    Autouse fixture that runs each test inside ``reap_stray_processes()``.

    When a test times out it might leave child processes behind; leaving the
    context manager after the test is what nukes them.
    """
    # NOTE(review): ``grains`` is not referenced in this body -- presumably a
    # leftover from the fixture above; confirm before dropping it.
    reaper = reap_stray_processes()
    with reaper:
        # Run test
        yield

View file

@ -8,6 +8,7 @@
import logging
import os
import pathlib
import pprint
import shutil
import subprocess
import tempfile
@ -18,6 +19,7 @@ import warnings
from contextlib import contextmanager
import attr
import psutil
import pytest
from saltfactories.utils import random_string
from saltfactories.utils.tempfiles import temp_file
@ -818,6 +820,61 @@ def change_cwd(path):
os.chdir(old_cwd)
@contextmanager
def reap_stray_processes(pid: "int | None" = None):
    """
    Context manager which kills child processes that were started while the
    ``with`` block was running and are still alive when it exits.

    On entry, the (recursive) children of ``pid`` are recorded. On exit, the
    children are listed again; any process that was not in the first listing
    and whose pid still exists is considered astray. Those are waited on
    (``timeout=3``), ``kill()``-ed if still alive, waited on once more, and a
    warning is logged for each one that survives.

    Exceptions raised inside the ``with`` block are always propagated to the
    caller after the cleanup has run.

    :param pid: The pid of the process whose children should be inspected.
                Defaults to the pid of the calling process, resolved when the
                context manager is entered.
    """
    if pid is None:
        # Resolve at call time. A default of ``os.getpid()`` in the signature
        # would be evaluated once, at import time, and be wrong after a fork.
        pid = os.getpid()

    # Taken outside of the ``try`` so that a failure here is reported as-is
    # instead of being masked by a NameError raised from the ``finally`` block.
    pre_children = psutil.Process(pid).children(recursive=True)
    try:
        # Do stuff
        yield
    finally:
        # NOTE: there must be no ``return`` in this ``finally`` block, it would
        # swallow any exception raised by the body of the ``with`` statement.
        post_children = psutil.Process(pid).children(recursive=True)
        # Keep only the processes which are alive and were not running before
        # entering the context
        children = [
            process
            for process in post_children
            if process not in pre_children and psutil.pid_exists(process.pid)
        ]

        if not children:
            log.info("No astray processes found")
        else:

            def on_terminate(proc):
                # Invoked by psutil.wait_procs() for each terminated process
                log.debug(
                    "Process %s terminated with exit code %s", proc, proc.returncode
                )

            # Reverse the order, siblings first, parents after
            children.reverse()
            log.warning(
                "Test suite left %d astray processes running. Killing those processes:\n%s",
                len(children),
                pprint.pformat(children),
            )

            # Give the processes a chance to exit by themselves
            _, alive = psutil.wait_procs(children, timeout=3, callback=on_terminate)
            for child in alive:
                try:
                    child.kill()
                except psutil.NoSuchProcess:
                    # Gone between the wait and the kill
                    continue

            _, alive = psutil.wait_procs(alive, timeout=3, callback=on_terminate)
            # Give up on whatever is still alive at this point
            for child in alive:
                log.warning(
                    "Process %s survived SIGKILL, giving up:\n%s",
                    child,
                    pprint.pformat(child.as_dict()),
                )
# Only allow star importing the functions defined in this module
__all__ = [
name