Add a "Check Workflow Run" workflow

This allows failed workflow runs to be restarted automatically, up to two more
times, as long as no more than half of the jobs failed.

This workflow should be removed as soon as we can have a more stable
test suite by removing flaky test failures.

Signed-off-by: Pedro Algarvio <palgarvio@vmware.com>
This commit is contained in:
Pedro Algarvio 2023-02-05 08:21:33 +00:00 committed by Pedro Algarvio
parent 64280e9298
commit 03478f3eba
4 changed files with 177 additions and 0 deletions

View file

@ -0,0 +1,41 @@
---
# Restarts failed runs of the workflows listed under `workflows:` below by
# calling `tools ci rerun-workflow`.
name: Check Workflow Run
on:
# Fires every time a run of one of the listed workflows completes.
workflow_run:
types:
- completed
workflows:
- CI
- Nightly
- Scheduled
# NOTE(review): write access to Actions is presumably what allows the job's
# token to re-run a workflow - confirm against the GitHub documentation.
permissions:
actions: write
jobs:
restart-failed-workflow-runs:
name: "Restart Workflow (ID: ${{ github.event.workflow_run.id }}; Attempt: ${{ github.event.workflow_run.run_attempt }})"
# Do nothing unless the run that just completed concluded as a failure.
if: ${{ github.event.workflow_run.conclusion == 'failure' }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v4
id: setup-python
with:
python-version: "3.10"
# Local composite action; the later steps invoke the `tools` command.
- name: Setup Python Tools Scripts
uses: ./.github/actions/setup-python-tools-scripts
- name: Pretty Print The GH Actions Event
run:
tools ci print-gh-event
# The token is exported to the environment of the restart command.
- name: Restart Workflow
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
tools ci rerun-workflow

View file

@ -0,0 +1,45 @@
{#- This workflow restarts failed workflow runs.
It is triggered when a run of any workflow listed in `check_workflows`
completes, and its only job is skipped unless that run concluded as a failure.
We should stop using this workflow once we remove the flakiness from
Salt's test suite.
-#}
---
name: <{ workflow_name }>
on:
workflow_run:
types:
- completed
workflows:
<%- for workflow in check_workflows %>
- <{ workflow }>
<%- endfor %>
permissions:
actions: write
jobs:
restart-failed-workflow-runs:
name: "Restart Workflow (ID: ${{ github.event.workflow_run.id }}; Attempt: ${{ github.event.workflow_run.run_attempt }})"
if: ${{ github.event.workflow_run.conclusion == 'failure' }}
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v4
id: setup-python
with:
python-version: "3.10"
- name: Setup Python Tools Scripts
uses: ./.github/actions/setup-python-tools-scripts
- name: Pretty Print The GH Actions Event
run:
tools ci print-gh-event
- name: Restart Workflow
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
tools ci rerun-workflow

View file

@ -511,3 +511,88 @@ def transport_matrix(ctx: Context, distro_slug: str):
_matrix.append({"transport": transport})
print(json.dumps(_matrix))
ctx.exit(0)
@ci.command(
    name="rerun-workflow",
)
def rerun_workflow(ctx: Context):
    """
    Re-run the failed jobs of a failed workflow run.

    The workflow run to process is read from the ``workflow_run`` entry of the
    GitHub event payload stored in the file named by the ``GITHUB_EVENT_PATH``
    environment variable.

    Nothing is re-run when:

    * the workflow run is already on its 3rd (or later) attempt;
    * more than half of the jobs of the workflow run failed.
    """
    gh_event_path = os.environ.get("GITHUB_EVENT_PATH") or None
    if gh_event_path is None:
        ctx.warn("The 'GITHUB_EVENT_PATH' variable is not set.")
        ctx.exit(1)

    if TYPE_CHECKING:
        assert gh_event_path is not None

    try:
        # Use a context manager so the file handle is always closed, and decode
        # explicitly as UTF-8 instead of relying on the locale's default encoding.
        with open(gh_event_path, encoding="utf-8") as rfh:
            gh_event = json.load(rfh)
    except Exception as exc:
        ctx.error(f"Could not load the GH Event payload from {gh_event_path!r}:\n", exc)
        ctx.exit(1)

    workflow_run = gh_event["workflow_run"]
    ctx.info(
        f"Processing Workflow ID {workflow_run['id']}, attempt {workflow_run['run_attempt']}..."
    )
    # Give up once the run has reached its 3rd attempt.
    if workflow_run["run_attempt"] >= 3:
        ctx.info(
            f"This workflow has failed for the past {workflow_run['run_attempt']} attempts. "
            "Not re-running it."
        )
        ctx.exit(0)

    run_id = str(workflow_run["id"])
    repository = workflow_run["repository"]["full_name"]
    page = 1
    total = failed = 0
    # Get all jobs from workflow run to see how many failed.
    # The jobs are requested 100 at a time; an empty page ends the loop.
    while True:
        cmdline = [
            "gh",
            "api",
            "-H",
            "Accept: application/vnd.github+json",
            f"/repos/{repository}/actions/runs/{run_id}/jobs?filter=latest&per_page=100&page={page}",
        ]
        ret = ctx.run(*cmdline, capture=True, check=False)
        if ret.returncode:
            ctx.error("Failed to get the jobs for the workflow run")
            # NOTE(review): exits with status 0 even though the lookup failed,
            # presumably so that this helper workflow is not itself reported
            # as failed - confirm that this is intended.
            ctx.exit(0)

        jobs = json.loads(ret.stdout.strip().decode())["jobs"]
        if not jobs:
            break

        for job in jobs:
            total += 1
            if job["conclusion"] == "failure":
                failed += 1
        page += 1

    ctx.info(f"{failed} out of {total} jobs failed.")
    if failed > total / 2:
        ctx.info("More than half of the jobs failed. Not automatically restarting.")
        ctx.exit(0)

    # Only the failed jobs of the run are restarted.
    cmdline = [
        "gh",
        "run",
        "-R",
        repository,
        "rerun",
        run_id,
        "--failed",
    ]
    ctx.info(f"Running {' '.join(cmdline)!r} ...")
    ret = ctx.run(*cmdline, check=False)
    if ret.returncode:
        ctx.error("Failed to re-run workflow")
    else:
        ctx.info("Restarted workflow successfully")
    ctx.exit(0)

View file

@ -59,6 +59,9 @@ def generate_workflows(ctx: Context):
"Scheduled": {
"template": "scheduled.yml",
},
"Check Workflow Run": {
"template": "check-workflow-run.yml",
},
}
env = Environment(
block_start_string="<%",
@ -88,6 +91,9 @@ def generate_workflows(ctx: Context):
"conclusion_needs": NeedsTracker(),
"test_salt_needs": NeedsTracker(),
}
if workflow_name == "Check Workflow Run":
check_workflows = [wf for wf in sorted(workflows) if wf != workflow_name]
context["check_workflows"] = check_workflows
loaded_template = env.get_template(f"{template}.j2")
rendered_template = loaded_template.render(**context)
workflow_path.write_text(rendered_template.rstrip() + "\n")