-
Notifications
You must be signed in to change notification settings - Fork 134
184 lines (168 loc) · 6.17 KB
/
bench.yml
File metadata and controls
184 lines (168 loc) · 6.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
name: 'Benchmark'

# Triggers: every pull request event (default activity types), submitted
# pull request reviews, and manual dispatch.
on:
  pull_request:
  pull_request_review:
    types: [submitted]
  workflow_dispatch:

# One concurrency group per workflow + ref; a newer run in the same group
# cancels the one in progress. Review-triggered runs append their run id to
# the group name, so each of them lands in a group of its own.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}${{ github.event_name == 'pull_request_review' && format('-review-{0}', github.run_id) || '' }}
  cancel-in-progress: true
jobs:
  # Runs the path filter and exposes its `checkall` output to downstream jobs.
  file-changes:
    name: Detect File Changes
    # Runs for every event except reviews submitted by a bot account.
    if: >
      github.event_name != 'pull_request_review' ||
      github.event.review.user.type != 'Bot'
    runs-on: 'ubuntu-latest'
    outputs:
      checkall: ${{ steps.changes.outputs.checkall }}
    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Detect Changes
        uses: dorny/paths-filter@v3
        id: changes
        with:
          filters: ".github/file-filter.yml"

  # Benchmarks the PR checkout against master on each matrix entry.
  self:
    name: "${{ matrix.name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }})"
    # Gate: only in MFlowCode/MFC, only when the path filter reported
    # checkall == 'true', never for draft PRs, and only when the trigger is
    # an approving review, a PR opened by sbryngelson or wilfonba, or a
    # manual dispatch.
    if: ${{ github.repository=='MFlowCode/MFC' && needs.file-changes.outputs.checkall=='true' && github.event.pull_request.draft != true && ((github.event_name=='pull_request_review' && github.event.review.state=='approved') || (github.event_name=='pull_request' && (github.event.pull_request.user.login=='sbryngelson' || github.event.pull_request.user.login=='wilfonba')) || github.event_name=='workflow_dispatch') }}
    needs: file-changes
    strategy:
      fail-fast: false
      matrix:
        # Per entry: `group`/`labels` select the runner, `flag` is passed to
        # `mfc.sh load -c`, and a non-empty `build_script` enables the
        # "Fetch Dependencies" step.
        include:
          - cluster: phoenix
            name: Georgia Tech | Phoenix (NVHPC)
            group: phoenix
            labels: gt
            flag: p
            device: cpu
            interface: none
            build_script: ""
          - cluster: phoenix
            name: Georgia Tech | Phoenix (NVHPC)
            group: phoenix
            labels: gt
            flag: p
            device: gpu
            interface: acc
            build_script: ""
          - cluster: phoenix
            name: Georgia Tech | Phoenix (NVHPC)
            group: phoenix
            labels: gt
            flag: p
            device: gpu
            interface: omp
            build_script: ""
          - cluster: frontier
            name: Oak Ridge | Frontier (CCE)
            group: phoenix
            labels: frontier
            flag: f
            device: gpu
            interface: acc
            build_script: "bash .github/workflows/frontier/build.sh gpu acc"
          - cluster: frontier
            name: Oak Ridge | Frontier (CCE)
            group: phoenix
            labels: frontier
            flag: f
            device: gpu
            interface: omp
            build_script: "bash .github/workflows/frontier/build.sh gpu omp"
          - cluster: frontier_amd
            name: Oak Ridge | Frontier (AMD)
            group: phoenix
            labels: frontier
            flag: famd
            device: gpu
            interface: omp
            build_script: "bash .github/workflows/frontier_amd/build.sh gpu omp"
    # A failing Frontier entry does not fail the workflow run.
    continue-on-error: ${{ matrix.cluster == 'frontier' || matrix.cluster == 'frontier_amd' }}
    runs-on:
      group: ${{ matrix.group }}
      labels: ${{ matrix.labels }}
    timeout-minutes: 480
    steps:
      - name: Clean stale output files
        run: rm -f *.out

      # Both checkouts use `clean: false`, so untracked files already present
      # in `pr/` and `master/` are left in place.
      - name: Clone - PR
        uses: actions/checkout@v4
        with:
          path: pr
          clean: false

      - name: Clone - Master
        uses: actions/checkout@v4
        with:
          repository: MFlowCode/MFC
          ref: master
          path: master
          clean: false

      # Runs the cluster build script in both checkouts concurrently, then
      # waits for both so that either failure is reported.
      - name: Fetch Dependencies
        if: matrix.build_script != ''
        timeout-minutes: 150
        run: |
          (cd pr && ${{ matrix.build_script }}) &
          pid1=$!
          (cd master && ${{ matrix.build_script }}) &
          pid2=$!
          e1=0; e2=0
          wait $pid1 || e1=$?
          wait $pid2 || e2=$?
          if [ $e1 -ne 0 ] || [ $e2 -ne 0 ]; then
            echo "Build failures: pr=$e1 master=$e2"
            exit 1
          fi

      - name: Bench (Master v. PR)
        run: bash pr/.github/scripts/run_parallel_benchmarks.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}

      # On cancellation, scancel every job id found in a *.slurm_job_id file
      # below the workspace; scancel errors are ignored.
      - name: Cancel SLURM Jobs
        if: cancelled()
        run: |
          find . -name "*.slurm_job_id" | while read -r f; do
            job_id=$(cat "$f")
            echo "Cancelling SLURM job $job_id"
            scancel "$job_id" 2>/dev/null || true
          done

      - name: Generate & Post Comment
        if: always()
        run: |
          # `mfc.sh load` is sourced, so it must share a shell with
          # `bench_diff`: anything sourced inside its own `( ... )` group is
          # discarded as soon as that subshell exits.
          (
            cd pr &&
            . ./mfc.sh load -c ${{ matrix.flag }} -m g &&
            ./mfc.sh bench_diff \
              ../master/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml \
              ../pr/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml
          )

      - name: Print Logs
        if: always()
        run: |
          cat pr/bench-${{ matrix.device }}-${{ matrix.interface }}.* 2>/dev/null || true
          cat master/bench-${{ matrix.device }}-${{ matrix.interface }}.* 2>/dev/null || true

      # A case's .out file counts as passed when a .yaml file with the same
      # stem exists next to it; otherwise it is listed as failed and its log
      # is printed in full.
      - name: Print Per-Case Logs
        if: always()
        run: |
          passed=() failed=()
          for out in pr/build/benchmarks/*/*.out master/build/benchmarks/*/*.out; do
            [ -f "$out" ] || continue
            [ -f "${out%.out}.yaml" ] && passed+=("$out") || failed+=("$out")
          done
          echo "=== Per-Case Summary: ${#failed[@]} failed, ${#passed[@]} passed ==="
          for out in "${failed[@]}"; do echo " [FAILED] $out"; done
          for out in "${passed[@]}"; do echo " [PASSED] $out"; done
          if [ ${#failed[@]} -gt 0 ]; then
            echo ""
            echo "=== Failed Case Logs ==="
            for out in "${failed[@]}"; do
              echo "--- $out ---"
              cat "$out"
              echo ""
            done
          fi

      # Upload the benchmark summaries and per-case output as a workflow
      # artifact for every cluster except Phoenix.
      - name: Archive Logs (Frontier)
        if: always() && matrix.cluster != 'phoenix'
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.cluster }}-${{ matrix.device }}-${{ matrix.interface }}
          path: |
            pr/bench-${{ matrix.device }}-${{ matrix.interface }}.*
            pr/build/benchmarks/*
            master/bench-${{ matrix.device }}-${{ matrix.interface }}.*
            master/build/benchmarks/*