Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
kardolus
GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/scripts/smoke.sh
3426 views
1
set -euo pipefail
2
3
# smoke.sh
4
#
5
# Bucketed, cheap smoke test for chatgpt-cli.
6
# - Discovers models via: chatgpt --list-models
7
# - Buckets them by *what your CLI will do* (heuristics aligned with GetCapabilities)
8
# - Runs 1 minimal probe per bucket (cheap)
9
# - ALSO probes:
10
# - the model marked "(current)"
11
# - the latest dated releases for key families (gpt-5*, gpt-4o, o1)
12
#
13
# Optional env:
14
# CHATGPT_BIN (default: chatgpt)
15
# SMOKE_WEB (default: false) # if true, run a web probe (costs more)
16
# WEB_CONTEXT_SIZE (default: low)
17
# MAX_TOKENS (default: 256) # o1 can need >=256 to avoid "reasoning-only" empties
18
# TIMEOUT_SECS (default: 30)
19
20
CHATGPT_BIN="${CHATGPT_BIN:-chatgpt}"
21
SMOKE_WEB="${SMOKE_WEB:-false}"
22
WEB_CONTEXT_SIZE="${WEB_CONTEXT_SIZE:-low}"
23
MAX_TOKENS="${MAX_TOKENS:-256}"
24
MAX_TOKENS_PRO="${MAX_TOKENS_PRO:-2048}"
25
TIMEOUT_SECS="${TIMEOUT_SECS:-30}"
26
27
require() { command -v "$1" >/dev/null 2>&1 || { echo "ERROR: missing dependency: $1"; exit 2; }; }
28
require "$CHATGPT_BIN"
29
require awk
30
require sed
31
require grep
32
require sort
33
require tail
34
require head
35
require tr
36
37
PROMPT='Reply with exactly: pong'
38
39
run_with_timeout() {
40
if command -v timeout >/dev/null 2>&1; then
41
timeout "$TIMEOUT_SECS" "$@"
42
elif command -v gtimeout >/dev/null 2>&1; then
43
gtimeout "$TIMEOUT_SECS" "$@"
44
else
45
"$@"
46
fi
47
}
48
49
# Raw list models output (keeps "(current)")
50
list_models_raw() {
51
"$CHATGPT_BIN" --list-models 2>/dev/null \
52
| sed -E 's/\x1B\[[0-9;]*[mK]//g'
53
}
54
55
# Parse model ids from raw output.
56
# Accept lines like:
57
# - gpt-4o-mini
58
# * gpt-5 (current)
59
list_models() {
60
list_models_raw \
61
| awk '
62
/^- / { sub(/^- /,""); print; next }
63
/^\* / {
64
sub(/^\* /,"")
65
sub(/ \(current\).*$/,"")
66
print
67
next
68
}
69
' \
70
| sed -e 's/[[:space:]]*$//' \
71
| grep -v '^$'
72
}
73
74
# Extract the "(current)" model from raw output.
75
pick_current_model() {
76
list_models_raw \
77
| awk '
78
/^\* / {
79
sub(/^\* /,"")
80
sub(/ \(current\).*$/,"")
81
print
82
exit
83
}
84
'
85
}
86
87
normalize() {
88
tr -d '\r' | sed -E 's/[[:space:]]+/ /g; s/^ +| +$//g' | tr '[:upper:]' '[:lower:]'
89
}
90
91
assert_pong_line() {
92
# Accept any output that contains a line that is exactly "pong" (case-insensitive),
93
# ignoring whitespace and other chatter.
94
printf "%s\n" "$1" \
95
| tr -d '\r' \
96
| sed -E 's/^[[:space:]]+|[[:space:]]+$//g' \
97
| tr '[:upper:]' '[:lower:]' \
98
| grep -xq 'pong'
99
}
100
101
pass() { echo "✅ $*"; }
102
fail() { echo "❌ $*"; }
103
104
effort_for_model() {
105
local model="$1"
106
if echo "$model" | grep -qi 'gpt-5-pro'; then
107
echo "high"
108
else
109
echo ""
110
fi
111
}
112
113
pick_first() {
114
# pick first model matching regex from list
115
local pattern="$1"; shift
116
printf '%s\n' "$@" | grep -E "$pattern" | head -n 1 || true
117
}
118
119
is_empty() { [[ -z "${1:-}" ]]; }
120
121
# Pick the newest YYYY-MM-DD variant by lexicographic sort (works for ISO dates).
122
pick_latest_dated() {
123
local pattern="$1"; shift
124
printf '%s\n' "$@" \
125
| grep -E "$pattern" \
126
| grep -v 'search-api' \
127
| sort \
128
| tail -n 1 || true
129
}
130
131
# Pick "latest family" preferring dated variants, then chat-latest, then plain name.
132
# Examples:
133
# pick_latest_family "gpt-5" "${models[@]}"
134
# pick_latest_family "gpt-4o" "${models[@]}"
135
# pick_latest_family "o1" "${models[@]}"
136
pick_latest_family() {
137
local family="$1"; shift
138
139
# e.g. gpt-5-mini-2025-08-07, gpt-5.2-pro-2025-12-11, gpt-4o-2024-11-20, o1-2024-12-17
140
local dated
141
dated="$(pick_latest_dated "^${family}(-[a-z0-9.]+)*-[0-9]{4}-[0-9]{2}-[0-9]{2}$" "$@")"
142
if [[ -n "$dated" ]]; then
143
echo "$dated"
144
return 0
145
fi
146
147
# e.g. gpt-5-chat-latest, gpt-5.2-chat-latest
148
local chat_latest
149
chat_latest="$(printf '%s\n' "$@" \
150
| grep -E "^${family}(-[a-z0-9.]+)*-chat-latest$" \
151
| grep -v 'search-api' \
152
| head -n 1 || true)"
153
if [[ -n "$chat_latest" ]]; then
154
echo "$chat_latest"
155
return 0
156
fi
157
158
# fallback: plain family name if present
159
printf '%s\n' "$@" | grep -E "^${family}$" | head -n 1 || true
160
}
161
162
run_query() {
163
local name="$1"
164
local model="$2"
165
shift 2
166
167
local effort
168
effort="$(effort_for_model "$model")"
169
170
local max_tokens="$MAX_TOKENS"
171
if [[ -n "$effort" ]]; then
172
max_tokens="$MAX_TOKENS_PRO"
173
fi
174
175
# Build args ONCE so the debug rerun is identical (+ --debug).
176
local args=(
177
--new-thread
178
--temperature 0
179
--model "$model"
180
--max-tokens "$max_tokens"
181
)
182
183
if [[ -n "$effort" ]]; then
184
args+=(--effort "$effort")
185
fi
186
187
# Allow extra args like: --web true --web-context-size low
188
if [[ $# -gt 0 ]]; then
189
args+=("$@")
190
fi
191
192
# Always set the query last.
193
args+=(--query "$PROMPT")
194
195
local out="" status=0
196
set +e
197
out="$(run_with_timeout "$CHATGPT_BIN" "${args[@]}" 2>&1)"
198
status=$?
199
set -e
200
201
_dump_failure() {
202
local why="$1"
203
fail "$name (model=$model) $why (exit=$status)"
204
echo "meta: effort='${effort:-}' max_tokens=${max_tokens}" >&2
205
echo "----- RAW OUTPUT -----" >&2
206
printf "%s\n" "$out" >&2
207
echo "----------------------" >&2
208
209
echo "----- DEBUG RERUN -----" >&2
210
local dbg="" dbg_status=0
211
set +e
212
dbg="$(run_with_timeout "$CHATGPT_BIN" "${args[@]}" --debug 2>&1)"
213
dbg_status=$?
214
set -e
215
echo "(debug exit=$dbg_status)" >&2
216
printf "%s\n" "$dbg" >&2
217
echo "-----------------------" >&2
218
}
219
220
if [[ $status -ne 0 ]]; then
221
_dump_failure "failed to run"
222
return 1
223
fi
224
225
if [[ -z "$(printf "%s" "$out" | tr -d '[:space:]')" ]]; then
226
_dump_failure "returned empty output"
227
return 1
228
fi
229
230
if ! assert_pong_line "$out"; then
231
_dump_failure "did not produce 'pong' as a standalone line"
232
return 1
233
fi
234
235
pass "$name (model=$model)"
236
return 0
237
}
238
239
main() {
240
echo "chatgpt-cli smoke test (bucketed + current/latest)"
241
echo "bin: $CHATGPT_BIN"
242
echo "max_tokens: $MAX_TOKENS"
243
echo "web: $SMOKE_WEB (context_size=$WEB_CONTEXT_SIZE)"
244
echo
245
246
local models=()
247
while IFS= read -r line; do
248
[[ -n "$line" ]] && models+=("$line")
249
done < <(list_models)
250
251
if [[ "${#models[@]}" -eq 0 ]]; then
252
echo "ERROR: no models found from --list-models"
253
exit 2
254
fi
255
256
# ---- Bucket counts (roughly aligned with your GetCapabilities) ----
257
local count_realtime count_search count_gpt5 count_o1
258
count_realtime="$(printf '%s\n' "${models[@]}" | grep -c 'realtime' || true)"
259
count_search="$(printf '%s\n' "${models[@]}" | grep -c -- '-search' || true)"
260
count_gpt5="$(printf '%s\n' "${models[@]}" | grep -c '^gpt-5' || true)"
261
count_gpt4="$(printf '%s\n' "${models[@]}" | grep -c '^gpt-4' || true)"
262
count_gpt3="$(printf '%s\n' "${models[@]}" | grep -c '^gpt-3' || true)"
263
count_o1="$(printf '%s\n' "${models[@]}" | grep -c '^o1' || true)"
264
265
echo "Discovered ${#models[@]} model(s)"
266
echo " realtime: $count_realtime"
267
echo " search: $count_search"
268
echo " gpt-3*: $count_gpt3"
269
echo " gpt-4*: $count_gpt4"
270
echo " gpt-5*: $count_gpt5"
271
echo " o1*: $count_o1"
272
echo
273
274
# ---- Existing bucket probes (keep these) ----
275
276
local m_completions
277
m_completions="$(pick_first '^gpt-4o-mini$' "${models[@]}")"
278
if is_empty "$m_completions"; then m_completions="$(pick_first '^gpt-4\.1-mini$' "${models[@]}")"; fi
279
if is_empty "$m_completions"; then m_completions="$(pick_first '^gpt-4o$' "${models[@]}")"; fi
280
281
local m_responses
282
m_responses="$(pick_first '^gpt-5-mini$' "${models[@]}")"
283
if is_empty "$m_responses"; then m_responses="$(pick_first '^gpt-5$' "${models[@]}")"; fi
284
if is_empty "$m_responses"; then m_responses="$(pick_first '^gpt-5(\.|-).*' "${models[@]}")"; fi
285
if is_empty "$m_responses"; then m_responses="$(pick_first '^o1-pro$' "${models[@]}")"; fi
286
287
local m_search
288
m_search="$(pick_first '^gpt-4o-mini-search-preview$' "${models[@]}")"
289
if is_empty "$m_search"; then m_search="$(pick_first '^gpt-4o-search-preview$' "${models[@]}")"; fi
290
if is_empty "$m_search"; then m_search="$(pick_first 'search' "${models[@]}")"; fi
291
292
local m_o1
293
m_o1="$(pick_first '^o1-mini$' "${models[@]}")"
294
if is_empty "$m_o1"; then m_o1="$(pick_first '^o1$' "${models[@]}")"; fi
295
296
local m_web=""
297
if [[ "$SMOKE_WEB" == "true" ]]; then
298
m_web="$(pick_first '^gpt-5-mini$' "${models[@]}")"
299
if is_empty "$m_web"; then m_web="$(pick_first '^gpt-5$' "${models[@]}")"; fi
300
fi
301
302
# ---- New probes: current + latest releases ----
303
304
local m_current
305
m_current="$(pick_current_model || true)"
306
307
local m_latest_gpt5
308
m_latest_gpt5="$(pick_latest_family 'gpt-5' "${models[@]}")"
309
310
local m_latest_4o
311
m_latest_4o="$(pick_latest_family 'gpt-4o' "${models[@]}")"
312
313
local m_latest_o1
314
m_latest_o1="$(pick_latest_family 'o1' "${models[@]}")"
315
316
local failures=0
317
local ran=0
318
319
# Existing probes
320
if is_empty "$m_completions"; then
321
echo "WARN: no completions-ish model found (skipping)"
322
else
323
ran=$((ran+1))
324
run_query "probe:completions" "$m_completions" || failures=$((failures+1))
325
fi
326
327
if is_empty "$m_responses"; then
328
echo "WARN: no responses-ish model found (skipping)"
329
else
330
ran=$((ran+1))
331
run_query "probe:responses" "$m_responses" || failures=$((failures+1))
332
fi
333
334
if is_empty "$m_search"; then
335
echo "WARN: no search-preview model found (skipping)"
336
else
337
ran=$((ran+1))
338
run_query "probe:search-preview" "$m_search" || failures=$((failures+1))
339
fi
340
341
if is_empty "$m_o1"; then
342
echo "WARN: no o1 model found (skipping)"
343
else
344
ran=$((ran+1))
345
run_query "probe:o1" "$m_o1" || failures=$((failures+1))
346
fi
347
348
if [[ "$SMOKE_WEB" == "true" ]]; then
349
if is_empty "$m_web"; then
350
echo "WARN: no gpt-5 model found for web probe (skipping)"
351
else
352
ran=$((ran+1))
353
run_query "probe:web" "$m_web" --web true --web-context-size "$WEB_CONTEXT_SIZE" || failures=$((failures+1))
354
fi
355
fi
356
357
# New probes
358
if is_empty "$m_current"; then
359
echo "WARN: could not detect '(current)' model (skipping probe:current)"
360
else
361
ran=$((ran+1))
362
run_query "probe:current" "$m_current" || failures=$((failures+1))
363
fi
364
365
# Only run latest probes if they add coverage (avoid duplicates).
366
if ! is_empty "$m_latest_gpt5" && [[ "$m_latest_gpt5" != "$m_responses" ]]; then
367
ran=$((ran+1))
368
run_query "probe:latest-gpt5" "$m_latest_gpt5" || failures=$((failures+1))
369
fi
370
371
if ! is_empty "$m_latest_4o" && [[ "$m_latest_4o" != "$m_completions" ]]; then
372
ran=$((ran+1))
373
run_query "probe:latest-4o" "$m_latest_4o" || failures=$((failures+1))
374
fi
375
376
if ! is_empty "$m_latest_o1" && [[ "$m_latest_o1" != "$m_o1" ]]; then
377
ran=$((ran+1))
378
run_query "probe:latest-o1" "$m_latest_o1" || failures=$((failures+1))
379
fi
380
381
echo
382
echo "Ran $ran probe(s)."
383
384
if [[ "$failures" -gt 0 ]]; then
385
echo "Smoke test finished with $failures failure(s)."
386
exit 1
387
fi
388
389
echo "Smoke test passed."
390
}
391
392
main "$@"
393
394