CoCalc -- smoke.sh

GitHub Repository: kardolus/chatgpt-cli
Path: blob/main/scripts/smoke.sh
³⁴²⁶ views
1
set -euo pipefail
2

3
# smoke.sh
4
#
5
# Bucketed, cheap smoke test for chatgpt-cli.
6
# - Discovers models via: chatgpt --list-models
7
# - Buckets them by *what your CLI will do* (heuristics aligned with GetCapabilities)
8
# - Runs 1 minimal probe per bucket (cheap)
9
# - ALSO probes:
10
#     - the model marked "(current)"
11
#     - the latest dated releases for key families (gpt-5*, gpt-4o, o1)
12
#
13
# Optional env:
14
#   CHATGPT_BIN (default: chatgpt)
15
#   SMOKE_WEB (default: false)       # if true, run a web probe (costs more)
16
#   WEB_CONTEXT_SIZE (default: low)
17
#   MAX_TOKENS (default: 256)        # o1 can need >=256 to avoid "reasoning-only" empties
18
#   TIMEOUT_SECS (default: 30)
19

20
CHATGPT_BIN="${CHATGPT_BIN:-chatgpt}"
21
SMOKE_WEB="${SMOKE_WEB:-false}"
22
WEB_CONTEXT_SIZE="${WEB_CONTEXT_SIZE:-low}"
23
MAX_TOKENS="${MAX_TOKENS:-256}"
24
MAX_TOKENS_PRO="${MAX_TOKENS_PRO:-2048}"
25
TIMEOUT_SECS="${TIMEOUT_SECS:-30}"
26

27
require() { command -v "$1" >/dev/null 2>&1 || { echo "ERROR: missing dependency: $1"; exit 2; }; }
28
require "$CHATGPT_BIN"
29
require awk
30
require sed
31
require grep
32
require sort
33
require tail
34
require head
35
require tr
36

37
PROMPT='Reply with exactly: pong'
38

39
run_with_timeout() {
40
  if command -v timeout >/dev/null 2>&1; then
41
    timeout "$TIMEOUT_SECS" "$@"
42
  elif command -v gtimeout >/dev/null 2>&1; then
43
    gtimeout "$TIMEOUT_SECS" "$@"
44
  else
45
    "$@"
46
  fi
47
}
48

49
# Raw list models output (keeps "(current)")
50
list_models_raw() {
51
  "$CHATGPT_BIN" --list-models 2>/dev/null \
52
    | sed -E 's/\x1B\[[0-9;]*[mK]//g'
53
}
54

55
# Parse model ids from raw output.
56
# Accept lines like:
57
#   - gpt-4o-mini
58
#   * gpt-5 (current)
59
list_models() {
60
  list_models_raw \
61
    | awk '
62
        /^- / { sub(/^- /,""); print; next }
63
        /^\* / {
64
          sub(/^\* /,"")
65
          sub(/ \(current\).*$/,"")
66
          print
67
          next
68
        }
69
      ' \
70
    | sed -e 's/[[:space:]]*$//' \
71
    | grep -v '^$'
72
}
73

74
# Extract the "(current)" model from raw output.
75
pick_current_model() {
76
  list_models_raw \
77
    | awk '
78
        /^\* / {
79
          sub(/^\* /,"")
80
          sub(/ \(current\).*$/,"")
81
          print
82
          exit
83
        }
84
      '
85
}
86

87
normalize() {
88
  tr -d '\r' | sed -E 's/[[:space:]]+/ /g; s/^ +| +$//g' | tr '[:upper:]' '[:lower:]'
89
}
90

91
assert_pong_line() {
92
  # Accept any output that contains a line that is exactly "pong" (case-insensitive),
93
  # ignoring whitespace and other chatter.
94
  printf "%s\n" "$1" \
95
    | tr -d '\r' \
96
    | sed -E 's/^[[:space:]]+|[[:space:]]+$//g' \
97
    | tr '[:upper:]' '[:lower:]' \
98
    | grep -xq 'pong'
99
}
100

101
pass() { echo "✅ $*"; }
102
fail() { echo "❌ $*"; }
103

104
effort_for_model() {
105
  local model="$1"
106
  if echo "$model" | grep -qi 'gpt-5-pro'; then
107
    echo "high"
108
  else
109
    echo ""
110
  fi
111
}
112

113
pick_first() {
114
  # pick first model matching regex from list
115
  local pattern="$1"; shift
116
  printf '%s\n' "$@" | grep -E "$pattern" | head -n 1 || true
117
}
118

119
is_empty() { [[ -z "${1:-}" ]]; }
120

121
# Pick the newest YYYY-MM-DD variant by lexicographic sort (works for ISO dates).
122
pick_latest_dated() {
123
  local pattern="$1"; shift
124
  printf '%s\n' "$@" \
125
    | grep -E "$pattern" \
126
    | grep -v 'search-api' \
127
    | sort \
128
    | tail -n 1 || true
129
}
130

131
# Pick "latest family" preferring dated variants, then chat-latest, then plain name.
132
# Examples:
133
#   pick_latest_family "gpt-5" "${models[@]}"
134
#   pick_latest_family "gpt-4o" "${models[@]}"
135
#   pick_latest_family "o1" "${models[@]}"
136
pick_latest_family() {
137
  local family="$1"; shift
138

139
  # e.g. gpt-5-mini-2025-08-07, gpt-5.2-pro-2025-12-11, gpt-4o-2024-11-20, o1-2024-12-17
140
  local dated
141
  dated="$(pick_latest_dated "^${family}(-[a-z0-9.]+)*-[0-9]{4}-[0-9]{2}-[0-9]{2}$" "$@")"
142
  if [[ -n "$dated" ]]; then
143
    echo "$dated"
144
    return 0
145
  fi
146

147
  # e.g. gpt-5-chat-latest, gpt-5.2-chat-latest
148
  local chat_latest
149
  chat_latest="$(printf '%s\n' "$@" \
150
  | grep -E "^${family}(-[a-z0-9.]+)*-chat-latest$" \
151
  | grep -v 'search-api' \
152
  | head -n 1 || true)"
153
  if [[ -n "$chat_latest" ]]; then
154
    echo "$chat_latest"
155
    return 0
156
  fi
157

158
  # fallback: plain family name if present
159
  printf '%s\n' "$@" | grep -E "^${family}$" | head -n 1 || true
160
}
161

162
run_query() {
163
  local name="$1"
164
  local model="$2"
165
  shift 2
166

167
  local effort
168
  effort="$(effort_for_model "$model")"
169

170
  local max_tokens="$MAX_TOKENS"
171
  if [[ -n "$effort" ]]; then
172
    max_tokens="$MAX_TOKENS_PRO"
173
  fi
174

175
  # Build args ONCE so the debug rerun is identical (+ --debug).
176
  local args=(
177
    --new-thread
178
    --temperature 0
179
    --model "$model"
180
    --max-tokens "$max_tokens"
181
  )
182

183
  if [[ -n "$effort" ]]; then
184
    args+=(--effort "$effort")
185
  fi
186

187
  # Allow extra args like: --web true --web-context-size low
188
  if [[ $# -gt 0 ]]; then
189
    args+=("$@")
190
  fi
191

192
  # Always set the query last.
193
  args+=(--query "$PROMPT")
194

195
  local out="" status=0
196
  set +e
197
  out="$(run_with_timeout "$CHATGPT_BIN" "${args[@]}" 2>&1)"
198
  status=$?
199
  set -e
200

201
  _dump_failure() {
202
    local why="$1"
203
    fail "$name (model=$model) $why (exit=$status)"
204
    echo "meta: effort='${effort:-}' max_tokens=${max_tokens}" >&2
205
    echo "----- RAW OUTPUT -----" >&2
206
    printf "%s\n" "$out" >&2
207
    echo "----------------------" >&2
208

209
    echo "----- DEBUG RERUN -----" >&2
210
    local dbg="" dbg_status=0
211
    set +e
212
    dbg="$(run_with_timeout "$CHATGPT_BIN" "${args[@]}" --debug 2>&1)"
213
    dbg_status=$?
214
    set -e
215
    echo "(debug exit=$dbg_status)" >&2
216
    printf "%s\n" "$dbg" >&2
217
    echo "-----------------------" >&2
218
  }
219

220
  if [[ $status -ne 0 ]]; then
221
    _dump_failure "failed to run"
222
    return 1
223
  fi
224

225
  if [[ -z "$(printf "%s" "$out" | tr -d '[:space:]')" ]]; then
226
    _dump_failure "returned empty output"
227
    return 1
228
  fi
229

230
  if ! assert_pong_line "$out"; then
231
    _dump_failure "did not produce 'pong' as a standalone line"
232
    return 1
233
  fi
234

235
  pass "$name (model=$model)"
236
  return 0
237
}
238

239
main() {
240
  echo "chatgpt-cli smoke test (bucketed + current/latest)"
241
  echo "bin: $CHATGPT_BIN"
242
  echo "max_tokens: $MAX_TOKENS"
243
  echo "web: $SMOKE_WEB (context_size=$WEB_CONTEXT_SIZE)"
244
  echo
245

246
  local models=()
247
  while IFS= read -r line; do
248
    [[ -n "$line" ]] && models+=("$line")
249
  done < <(list_models)
250

251
  if [[ "${#models[@]}" -eq 0 ]]; then
252
    echo "ERROR: no models found from --list-models"
253
    exit 2
254
  fi
255

256
  # ---- Bucket counts (roughly aligned with your GetCapabilities) ----
257
  local count_realtime count_search count_gpt5 count_o1
258
  count_realtime="$(printf '%s\n' "${models[@]}" | grep -c 'realtime' || true)"
259
  count_search="$(printf '%s\n' "${models[@]}" | grep -c -- '-search' || true)"
260
  count_gpt5="$(printf '%s\n' "${models[@]}" | grep -c '^gpt-5' || true)"
261
  count_gpt4="$(printf '%s\n' "${models[@]}" | grep -c '^gpt-4' || true)"
262
  count_gpt3="$(printf '%s\n' "${models[@]}" | grep -c '^gpt-3' || true)"
263
  count_o1="$(printf '%s\n' "${models[@]}" | grep -c '^o1' || true)"
264

265
  echo "Discovered ${#models[@]} model(s)"
266
  echo "  realtime:      $count_realtime"
267
  echo "  search:        $count_search"
268
  echo "  gpt-3*:        $count_gpt3"
269
  echo "  gpt-4*:        $count_gpt4"
270
  echo "  gpt-5*:        $count_gpt5"
271
  echo "  o1*:           $count_o1"
272
  echo
273

274
  # ---- Existing bucket probes (keep these) ----
275

276
  local m_completions
277
  m_completions="$(pick_first '^gpt-4o-mini$' "${models[@]}")"
278
  if is_empty "$m_completions"; then m_completions="$(pick_first '^gpt-4\.1-mini$' "${models[@]}")"; fi
279
  if is_empty "$m_completions"; then m_completions="$(pick_first '^gpt-4o$' "${models[@]}")"; fi
280

281
  local m_responses
282
  m_responses="$(pick_first '^gpt-5-mini$' "${models[@]}")"
283
  if is_empty "$m_responses"; then m_responses="$(pick_first '^gpt-5$' "${models[@]}")"; fi
284
  if is_empty "$m_responses"; then m_responses="$(pick_first '^gpt-5(\.|-).*' "${models[@]}")"; fi
285
  if is_empty "$m_responses"; then m_responses="$(pick_first '^o1-pro$' "${models[@]}")"; fi
286

287
  local m_search
288
  m_search="$(pick_first '^gpt-4o-mini-search-preview$' "${models[@]}")"
289
  if is_empty "$m_search"; then m_search="$(pick_first '^gpt-4o-search-preview$' "${models[@]}")"; fi
290
  if is_empty "$m_search"; then m_search="$(pick_first 'search' "${models[@]}")"; fi
291

292
  local m_o1
293
  m_o1="$(pick_first '^o1-mini$' "${models[@]}")"
294
  if is_empty "$m_o1"; then m_o1="$(pick_first '^o1$' "${models[@]}")"; fi
295

296
  local m_web=""
297
  if [[ "$SMOKE_WEB" == "true" ]]; then
298
    m_web="$(pick_first '^gpt-5-mini$' "${models[@]}")"
299
    if is_empty "$m_web"; then m_web="$(pick_first '^gpt-5$' "${models[@]}")"; fi
300
  fi
301

302
  # ---- New probes: current + latest releases ----
303

304
  local m_current
305
  m_current="$(pick_current_model || true)"
306

307
  local m_latest_gpt5
308
  m_latest_gpt5="$(pick_latest_family 'gpt-5' "${models[@]}")"
309

310
  local m_latest_4o
311
  m_latest_4o="$(pick_latest_family 'gpt-4o' "${models[@]}")"
312

313
  local m_latest_o1
314
  m_latest_o1="$(pick_latest_family 'o1' "${models[@]}")"
315

316
  local failures=0
317
  local ran=0
318

319
  # Existing probes
320
  if is_empty "$m_completions"; then
321
    echo "WARN: no completions-ish model found (skipping)"
322
  else
323
    ran=$((ran+1))
324
    run_query "probe:completions" "$m_completions" || failures=$((failures+1))
325
  fi
326

327
  if is_empty "$m_responses"; then
328
    echo "WARN: no responses-ish model found (skipping)"
329
  else
330
    ran=$((ran+1))
331
    run_query "probe:responses" "$m_responses" || failures=$((failures+1))
332
  fi
333

334
  if is_empty "$m_search"; then
335
    echo "WARN: no search-preview model found (skipping)"
336
  else
337
    ran=$((ran+1))
338
    run_query "probe:search-preview" "$m_search" || failures=$((failures+1))
339
  fi
340

341
  if is_empty "$m_o1"; then
342
    echo "WARN: no o1 model found (skipping)"
343
  else
344
    ran=$((ran+1))
345
    run_query "probe:o1" "$m_o1" || failures=$((failures+1))
346
  fi
347

348
  if [[ "$SMOKE_WEB" == "true" ]]; then
349
    if is_empty "$m_web"; then
350
      echo "WARN: no gpt-5 model found for web probe (skipping)"
351
    else
352
      ran=$((ran+1))
353
      run_query "probe:web" "$m_web" --web true --web-context-size "$WEB_CONTEXT_SIZE" || failures=$((failures+1))
354
    fi
355
  fi
356

357
  # New probes
358
  if is_empty "$m_current"; then
359
    echo "WARN: could not detect '(current)' model (skipping probe:current)"
360
  else
361
    ran=$((ran+1))
362
    run_query "probe:current" "$m_current" || failures=$((failures+1))
363
  fi
364

365
  # Only run latest probes if they add coverage (avoid duplicates).
366
  if ! is_empty "$m_latest_gpt5" && [[ "$m_latest_gpt5" != "$m_responses" ]]; then
367
    ran=$((ran+1))
368
    run_query "probe:latest-gpt5" "$m_latest_gpt5" || failures=$((failures+1))
369
  fi
370

371
  if ! is_empty "$m_latest_4o" && [[ "$m_latest_4o" != "$m_completions" ]]; then
372
    ran=$((ran+1))
373
    run_query "probe:latest-4o" "$m_latest_4o" || failures=$((failures+1))
374
  fi
375

376
  if ! is_empty "$m_latest_o1" && [[ "$m_latest_o1" != "$m_o1" ]]; then
377
    ran=$((ran+1))
378
    run_query "probe:latest-o1" "$m_latest_o1" || failures=$((failures+1))
379
  fi
380

381
  echo
382
  echo "Ran $ran probe(s)."
383

384
  if [[ "$failures" -gt 0 ]]; then
385
    echo "Smoke test finished with $failures failure(s)."
386
    exit 1
387
  fi
388

389
  echo "Smoke test passed."
390
}
391

392
main "$@"
393

394
Product

Resources

Company