CoCalc -- pool_starvation

GitHub Repository: projectdiscovery/nuclei
Path: blob/dev/pkg/js/compiler/pool_starvation_test.go
⁴⁵³⁸ views
1
package compiler
2

3
import (
4
	"context"
5
	"sync"
6
	"testing"
7
	"time"
8

9
	"github.com/stretchr/testify/require"
10

11
	syncutil "github.com/projectdiscovery/utils/sync"
12
)
13

14
// TestPoolSlotStarvation reproduces the core bug from issue #6894:
15
// zombie goroutines from timed-out JS executions hold pool slots
16
// indefinitely, causing subsequent executions to fail.
17
//
18
// The flow:
19
//  1. ExecFuncWithTwoReturns wraps ExecuteProgram in a goroutine with a deadline.
20
//  2. Inside ExecuteProgram, pool.Add() acquires a slot, defer pool.Done() is set.
21
//  3. The JS script makes a network call using context.TODO() (no deadline).
22
//  4. The deadline fires and ExecFuncWithTwoReturns returns the deadline error.
23
//  5. The goroutine is STILL running (zombie), holding the pool slot via defer.
24
//  6. With enough zombies, all slots are consumed and new executions time out
25
//     waiting for a slot that will never be released.
26
func TestPoolSlotStarvation(t *testing.T) {
27
	const poolSize = 3
28
	pool, err := syncutil.New(syncutil.WithSize(poolSize))
29
	require.NoError(t, err)
30

31
	// Simulate zombies: goroutines that acquire a slot, then block for a
32
	// long time (as if stuck on a network call with context.TODO()).
33
	// The caller "abandons" them via a short deadline.
34
	var zombieWg sync.WaitGroup
35
	for i := range poolSize {
36
		zombieWg.Add(1)
37
		go func(idx int) {
38
			defer zombieWg.Done()
39
			// This is what happens inside executeWithoutPooling/executeWithPoolingProgram:
40
			pool.Add()
41
			defer pool.Done()
42
			// Simulate a stuck network call (15s in the original report).
43
			time.Sleep(10 * time.Second)
44
		}(i)
45
	}
46

47
	// Give zombies time to acquire their slots.
48
	time.Sleep(100 * time.Millisecond)
49

50
	// All slots are held by zombies. Try to acquire a new slot with a
51
	// deadline - this should fail because no slots are available and
52
	// zombies won't release them for ~10s.
53
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
54
	defer cancel()
55

56
	start := time.Now()
57
	err = pool.AddWithContext(ctx)
58
	elapsed := time.Since(start)
59

60
	// This demonstrates the starvation: the pool is fully exhausted by zombies,
61
	// and the new acquisition times out.
62
	require.Error(t, err, "should fail, all slots held by zombie goroutines")
63
	require.Less(t, elapsed, 2*time.Second, "should fail fast at deadline, not block forever")
64
	t.Logf("Pool starvation confirmed: new slot acquisition failed after %v (pool exhausted by %d zombies)", elapsed, poolSize)
65
}
66

67
// TestWatchdogPreventsStarvation demonstrates the fix: a watchdog goroutine
68
// releases pool slots when the deadline expires, even if the zombie is still
69
// running. This is the core of the fix in PR #6896.
70
func TestWatchdogPreventsStarvation(t *testing.T) {
71
	const poolSize = 3
72
	pool, err := syncutil.New(syncutil.WithSize(poolSize))
73
	require.NoError(t, err)
74

75
	// Fill all slots with "zombies" that have a 100ms deadline but block for
76
	// 10s. The watchdog pattern releases each slot when the deadline fires.
77
	for i := range poolSize {
78
		ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
79
		defer cancel()
80

81
		require.NoError(t, pool.AddWithContext(ctx), "initial acquisition %d", i)
82

83
		watchdogDone := make(chan struct{})
84
		var once sync.Once
85

86
		releaseSlot := func() {
87
			once.Do(func() { pool.Done() })
88
		}
89

90
		// Watchdog: free the slot when deadline expires.
91
		go func() {
92
			select {
93
			case <-ctx.Done():
94
				releaseSlot()
95
			case <-watchdogDone:
96
			}
97
		}()
98

99
		// Zombie worker: blocks for 10s but the watchdog will free its slot.
100
		go func() {
101
			defer func() {
102
				close(watchdogDone)
103
				releaseSlot()
104
			}()
105
			time.Sleep(10 * time.Second)
106
		}()
107
	}
108

109
	// Wait for all deadlines to fire and watchdogs to release slots.
110
	time.Sleep(200 * time.Millisecond)
111

112
	// All slots should be free now, so new acquisitions should work.
113
	for i := range poolSize {
114
		ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
115
		defer cancel()
116
		require.NoError(t, pool.AddWithContext(ctx),
117
			"post-recovery acquisition %d/%d (pool should no longer be starved)", i+1, poolSize)
118
		pool.Done()
119
	}
120

121
	t.Log("Watchdog fix confirmed: all slots recovered after zombie deadline expiry")
122
}
123

124
Product

Resources

Company