Path: blob/dev/pkg/js/compiler/pool_starvation_test.go
4538 views
package compiler

import (
	"context"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	syncutil "github.com/projectdiscovery/utils/sync"
)

// TestPoolSlotStarvation reproduces the core bug from issue #6894:
// zombie goroutines from timed-out JS executions hold pool slots
// indefinitely, causing subsequent executions to fail.
//
// The flow:
//  1. ExecFuncWithTwoReturns wraps ExecuteProgram in a goroutine with a deadline.
//  2. Inside ExecuteProgram, pool.Add() acquires a slot, defer pool.Done() is set.
//  3. The JS script makes a network call using context.TODO() (no deadline).
//  4. The deadline fires and ExecFuncWithTwoReturns returns the deadline error.
//  5. The goroutine is STILL running (zombie), holding the pool slot via defer.
//  6. With enough zombies, all slots are consumed and new executions time out
//     waiting for a slot that will never be released.
func TestPoolSlotStarvation(t *testing.T) {
	const poolSize = 3
	pool, err := syncutil.New(syncutil.WithSize(poolSize))
	require.NoError(t, err)

	// release unblocks every zombie once the test is over, so the goroutines
	// (and the slots they hold) do not outlive the test.
	release := make(chan struct{})
	var acquired, zombieWg sync.WaitGroup
	t.Cleanup(func() {
		close(release)
		zombieWg.Wait()
	})

	// Simulate zombies: goroutines that acquire a slot, then stay blocked
	// (as if stuck on a network call with context.TODO()) for the whole
	// duration of the test. The caller "abandons" them.
	for range poolSize {
		acquired.Add(1)
		zombieWg.Add(1)
		go func() {
			defer zombieWg.Done()
			// This is what happens inside executeWithoutPooling/executeWithPoolingProgram:
			pool.Add()
			defer pool.Done()
			acquired.Done()
			// Simulate a stuck network call: block until the test finishes.
			<-release
		}()
	}

	// Wait until every zombie actually holds its slot; this replaces a
	// timing-based sleep and makes the exhaustion deterministic.
	acquired.Wait()

	// All slots are held by zombies. Try to acquire a new slot with a
	// deadline - this should fail because no slots are available and
	// zombies won't release them while the test body is running.
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	start := time.Now()
	err = pool.AddWithContext(ctx)
	elapsed := time.Since(start)

	// This demonstrates the starvation: the pool is fully exhausted by zombies,
	// and the new acquisition times out.
	require.Error(t, err, "should fail, all slots held by zombie goroutines")
	require.Less(t, elapsed, 2*time.Second, "should fail fast at deadline, not block forever")
	t.Logf("Pool starvation confirmed: new slot acquisition failed after %v (pool exhausted by %d zombies)", elapsed, poolSize)
}

// TestWatchdogPreventsStarvation demonstrates the fix: a watchdog goroutine
// releases pool slots when the deadline expires, even if the zombie is still
// running. This is the core of the fix in PR #6896.
func TestWatchdogPreventsStarvation(t *testing.T) {
	const poolSize = 3
	pool, err := syncutil.New(syncutil.WithSize(poolSize))
	require.NoError(t, err)

	// release unblocks the zombie workers at the end of the test so that no
	// goroutine leaks past it. Cleanups run last-in-first-out, so the
	// per-iteration cancel funcs registered below run before this one.
	release := make(chan struct{})
	var zombieWg, watchdogWg sync.WaitGroup
	t.Cleanup(func() {
		close(release)
		zombieWg.Wait()
		watchdogWg.Wait()
	})

	// Fill all slots with "zombies" that have a 100ms deadline but stay
	// blocked well past it. The watchdog pattern releases each slot when the
	// deadline fires.
	for i := range poolSize {
		ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
		// The context must stay alive until its deadline fires, so cancel is
		// deferred to test cleanup rather than called at the end of the iteration.
		t.Cleanup(cancel)

		require.NoError(t, pool.AddWithContext(ctx), "initial acquisition %d", i)

		watchdogDone := make(chan struct{})
		var once sync.Once

		// releaseSlot may be called by both the watchdog and the worker;
		// sync.Once guarantees the slot is returned exactly once.
		releaseSlot := func() {
			once.Do(func() { pool.Done() })
		}

		// Watchdog: free the slot when deadline expires.
		watchdogWg.Add(1)
		go func() {
			defer watchdogWg.Done()
			select {
			case <-ctx.Done():
				releaseSlot()
			case <-watchdogDone:
			}
		}()

		// Zombie worker: stays blocked but the watchdog will free its slot.
		zombieWg.Add(1)
		go func() {
			defer zombieWg.Done()
			defer func() {
				close(watchdogDone)
				releaseSlot()
			}()
			<-release
		}()
	}

	// Wait for all deadlines to fire and watchdogs to release slots. Waiting
	// on the watchdogs themselves avoids guessing a sleep duration.
	watchdogWg.Wait()

	// All slots should be free now, so new acquisitions should work.
	for i := range poolSize {
		ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
		acquireErr := pool.AddWithContext(ctx)
		cancel()
		require.NoError(t, acquireErr,
			"post-recovery acquisition %d/%d (pool should no longer be starved)", i+1, poolSize)
		pool.Done()
	}

	t.Log("Watchdog fix confirmed: all slots recovered after zombie deadline expiry")
}