Skip to content

Commit 7fad2c9

Browse files
adonovangopherbot
authored andcommitted
errgroup: revert propagation of panics
This change reverts CL 644575, which caused panics in the f() call after group.Go(f) to be propagated to the subsequent group.Wait call. This caused more problems than it solved. Also: - preserve some of the doc comment wording of Group.Go. - leave a "tsunami stone" comment in Group.Go. Fixes golang/go#53757 Updates golang/go#74275 Updates golang/go#74304 Updates golang/go#74306 Change-Id: I6e3992510944db7d69c72eaf241aedf8b84e62dd Reviewed-on: https://go-review.googlesource.com/c/sync/+/682935 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: qiu laidongfeng2 <[email protected]> Reviewed-by: Junyang Shao <[email protected]> Reviewed-by: Sean Liao <[email protected]> Auto-Submit: Sean Liao <[email protected]>
1 parent 8a14946 commit 7fad2c9

File tree

2 files changed

+31
-151
lines changed

2 files changed

+31
-151
lines changed

errgroup/errgroup.go

Lines changed: 31 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ package errgroup
1212
import (
1313
"context"
1414
"fmt"
15-
"runtime"
16-
"runtime/debug"
1715
"sync"
1816
)
1917

@@ -33,10 +31,6 @@ type Group struct {
3331

3432
errOnce sync.Once
3533
err error
36-
37-
mu sync.Mutex
38-
panicValue any // = PanicError | PanicValue; non-nil if some Group.Go goroutine panicked.
39-
abnormal bool // some Group.Go goroutine terminated abnormally (panic or goexit).
4034
}
4135

4236
func (g *Group) done() {
@@ -56,22 +50,13 @@ func WithContext(ctx context.Context) (*Group, context.Context) {
5650
return &Group{cancel: cancel}, ctx
5751
}
5852

59-
// Wait blocks until all function calls from the Go method have returned
60-
// normally, then returns the first non-nil error (if any) from them.
61-
//
62-
// If any of the calls panics, Wait panics with a [PanicValue];
63-
// and if any of them calls [runtime.Goexit], Wait calls runtime.Goexit.
53+
// Wait blocks until all function calls from the Go method have returned, then
54+
// returns the first non-nil error (if any) from them.
6455
func (g *Group) Wait() error {
6556
g.wg.Wait()
6657
if g.cancel != nil {
6758
g.cancel(g.err)
6859
}
69-
if g.panicValue != nil {
70-
panic(g.panicValue)
71-
}
72-
if g.abnormal {
73-
runtime.Goexit()
74-
}
7560
return g.err
7661
}
7762

@@ -81,53 +66,31 @@ func (g *Group) Wait() error {
8166
// It blocks until the new goroutine can be added without the number of
8267
// goroutines in the group exceeding the configured limit.
8368
//
84-
// The first goroutine in the group that returns a non-nil error, panics, or
85-
// invokes [runtime.Goexit] will cancel the associated Context, if any.
69+
// The first goroutine in the group that returns a non-nil error will
70+
// cancel the associated Context, if any. The error will be returned
71+
// by Wait.
8672
func (g *Group) Go(f func() error) {
8773
if g.sem != nil {
8874
g.sem <- token{}
8975
}
9076

91-
g.add(f)
92-
}
93-
94-
func (g *Group) add(f func() error) {
9577
g.wg.Add(1)
9678
go func() {
9779
defer g.done()
98-
normalReturn := false
99-
defer func() {
100-
if normalReturn {
101-
return
102-
}
103-
v := recover()
104-
g.mu.Lock()
105-
defer g.mu.Unlock()
106-
if !g.abnormal {
107-
if g.cancel != nil {
108-
g.cancel(g.err)
109-
}
110-
g.abnormal = true
111-
}
112-
if v != nil && g.panicValue == nil {
113-
switch v := v.(type) {
114-
case error:
115-
g.panicValue = PanicError{
116-
Recovered: v,
117-
Stack: debug.Stack(),
118-
}
119-
default:
120-
g.panicValue = PanicValue{
121-
Recovered: v,
122-
Stack: debug.Stack(),
123-
}
124-
}
125-
}
126-
}()
12780

128-
err := f()
129-
normalReturn = true
130-
if err != nil {
81+
// It is tempting to propagate panics from f()
82+
// up to the goroutine that calls Wait, but
83+
// it creates more problems than it solves:
84+
// - it delays panics arbitrarily,
85+
// making bugs harder to detect;
86+
// - it turns f's panic stack into a mere value,
87+
// hiding it from crash-monitoring tools;
88+
// - it risks deadlocks that hide the panic entirely,
89+
// if f's panic leaves the program in a state
90+
// that prevents the Wait call from being reached.
91+
// See #53757, #74275, #74304, #74306.
92+
93+
if err := f(); err != nil {
13194
g.errOnce.Do(func() {
13295
g.err = err
13396
if g.cancel != nil {
@@ -152,7 +115,19 @@ func (g *Group) TryGo(f func() error) bool {
152115
}
153116
}
154117

155-
g.add(f)
118+
g.wg.Add(1)
119+
go func() {
120+
defer g.done()
121+
122+
if err := f(); err != nil {
123+
g.errOnce.Do(func() {
124+
g.err = err
125+
if g.cancel != nil {
126+
g.cancel(g.err)
127+
}
128+
})
129+
}
130+
}()
156131
return true
157132
}
158133

@@ -174,34 +149,3 @@ func (g *Group) SetLimit(n int) {
174149
}
175150
g.sem = make(chan token, n)
176151
}
177-
178-
// PanicError wraps an error recovered from an unhandled panic
179-
// when calling a function passed to Go or TryGo.
180-
type PanicError struct {
181-
Recovered error
182-
Stack []byte // result of call to [debug.Stack]
183-
}
184-
185-
func (p PanicError) Error() string {
186-
if len(p.Stack) > 0 {
187-
return fmt.Sprintf("recovered from errgroup.Group: %v\n%s", p.Recovered, p.Stack)
188-
}
189-
return fmt.Sprintf("recovered from errgroup.Group: %v", p.Recovered)
190-
}
191-
192-
func (p PanicError) Unwrap() error { return p.Recovered }
193-
194-
// PanicValue wraps a value that does not implement the error interface,
195-
// recovered from an unhandled panic when calling a function passed to Go or
196-
// TryGo.
197-
type PanicValue struct {
198-
Recovered any
199-
Stack []byte // result of call to [debug.Stack]
200-
}
201-
202-
func (p PanicValue) String() string {
203-
if len(p.Stack) > 0 {
204-
return fmt.Sprintf("recovered from errgroup.Group: %v\n%s", p.Recovered, p.Stack)
205-
}
206-
return fmt.Sprintf("recovered from errgroup.Group: %v", p.Recovered)
207-
}

errgroup/errgroup_test.go

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"fmt"
1111
"net/http"
1212
"os"
13-
"strings"
1413
"sync/atomic"
1514
"testing"
1615
"time"
@@ -290,69 +289,6 @@ func TestCancelCause(t *testing.T) {
290289
}
291290
}
292291

293-
func TestPanic(t *testing.T) {
294-
t.Run("error", func(t *testing.T) {
295-
g := &errgroup.Group{}
296-
p := errors.New("")
297-
g.Go(func() error {
298-
panic(p)
299-
})
300-
defer func() {
301-
err := recover()
302-
if err == nil {
303-
t.Fatalf("should propagate panic through Wait")
304-
}
305-
pe, ok := err.(errgroup.PanicError)
306-
if !ok {
307-
t.Fatalf("type should is errgroup.PanicError, but is %T", err)
308-
}
309-
if pe.Recovered != p {
310-
t.Fatalf("got %v, want %v", pe.Recovered, p)
311-
}
312-
if !strings.Contains(pe.Error(), "TestPanic.func") {
313-
t.Log(pe.Error())
314-
t.Fatalf("stack trace incomplete, does not contain TestPanic.func")
315-
}
316-
}()
317-
g.Wait()
318-
})
319-
t.Run("any", func(t *testing.T) {
320-
g := &errgroup.Group{}
321-
g.Go(func() error {
322-
panic(1)
323-
})
324-
defer func() {
325-
err := recover()
326-
if err == nil {
327-
t.Fatalf("should propagate panic through Wait")
328-
}
329-
pe, ok := err.(errgroup.PanicValue)
330-
if !ok {
331-
t.Fatalf("type should is errgroup.PanicValue, but is %T", err)
332-
}
333-
if pe.Recovered != 1 {
334-
t.Fatalf("got %v, want %v", pe.Recovered, 1)
335-
}
336-
if !strings.Contains(string(pe.Stack), "TestPanic.func") {
337-
t.Log(string(pe.Stack))
338-
t.Fatalf("stack trace incomplete")
339-
}
340-
}()
341-
g.Wait()
342-
})
343-
}
344-
345-
func TestGoexit(t *testing.T) {
346-
g := &errgroup.Group{}
347-
g.Go(func() error {
348-
t.Skip()
349-
t.Fatalf("Goexit fail")
350-
return nil
351-
})
352-
g.Wait()
353-
t.Fatalf("should call runtime.Goexit from Wait")
354-
}
355-
356292
func BenchmarkGo(b *testing.B) {
357293
fn := func() {}
358294
g := &errgroup.Group{}

0 commit comments

Comments
 (0)