From a6ef73f4f26a22cc605516113625a404bd064250 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 1 May 2025 17:06:53 -0700 Subject: [PATCH] ggml: Fix race that resulted in "context canceled" when loading Successfully completing processing with an errgroup cancels the associated context. However, we also have a goroutine that is checking for cancelation of the context. As a result, there is a race where the goroutine can pick up the cancelation and report an error, replacing the successful result. To avoid that, this replaces the goroutine with a cancelation check when we are reading files. This also has the advantage of stopping all reads relatively quickly on error and also ensuring that there are no outstanding I/O operations when we return in this case. The downside is that if a file read blocks forever (for example, over the network) then cancelation of the context effectively won't be honored. However, this is also true for other smaller files we read and the tensors are read in small chunks (128K), so it's consistent and better on balance overall.
--- ml/backend/ggml/ggml.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go index 177ac6fd0..cd760643c 100644 --- a/ml/backend/ggml/ggml.go +++ b/ml/backend/ggml/ggml.go @@ -341,6 +341,11 @@ func New(ctx context.Context, r *os.File, params ml.BackendParams) (ml.Backend, var s uint64 for s < t.Size() { + // Stop if either the parent context has been canceled or if any of the other tensors returned an error + if err := ctx.Err(); err != nil { + return err + } + n, err := io.ReadFull(sr, bts[:min(len(bts), int(t.Size()-s))]) if err != nil { slog.Warn("file read error", "file", r.Name(), "error", err) @@ -363,14 +368,6 @@ func New(ctx context.Context, r *os.File, params ml.BackendParams) (ml.Backend, }) } - // start a goroutine to cancel the errgroup if the parent context is done - go func() { - <-ctx.Done() - g.Go(func() error { - return ctx.Err() - }) - }() - if err := g.Wait(); err != nil { return nil, err }