diff --git a/kvcache/causal_test.go b/kvcache/causal_test.go
index bd7d0ae8b..84d8de54e 100644
--- a/kvcache/causal_test.go
+++ b/kvcache/causal_test.go
@@ -309,7 +309,7 @@ func (b *testBackend) SystemInfo() string {
 
 type testContext struct{}
 
-func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func (c *testContext) Empty(dtype ml.DType, shape ...int) ml.Tensor {
 	total := 0
 
 	if len(shape) > 0 {
@@ -322,8 +322,12 @@ func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	return &testTensor{dtype: dtype, elementSize: 4, data: make([]float32, total), shape: shape}
 }
 
+func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+	return c.Empty(dtype, shape...)
+}
+
 func (c *testContext) FromFloatSlice(s []float32, shape ...int) (ml.Tensor, error) {
-	t := c.Zeros(ml.DTypeF32, shape...).(*testTensor)
+	t := c.Empty(ml.DTypeF32, shape...).(*testTensor)
 
 	copy(t.data, s)
 
@@ -391,7 +395,7 @@ func (t *testTensor) Floats() []float32 {
 }
 
 func (t *testTensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
-	out := ctx.Zeros(t.DType(), t.Shape()...).(*testTensor)
+	out := ctx.Empty(t.DType(), t.Shape()...).(*testTensor)
 
 	for i := range out.data {
 		out.data[i] = t.data[i] + t2.(*testTensor).data[i]
@@ -468,7 +472,7 @@ func (t *testTensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
 
 	context := &testContext{}
 
-	view := context.Zeros(t.dtype, s...).(*testTensor)
+	view := context.Empty(t.dtype, s...).(*testTensor)
 	view.data = t.data[offset : offset+len(view.data)]
 
 	return view
diff --git a/kvcache/encoder.go b/kvcache/encoder.go
index c55da2b4a..39b4cdfb6 100644
--- a/kvcache/encoder.go
+++ b/kvcache/encoder.go
@@ -105,8 +105,8 @@ func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
 	}
 
 	if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
-		c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
-		c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
+		c.keys[c.curLayer] = c.cacheCtx.Empty(key.DType(), key.Shape()...)
+		c.values[c.curLayer] = c.cacheCtx.Empty(value.DType(), value.Shape()...)
 	}
 
 	ctx.Forward(
diff --git a/ml/backend.go b/ml/backend.go
index ccab915c7..de2725c02 100644
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -82,6 +82,7 @@ func NewBackend(f *os.File, params BackendParams) (Backend, error) {
 }
 
 type Context interface {
+	Empty(dtype DType, shape ...int) Tensor
 	Zeros(dtype DType, shape ...int) Tensor
 	FromFloatSlice(s []float32, shape ...int) (Tensor, error)
 	FromIntSlice(s []int32, shape ...int) (Tensor, error)
@@ -195,7 +196,7 @@ func Dump(ctx Context, t Tensor, opts ...DumpOptions) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
 		})
 	case DTypeF16:
-		f32 := ctx.Zeros(DTypeF32, t.Shape()...)
+		f32 := ctx.Empty(DTypeF32, t.Shape()...)
 		f32 = t.Copy(ctx, f32)
 		return dump[[]float32](ctx, f32, opts[0].Items, func(f float32) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
index 24943111c..2c7e856cc 100644
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -304,7 +304,7 @@ func shapeToGGML(shape []int) *C.int64_t {
 	return &sh[0]
 }
 
-func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func newTensor(ctx Context, dtype ml.DType, zero bool, shape []int) ml.Tensor {
 	if len(shape) < 1 || len(shape) > 4 {
 		panic("unsupported number of dimensions")
 	}
@@ -318,19 +318,29 @@ func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	var t *C.struct_ggml_tensor
 	switch dtype {
 	case ml.DTypeF32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeF16:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeI32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
 	default:
 		panic("unsupported dtype")
 	}
 
-	b := C.ggml_backend_alloc_buffer(c.backend, C.ggml_nbytes(t))
+	b := C.ggml_backend_alloc_buffer(ctx.backend, C.ggml_nbytes(t))
 	C.ggml_backend_tensor_alloc(b, t, C.ggml_backend_buffer_get_base(b))
-	C.ggml_set_zero(t)
-	return &Tensor{b: c.b, t: t}
+	if zero {
+		C.ggml_set_zero(t)
+	}
+	return &Tensor{b: ctx.b, t: t}
+}
+
+func (c Context) Empty(dtype ml.DType, shape ...int) ml.Tensor {
+	return newTensor(c, dtype, false, shape)
+}
+
+func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+	return newTensor(c, dtype, true, shape)
 }
 
 func fromSlice[S ~[]E, E float32 | int32](ctx Context, s S, shape []int, dtype uint32) (ml.Tensor, error) {