ml: Empty tensor constructor for tensors

In cases where we allocate a tensor and then fully overwrite it with copied data, it is wasteful to first zero out the memory.
2025-02-28 17:48:07 -08:00 · 2025-02-28 17:48:07 -08:00 · ee141cc821
commit ee141cc821
parent 55e5776c44
4 changed files with 29 additions and 14 deletions
--- a/kvcache/causal_test.go
+++ b/kvcache/causal_test.go
@ -309,7 +309,7 @@ func (b *testBackend) SystemInfo() string {
 type testContext struct{}
-func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func (c *testContext) Empty(dtype ml.DType, shape ...int) ml.Tensor {
 	total := 0
 	if len(shape) > 0 {
@ -322,8 +322,12 @@ func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	return &testTensor{dtype: dtype, elementSize: 4, data: make([]float32, total), shape: shape}
 }
 func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	return c.Empty(dtype, shape...)
 }
 func (c *testContext) FromFloatSlice(s []float32, shape ...int) (ml.Tensor, error) {
-	t := c.Zeros(ml.DTypeF32, shape...).(*testTensor)
+	t := c.Empty(ml.DTypeF32, shape...).(*testTensor)
 	copy(t.data, s)
@ -391,7 +395,7 @@ func (t *testTensor) Floats() []float32 {
 }
 func (t *testTensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
-	out := ctx.Zeros(t.DType(), t.Shape()...).(*testTensor)
+	out := ctx.Empty(t.DType(), t.Shape()...).(*testTensor)
 	for i := range out.data {
 		out.data[i] = t.data[i] + t2.(*testTensor).data[i]
@ -468,7 +472,7 @@ func (t *testTensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
 	context := &testContext{}
-	view := context.Zeros(t.dtype, s...).(*testTensor)
+	view := context.Empty(t.dtype, s...).(*testTensor)
 	view.data = t.data[offset : offset+len(view.data)]
 	return view
--- a/kvcache/encoder.go
+++ b/kvcache/encoder.go
@ -105,8 +105,8 @@ func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
 	}
 	if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
-		c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
+		c.keys[c.curLayer] = c.cacheCtx.Empty(key.DType(), key.Shape()...)
-		c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
+		c.values[c.curLayer] = c.cacheCtx.Empty(value.DType(), value.Shape()...)
 	}
 	ctx.Forward(
--- a/ml/backend.go
+++ b/ml/backend.go
@ -82,6 +82,7 @@ func NewBackend(f *os.File, params BackendParams) (Backend, error) {
 }
 type Context interface {
 	Empty(dtype DType, shape ...int) Tensor
 	Zeros(dtype DType, shape ...int) Tensor
 	FromFloatSlice(s []float32, shape ...int) (Tensor, error)
 	FromIntSlice(s []int32, shape ...int) (Tensor, error)
@ -195,7 +196,7 @@ func Dump(ctx Context, t Tensor, opts ...DumpOptions) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
 		})
 	case DTypeF16:
-		f32 := ctx.Zeros(DTypeF32, t.Shape()...)
+		f32 := ctx.Empty(DTypeF32, t.Shape()...)
 		f32 = t.Copy(ctx, f32)
 		return dump[[]float32](ctx, f32, opts[0].Items, func(f float32) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@ -304,7 +304,7 @@ func shapeToGGML(shape []int) *C.int64_t {
 	return &sh[0]
 }
-func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func newTensor(ctx Context, dtype ml.DType, zero bool, shape []int) ml.Tensor {
 	if len(shape) < 1 || len(shape) > 4 {
 		panic("unsupported number of dimensions")
 	}
@ -318,19 +318,29 @@ func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	var t *C.struct_ggml_tensor
 	switch dtype {
 	case ml.DTypeF32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeF16:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeI32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
 	default:
 		panic("unsupported dtype")
 	}
-	b := C.ggml_backend_alloc_buffer(c.backend, C.ggml_nbytes(t))
+	b := C.ggml_backend_alloc_buffer(ctx.backend, C.ggml_nbytes(t))
 	C.ggml_backend_tensor_alloc(b, t, C.ggml_backend_buffer_get_base(b))
-	C.ggml_set_zero(t)
+	if zero {
-	return &Tensor{b: c.b, t: t}
+		C.ggml_set_zero(t)
 	}
 	return &Tensor{b: ctx.b, t: t}
 }
 func (c Context) Empty(dtype ml.DType, shape ...int) ml.Tensor {
 	return newTensor(c, dtype, false, shape)
 }
 func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	return newTensor(c, dtype, true, shape)
 }
 func fromSlice[S ~[]E, E float32 | int32](ctx Context, s S, shape []int, dtype uint32) (ml.Tensor, error) {