diff --git a/kvcache/causal_test.go b/kvcache/causal_test.go
index bd7d0ae8b..84d8de54e 100644
--- a/kvcache/causal_test.go
+++ b/kvcache/causal_test.go
@@ -309,7 +309,7 @@ func (b *testBackend) SystemInfo() string {
 
 type testContext struct{}
 
-func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func (c *testContext) Empty(dtype ml.DType, shape ...int) ml.Tensor {
 	total := 0
 
 	if len(shape) > 0 {
@@ -322,8 +322,12 @@ func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	return &testTensor{dtype: dtype, elementSize: 4, data: make([]float32, total), shape: shape}
 }
 
+func (c *testContext) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+	return c.Empty(dtype, shape...)
+}
+
 func (c *testContext) FromFloatSlice(s []float32, shape ...int) (ml.Tensor, error) {
-	t := c.Zeros(ml.DTypeF32, shape...).(*testTensor)
+	t := c.Empty(ml.DTypeF32, shape...).(*testTensor)
 
 	copy(t.data, s)
 
@@ -391,7 +395,7 @@ func (t *testTensor) Floats() []float32 {
 }
 
 func (t *testTensor) Add(ctx ml.Context, t2 ml.Tensor) ml.Tensor {
-	out := ctx.Zeros(t.DType(), t.Shape()...).(*testTensor)
+	out := ctx.Empty(t.DType(), t.Shape()...).(*testTensor)
 
 	for i := range out.data {
 		out.data[i] = t.data[i] + t2.(*testTensor).data[i]
@@ -468,7 +472,7 @@ func (t *testTensor) View(ctx ml.Context, offset int, shape ...int) ml.Tensor {
 
 	context := &testContext{}
 
-	view := context.Zeros(t.dtype, s...).(*testTensor)
+	view := context.Empty(t.dtype, s...).(*testTensor)
 	view.data = t.data[offset : offset+len(view.data)]
 
 	return view
diff --git a/kvcache/encoder.go b/kvcache/encoder.go
index c55da2b4a..39b4cdfb6 100644
--- a/kvcache/encoder.go
+++ b/kvcache/encoder.go
@@ -105,8 +105,8 @@ func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
 	}
 
 	if c.keys[c.curLayer] == nil || c.values[c.curLayer] == nil {
-		c.keys[c.curLayer] = c.cacheCtx.Zeros(key.DType(), key.Shape()...)
-		c.values[c.curLayer] = c.cacheCtx.Zeros(value.DType(), value.Shape()...)
+		c.keys[c.curLayer] = c.cacheCtx.Empty(key.DType(), key.Shape()...)
+		c.values[c.curLayer] = c.cacheCtx.Empty(value.DType(), value.Shape()...)
 	}
 
 	ctx.Forward(
diff --git a/ml/backend.go b/ml/backend.go
index ccab915c7..de2725c02 100644
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -82,6 +82,7 @@ func NewBackend(f *os.File, params BackendParams) (Backend, error) {
 }
 
 type Context interface {
+	Empty(dtype DType, shape ...int) Tensor
 	Zeros(dtype DType, shape ...int) Tensor
 	FromFloatSlice(s []float32, shape ...int) (Tensor, error)
 	FromIntSlice(s []int32, shape ...int) (Tensor, error)
@@ -195,7 +196,7 @@ func Dump(ctx Context, t Tensor, opts ...DumpOptions) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
 		})
 	case DTypeF16:
-		f32 := ctx.Zeros(DTypeF32, t.Shape()...)
+		f32 := ctx.Empty(DTypeF32, t.Shape()...)
 		f32 = t.Copy(ctx, f32)
 		return dump[[]float32](ctx, f32, opts[0].Items, func(f float32) string {
 			return strconv.FormatFloat(float64(f), 'f', opts[0].Precision, 32)
diff --git a/ml/backend/ggml/ggml.go b/ml/backend/ggml/ggml.go
index 24943111c..2c7e856cc 100644
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -304,7 +304,7 @@ func shapeToGGML(shape []int) *C.int64_t {
 	return &sh[0]
 }
 
-func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+func newTensor(ctx Context, dtype ml.DType, zero bool, shape []int) ml.Tensor {
 	if len(shape) < 1 || len(shape) > 4 {
 		panic("unsupported number of dimensions")
 	}
@@ -318,19 +318,29 @@ func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
 	var t *C.struct_ggml_tensor
 	switch dtype {
 	case ml.DTypeF32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F32, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeF16:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_F16, C.int(len(shape)), shapeToGGML(shape))
 	case ml.DTypeI32:
-		t = C.ggml_new_tensor(c.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
+		t = C.ggml_new_tensor(ctx.ctx, C.GGML_TYPE_I32, C.int(len(shape)), shapeToGGML(shape))
 	default:
 		panic("unsupported dtype")
 	}
 
-	b := C.ggml_backend_alloc_buffer(c.backend, C.ggml_nbytes(t))
+	b := C.ggml_backend_alloc_buffer(ctx.backend, C.ggml_nbytes(t))
 	C.ggml_backend_tensor_alloc(b, t, C.ggml_backend_buffer_get_base(b))
-	C.ggml_set_zero(t)
-	return &Tensor{b: c.b, t: t}
+	if zero {
+		C.ggml_set_zero(t)
+	}
+	return &Tensor{b: ctx.b, t: t}
+}
+
+func (c Context) Empty(dtype ml.DType, shape ...int) ml.Tensor {
+	return newTensor(c, dtype, false, shape)
+}
+
+func (c Context) Zeros(dtype ml.DType, shape ...int) ml.Tensor {
+	return newTensor(c, dtype, true, shape)
 }
 
 func fromSlice[S ~[]E, E float32 | int32](ctx Context, s S, shape []int, dtype uint32) (ml.Tensor, error) {