fix vision encoder
This commit is contained in:
parent
9e4642e9b3
commit
f888912870
@ -180,7 +180,7 @@ func (m *TextModel) Forward(ctx ml.Context, inputs, positions, outputs ml.Tensor
|
|||||||
if multimodal != nil {
|
if multimodal != nil {
|
||||||
visionOutputs := multimodal[0].Multimodal.(ml.Tensor)
|
visionOutputs := multimodal[0].Multimodal.(ml.Tensor)
|
||||||
offset := multimodal[0].Index - 1 - visionOutputs.Dim(1)
|
offset := multimodal[0].Index - 1 - visionOutputs.Dim(1)
|
||||||
hiddenState = hiddenState.Set(ctx, visionOutputs, offset*hiddenState.Stride(0))
|
hiddenState = hiddenState.Set(ctx, visionOutputs, offset*hiddenState.Stride(1))
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, layer := range m.Layers {
|
for i, layer := range m.Layers {
|
||||||
|
@ -20,11 +20,11 @@ func newImageProcessor(c ml.Config) ImageProcessor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *ImageProcessor) pack(img image.Image, mean, std [3]float32) []float32 {
|
func (p *ImageProcessor) pack(img image.Image, mean, std [3]float32) []float32 {
|
||||||
var pixelVals []float32
|
var pixelVals, rVals, gVals, bVals []float32
|
||||||
|
|
||||||
bounds := img.Bounds()
|
bounds := img.Bounds()
|
||||||
for x := bounds.Min.X; x < bounds.Max.X; x++ {
|
for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
|
||||||
for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
|
for x := bounds.Min.X; x < bounds.Max.X; x++ {
|
||||||
c := img.At(x, y)
|
c := img.At(x, y)
|
||||||
r, g, b, _ := c.RGBA()
|
r, g, b, _ := c.RGBA()
|
||||||
rVal := float32(r>>8) / 255.0
|
rVal := float32(r>>8) / 255.0
|
||||||
@ -35,10 +35,16 @@ func (p *ImageProcessor) pack(img image.Image, mean, std [3]float32) []float32 {
|
|||||||
gVal = (gVal - mean[1]) / std[1]
|
gVal = (gVal - mean[1]) / std[1]
|
||||||
bVal = (bVal - mean[2]) / std[2]
|
bVal = (bVal - mean[2]) / std[2]
|
||||||
|
|
||||||
pixelVals = append(pixelVals, rVal, gVal, bVal)
|
rVals = append(rVals, rVal)
|
||||||
|
gVals = append(gVals, gVal)
|
||||||
|
bVals = append(bVals, bVal)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pixelVals = append(pixelVals, rVals...)
|
||||||
|
pixelVals = append(pixelVals, gVals...)
|
||||||
|
pixelVals = append(pixelVals, bVals...)
|
||||||
|
|
||||||
return pixelVals
|
return pixelVals
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user