diff --git a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch
index 773f84a3cd..89bc51da80 100644
--- a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch
+++ b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch
@@ -375,7 +375,7 @@ index 2eb4e1c973..ffbfa9accf 100644
        "write program-readable progress information", "url" },
      { "stdin",          OPT_BOOL | OPT_EXPERT,                       { &stdin_interaction },
 diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 5a6ea59715..e8631eaa4a 100644
+index 5a6ea59715..40249fe5c9 100644
 --- a/libavcodec/Makefile
 +++ b/libavcodec/Makefile
 @@ -19,6 +19,7 @@ HEADERS = ac3_parser.h                                                  \
@@ -428,7 +428,7 @@ index 5a6ea59715..e8631eaa4a 100644
 +OBJS-$(CONFIG_HEVC_RPI4_8_HWACCEL)        += rpivid_hevc.o
 +OBJS-$(CONFIG_HEVC_RPI4_10_HWACCEL)       += rpivid_hevc.o
 +OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o\
-+                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o
++                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o  v4l2_req_hevc_v4.o
  OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
  OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o
  OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
@@ -465,19 +465,10 @@ index 5a6ea59715..e8631eaa4a 100644
 +$(SUBDIR)rpi_hevcdec.o $(SUBDIR)rpi_shader_template.o $(SUBDIR)rpi_qpu.o: $(SUBDIR)rpi_hevc_shader.h
 +endif
 diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
-index f6434e40da..b48a1e4401 100644
+index f6434e40da..7fa6611e8f 100644
 --- a/libavcodec/aarch64/Makefile
 +++ b/libavcodec/aarch64/Makefile
-@@ -35,6 +35,8 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP)           += aarch64/videodsp.o
- 
- # subsystems
- NEON-OBJS-$(CONFIG_AAC_DECODER)         += aarch64/sbrdsp_neon.o
-+NEON-OBJS-$(CONFIG_BLOCKDSP)            += aarch64/blockdsp_init_aarch64.o     \
-+                                           aarch64/blockdsp_neon.o
- NEON-OBJS-$(CONFIG_FFT)                 += aarch64/fft_neon.o
- NEON-OBJS-$(CONFIG_FMTCONVERT)          += aarch64/fmtconvert_neon.o
- NEON-OBJS-$(CONFIG_H264CHROMA)          += aarch64/h264cmc_neon.o
-@@ -44,10 +46,12 @@ NEON-OBJS-$(CONFIG_H264PRED)            += aarch64/h264pred_neon.o
+@@ -44,10 +44,12 @@ NEON-OBJS-$(CONFIG_H264PRED)            += aarch64/h264pred_neon.o
  NEON-OBJS-$(CONFIG_H264QPEL)            += aarch64/h264qpel_neon.o             \
                                             aarch64/hpeldsp_neon.o
  NEON-OBJS-$(CONFIG_HPELDSP)             += aarch64/hpeldsp_neon.o
@@ -491,103 +482,6 @@ index f6434e40da..b48a1e4401 100644
  NEON-OBJS-$(CONFIG_VP8DSP)              += aarch64/vp8dsp_neon.o
  
  # decoders/encoders
-diff --git a/libavcodec/aarch64/blockdsp_init_aarch64.c b/libavcodec/aarch64/blockdsp_init_aarch64.c
-new file mode 100644
-index 0000000000..9f3280f007
---- /dev/null
-+++ b/libavcodec/aarch64/blockdsp_init_aarch64.c
-@@ -0,0 +1,42 @@
-+/*
-+ * AArch64 NEON optimised block operations
-+ *
-+ * Copyright (c) 2022 Ben Avison <bavison@riscosopen.org>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#include <stdint.h>
-+
-+#include "libavutil/attributes.h"
-+#include "libavutil/cpu.h"
-+#include "libavutil/arm/cpu.h"
-+#include "libavcodec/avcodec.h"
-+#include "libavcodec/blockdsp.h"
-+
-+void ff_clear_block_neon(int16_t *block);
-+void ff_clear_blocks_neon(int16_t *blocks);
-+
-+av_cold void ff_blockdsp_init_aarch64(BlockDSPContext *c)
-+{
-+    int cpu_flags = av_get_cpu_flags();
-+
-+    if (have_neon(cpu_flags)) {
-+        c->clear_block  = ff_clear_block_neon;
-+        c->clear_blocks = ff_clear_blocks_neon;
-+    }
-+}
-diff --git a/libavcodec/aarch64/blockdsp_neon.S b/libavcodec/aarch64/blockdsp_neon.S
-new file mode 100644
-index 0000000000..e4a4959ccc
---- /dev/null
-+++ b/libavcodec/aarch64/blockdsp_neon.S
-@@ -0,0 +1,43 @@
-+/*
-+ * AArch64 NEON optimised block operations
-+ *
-+ * Copyright (c) 2022 Ben Avison <bavison@riscosopen.org>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#include "libavutil/aarch64/asm.S"
-+
-+function ff_clear_block_neon, export=1
-+        movi            v0.16b, #0
-+        movi            v1.16b, #0
-+        st1             {v0.16b, v1.16b}, [x0], #32
-+        st1             {v0.16b, v1.16b}, [x0], #32
-+        st1             {v0.16b, v1.16b}, [x0], #32
-+        st1             {v0.16b, v1.16b}, [x0]
-+        ret
-+endfunc
-+
-+function ff_clear_blocks_neon, export=1
-+        movi            v0.16b, #0
-+        movi            v1.16b, #0
-+        .rept           23
-+        st1             {v0.16b, v1.16b}, [x0], #32
-+        .endr
-+        st1             {v0.16b, v1.16b}, [x0]
-+        ret
-+endfunc
 diff --git a/libavcodec/aarch64/idctdsp_init_aarch64.c b/libavcodec/aarch64/idctdsp_init_aarch64.c
 index 742a3372e3..eec21aa5a2 100644
 --- a/libavcodec/aarch64/idctdsp_init_aarch64.c
@@ -767,7 +661,7 @@ index 0000000000..7f47611206
 +        ret
 +endfunc
 diff --git a/libavcodec/aarch64/vc1dsp_init_aarch64.c b/libavcodec/aarch64/vc1dsp_init_aarch64.c
-index 13dfd74940..161d5a972b 100644
+index 13dfd74940..a7976fd596 100644
 --- a/libavcodec/aarch64/vc1dsp_init_aarch64.c
 +++ b/libavcodec/aarch64/vc1dsp_init_aarch64.c
 @@ -21,10 +21,28 @@
@@ -789,12 +683,12 @@ index 13dfd74940..161d5a972b 100644
 +void ff_vc1_inv_trans_4x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 +void ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block);
 +
-+void ff_vc1_v_loop_filter4_neon(uint8_t *src, int stride, int pq);
-+void ff_vc1_h_loop_filter4_neon(uint8_t *src, int stride, int pq);
-+void ff_vc1_v_loop_filter8_neon(uint8_t *src, int stride, int pq);
-+void ff_vc1_h_loop_filter8_neon(uint8_t *src, int stride, int pq);
-+void ff_vc1_v_loop_filter16_neon(uint8_t *src, int stride, int pq);
-+void ff_vc1_h_loop_filter16_neon(uint8_t *src, int stride, int pq);
++void ff_vc1_v_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq);
++void ff_vc1_h_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq);
++void ff_vc1_v_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq);
++void ff_vc1_h_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq);
++void ff_vc1_v_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq);
++void ff_vc1_h_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq);
 +
  void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
                                  int h, int x, int y);
@@ -892,10 +786,10 @@ index 13dfd74940..161d5a972b 100644
  }
 diff --git a/libavcodec/aarch64/vc1dsp_neon.S b/libavcodec/aarch64/vc1dsp_neon.S
 new file mode 100644
-index 0000000000..529c21d285
+index 0000000000..9a96c2523c
 --- /dev/null
 +++ b/libavcodec/aarch64/vc1dsp_neon.S
-@@ -0,0 +1,1552 @@
+@@ -0,0 +1,1546 @@
 +/*
 + * VC1 AArch64 NEON optimisations
 + *
@@ -1605,11 +1499,10 @@ index 0000000000..529c21d285
 +// VC-1 in-loop deblocking filter for 4 pixel pairs at boundary of vertically-neighbouring blocks
 +// On entry:
 +//   x0 -> top-left pel of lower block
-+//   w1 = row stride, bytes
++//   x1 = row stride, bytes
 +//   w2 = PQUANT bitstream parameter
 +function ff_vc1_v_loop_filter4_neon, export=1
 +        sub             x3, x0, w1, sxtw #2
-+        sxtw            x1, w1                  // technically, stride is signed int
 +        ldr             d0, .Lcoeffs
 +        ld1             {v1.s}[0], [x0], x1     // P5
 +        ld1             {v2.s}[0], [x3], x1     // P1
@@ -1678,11 +1571,10 @@ index 0000000000..529c21d285
 +// VC-1 in-loop deblocking filter for 4 pixel pairs at boundary of horizontally-neighbouring blocks
 +// On entry:
 +//   x0 -> top-left pel of right block
-+//   w1 = row stride, bytes
++//   x1 = row stride, bytes
 +//   w2 = PQUANT bitstream parameter
 +function ff_vc1_h_loop_filter4_neon, export=1
 +        sub             x3, x0, #4              // where to start reading
-+        sxtw            x1, w1                  // technically, stride is signed int
 +        ldr             d0, .Lcoeffs
 +        ld1             {v1.8b}, [x3], x1
 +        sub             x0, x0, #1              // where to start writing
@@ -1752,11 +1644,10 @@ index 0000000000..529c21d285
 +// VC-1 in-loop deblocking filter for 8 pixel pairs at boundary of vertically-neighbouring blocks
 +// On entry:
 +//   x0 -> top-left pel of lower block
-+//   w1 = row stride, bytes
++//   x1 = row stride, bytes
 +//   w2 = PQUANT bitstream parameter
 +function ff_vc1_v_loop_filter8_neon, export=1
 +        sub             x3, x0, w1, sxtw #2
-+        sxtw            x1, w1                  // technically, stride is signed int
 +        ldr             d0, .Lcoeffs
 +        ld1             {v1.8b}, [x0], x1       // P5
 +        movi            v2.2d, #0x0000ffff00000000
@@ -1830,11 +1721,10 @@ index 0000000000..529c21d285
 +// VC-1 in-loop deblocking filter for 8 pixel pairs at boundary of horizontally-neighbouring blocks
 +// On entry:
 +//   x0 -> top-left pel of right block
-+//   w1 = row stride, bytes
++//   x1 = row stride, bytes
 +//   w2 = PQUANT bitstream parameter
 +function ff_vc1_h_loop_filter8_neon, export=1
 +        sub             x3, x0, #4              // where to start reading
-+        sxtw            x1, w1                  // technically, stride is signed int
 +        ldr             d0, .Lcoeffs
 +        ld1             {v1.8b}, [x3], x1       // P1[0], P2[0]...
 +        sub             x0, x0, #1              // where to start writing
@@ -1939,11 +1829,10 @@ index 0000000000..529c21d285
 +// VC-1 in-loop deblocking filter for 16 pixel pairs at boundary of vertically-neighbouring blocks
 +// On entry:
 +//   x0 -> top-left pel of lower block
-+//   w1 = row stride, bytes
++//   x1 = row stride, bytes
 +//   w2 = PQUANT bitstream parameter
 +function ff_vc1_v_loop_filter16_neon, export=1
 +        sub             x3, x0, w1, sxtw #2
-+        sxtw            x1, w1                  // technically, stride is signed int
 +        ldr             d0, .Lcoeffs
 +        ld1             {v1.16b}, [x0], x1      // P5
 +        movi            v2.2d, #0x0000ffff00000000
@@ -2071,11 +1960,10 @@ index 0000000000..529c21d285
 +// VC-1 in-loop deblocking filter for 16 pixel pairs at boundary of horizontally-neighbouring blocks
 +// On entry:
 +//   x0 -> top-left pel of right block
-+//   w1 = row stride, bytes
++//   x1 = row stride, bytes
 +//   w2 = PQUANT bitstream parameter
 +function ff_vc1_h_loop_filter16_neon, export=1
 +        sub             x3, x0, #4              // where to start reading
-+        sxtw            x1, w1                  // technically, stride is signed int
 +        ldr             d0, .Lcoeffs
 +        ld1             {v1.8b}, [x3], x1       // P1[0], P2[0]...
 +        sub             x0, x0, #1              // where to start writing
@@ -17404,7 +17292,7 @@ index 2cca784f5a..48cb816b70 100644
 +    dsp->vc1_unescape_buffer = vc1_unescape_buffer_neon;
  }
 diff --git a/libavcodec/arm/vc1dsp_neon.S b/libavcodec/arm/vc1dsp_neon.S
-index 93f043bf08..8e97bc5e58 100644
+index 93f043bf08..96014fbebc 100644
 --- a/libavcodec/arm/vc1dsp_neon.S
 +++ b/libavcodec/arm/vc1dsp_neon.S
 @@ -1161,3 +1161,764 @@ function ff_vc1_inv_trans_4x4_dc_neon, export=1
@@ -17560,17 +17448,17 @@ index 93f043bf08..8e97bc5e58 100644
 +function ff_vc1_v_loop_filter8_neon, export=1
 +        sub             r3, r0, r1, lsl #2
 +        vldr            d0, .Lcoeffs
-+        vld1.32         {d1}, [r0], r1          @ P5
-+        vld1.32         {d2}, [r3], r1          @ P1
-+        vld1.32         {d3}, [r3], r1          @ P2
-+        vld1.32         {d4}, [r0], r1          @ P6
-+        vld1.32         {d5}, [r3], r1          @ P3
-+        vld1.32         {d6}, [r0], r1          @ P7
++        vld1.32         {d1}, [r0 :64], r1      @ P5
++        vld1.32         {d2}, [r3 :64], r1      @ P1
++        vld1.32         {d3}, [r3 :64], r1      @ P2
++        vld1.32         {d4}, [r0 :64], r1      @ P6
++        vld1.32         {d5}, [r3 :64], r1      @ P3
++        vld1.32         {d6}, [r0 :64], r1      @ P7
 +        vshll.u8        q8, d1, #1              @ 2*P5
 +        vshll.u8        q9, d2, #1              @ 2*P1
-+        vld1.32         {d7}, [r3]              @ P4
++        vld1.32         {d7}, [r3 :64]          @ P4
 +        vmovl.u8        q1, d3                  @ P2
-+        vld1.32         {d20}, [r0]             @ P8
++        vld1.32         {d20}, [r0 :64]         @ P8
 +        vmovl.u8        q11, d4                 @ P6
 +        vdup.16         q12, r2                 @ pq
 +        vmovl.u8        q13, d5                 @ P3
@@ -17625,8 +17513,8 @@ index 93f043bf08..8e97bc5e58 100644
 +        vmla.i16        q1, q0, q2              @ invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5
 +        vqmovun.s16     d0, q3
 +        vqmovun.s16     d1, q1
-+        vst1.32         {d0}, [r3], r1
-+        vst1.32         {d1}, [r3]
++        vst1.32         {d0}, [r3 :64], r1
++        vst1.32         {d1}, [r3 :64]
 +1:      bx              lr
 +endfunc
 +
@@ -17741,17 +17629,17 @@ index 93f043bf08..8e97bc5e58 100644
 +        vpush           {d8-d15}
 +        sub             r3, r0, r1, lsl #2
 +        vldr            d0, .Lcoeffs
-+        vld1.64         {q1}, [r0], r1          @ P5
-+        vld1.64         {q2}, [r3], r1          @ P1
-+        vld1.64         {q3}, [r3], r1          @ P2
-+        vld1.64         {q4}, [r0], r1          @ P6
-+        vld1.64         {q5}, [r3], r1          @ P3
-+        vld1.64         {q6}, [r0], r1          @ P7
++        vld1.64         {q1}, [r0 :128], r1     @ P5
++        vld1.64         {q2}, [r3 :128], r1     @ P1
++        vld1.64         {q3}, [r3 :128], r1     @ P2
++        vld1.64         {q4}, [r0 :128], r1     @ P6
++        vld1.64         {q5}, [r3 :128], r1     @ P3
++        vld1.64         {q6}, [r0 :128], r1     @ P7
 +        vshll.u8        q7, d2, #1              @ 2*P5[0..7]
 +        vshll.u8        q8, d4, #1              @ 2*P1[0..7]
-+        vld1.64         {q9}, [r3]              @ P4
++        vld1.64         {q9}, [r3 :128]         @ P4
 +        vmovl.u8        q10, d6                 @ P2[0..7]
-+        vld1.64         {q11}, [r0]             @ P8
++        vld1.64         {q11}, [r0 :128]        @ P8
 +        vmovl.u8        q12, d8                 @ P6[0..7]
 +        vdup.16         q13, r2                 @ pq
 +        vshll.u8        q2, d5, #1              @ 2*P1[8..15]
@@ -17861,8 +17749,8 @@ index 93f043bf08..8e97bc5e58 100644
 +        vqmovun.s16     d0, q6
 +        vqmovun.s16     d5, q9
 +        vqmovun.s16     d1, q1
-+        vst1.64         {q2}, [r3], r1
-+        vst1.64         {q0}, [r3]
++        vst1.64         {q2}, [r3 :128], r1
++        vst1.64         {q0}, [r3 :128]
 +1:      vpop            {d8-d15}
 +        bx              lr
 +endfunc
@@ -18194,31 +18082,6 @@ index c91b2fd169..003079cdc6 100644
  } AVHWAccel;
  
  /**
-diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c
-index c7efe7e77b..46766244b8 100644
---- a/libavcodec/blockdsp.c
-+++ b/libavcodec/blockdsp.c
-@@ -65,6 +65,8 @@ av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx)
-     c->fill_block_tab[0] = fill_block16_c;
-     c->fill_block_tab[1] = fill_block8_c;
- 
-+    if (ARCH_AARCH64)
-+        ff_blockdsp_init_aarch64(c);
-     if (ARCH_ALPHA)
-         ff_blockdsp_init_alpha(c);
-     if (ARCH_ARM)
-diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h
-index 26fc2ea13b..fe539491da 100644
---- a/libavcodec/blockdsp.h
-+++ b/libavcodec/blockdsp.h
-@@ -41,6 +41,7 @@ typedef struct BlockDSPContext {
- 
- void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);
- 
-+void ff_blockdsp_init_aarch64(BlockDSPContext *c);
- void ff_blockdsp_init_alpha(BlockDSPContext *c);
- void ff_blockdsp_init_arm(BlockDSPContext *c);
- void ff_blockdsp_init_ppc(BlockDSPContext *c);
 diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
 index 1bf1c620d6..ccfa991f60 100644
 --- a/libavcodec/cabac.h
@@ -19020,6 +18883,536 @@ index 0000000000..4e35bd583d
 +#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP	(V4L2_CID_CODEC_HANTRO_BASE + 0)
 +
 +#endif
+diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h
+new file mode 100644
+index 0000000000..c02fdbe5a8
+--- /dev/null
++++ b/libavcodec/hevc-ctrls-v4.h
+@@ -0,0 +1,524 @@
++/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */
++/*
++ *  Video for Linux Two controls header file
++ *
++ *  Copyright (C) 1999-2012 the contributors
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License as published by
++ *  the Free Software Foundation; either version 2 of the License, or
++ *  (at your option) any later version.
++ *
++ *  This program is distributed in the hope that it will be useful,
++ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
++ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *  GNU General Public License for more details.
++ *
++ *  Alternatively you can redistribute this file under the terms of the
++ *  BSD license as stated below:
++ *
++ *  Redistribution and use in source and binary forms, with or without
++ *  modification, are permitted provided that the following conditions
++ *  are met:
++ *  1. Redistributions of source code must retain the above copyright
++ *     notice, this list of conditions and the following disclaimer.
++ *  2. Redistributions in binary form must reproduce the above copyright
++ *     notice, this list of conditions and the following disclaimer in
++ *     the documentation and/or other materials provided with the
++ *     distribution.
++ *  3. The names of its contributors may not be used to endorse or promote
++ *     products derived from this software without specific prior written
++ *     permission.
++ *
++ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++ *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
++ *  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ *  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ *  The contents of this header was split off from videodev2.h. All control
++ *  definitions should be added to this header, which is included by
++ *  videodev2.h.
++ */
++
++#ifndef AVCODEC_HEVC_CTRLS_V4_H
++#define AVCODEC_HEVC_CTRLS_V4_H
++
++#include <linux/const.h>
++#include <linux/types.h>
++
++#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS
++#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000	/* Stateless codecs controls */
++#endif
++#ifndef V4L2_CID_CODEC_STATELESS_BASE
++#define V4L2_CID_CODEC_STATELESS_BASE		(V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900)
++#endif
++
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_STATELESS_HEVC_SPS		(V4L2_CID_CODEC_STATELESS_BASE + 400)
++#define V4L2_CID_STATELESS_HEVC_PPS		(V4L2_CID_CODEC_STATELESS_BASE + 401)
++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 402)
++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX	(V4L2_CID_CODEC_STATELESS_BASE + 403)
++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 404)
++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE	(V4L2_CID_CODEC_STATELESS_BASE + 405)
++#define V4L2_CID_STATELESS_HEVC_START_CODE	(V4L2_CID_CODEC_STATELESS_BASE + 406)
++#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407)
++
++enum v4l2_stateless_hevc_decode_mode {
++	V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED,
++	V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED,
++};
++
++enum v4l2_stateless_hevc_start_code {
++	V4L2_STATELESS_HEVC_START_CODE_NONE,
++	V4L2_STATELESS_HEVC_START_CODE_ANNEX_B,
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B	0
++#define V4L2_HEVC_SLICE_TYPE_P	1
++#define V4L2_HEVC_SLICE_TYPE_I	2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE		(1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED			(1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED				(1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET		(1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED				(1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED		(1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT		(1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED		(1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED	(1ULL << 8)
++
++/**
++ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set
++ *
++ * @video_parameter_set_id: specifies the value of the
++ *			vps_video_parameter_set_id of the active VPS
++ * @seq_parameter_set_id: provides an identifier for the SPS for
++ *			  reference by other syntax elements
++ * @pic_width_in_luma_samples:	specifies the width of each decoded picture
++ *				in units of luma samples
++ * @pic_height_in_luma_samples: specifies the height of each decoded picture
++ *				in units of luma samples
++ * @bit_depth_luma_minus8: this value plus 8 specifies the bit depth of the
++ *                         samples of the luma array
++ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the
++ *                           samples of the chroma arrays
++ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of
++ *                                     the variable MaxPicOrderCntLsb
++ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum
++ *                                    required size of the decoded picture
++ *                                    buffer for the codec video sequence
++ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures
++ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the
++ *				    value of SpsMaxLatencyPictures array
++ * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum
++ *					    luma coding block size
++ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between
++ *					      the maximum and minimum luma
++ *					      coding block size
++ * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma
++ *					       transform block size
++ * @log2_diff_max_min_luma_transform_block_size: specifies the difference between
++ *						 the maximum and minimum luma
++ *						 transform block size
++ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy
++ *					 depth for transform units of
++ *					 coding units coded in inter
++ *					 prediction mode
++ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy
++ *					 depth for transform units of
++ *					 coding units coded in intra
++ *					 prediction mode
++ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of
++ *                                    bits used to represent each of PCM sample
++ *                                    values of the luma component
++ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number
++ *                                      of bits used to represent each of PCM
++ *                                      sample values of the chroma components
++ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the
++ *                                              minimum size of coding blocks
++ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between
++ *						  the maximum and minimum size of
++ *						  coding blocks
++ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set()
++ *				 syntax structures included in the SPS
++ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term
++ *				reference pictures that are specified in the SPS
++ * @chroma_format_idc: specifies the chroma sampling
++ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number
++ *                             of temporal sub-layers
++ * @reserved: padding field. Should be zeroed by applications.
++ * @flags: see V4L2_HEVC_SPS_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_sps {
++	__u8	video_parameter_set_id;
++	__u8	seq_parameter_set_id;
++	__u16	pic_width_in_luma_samples;
++	__u16	pic_height_in_luma_samples;
++	__u8	bit_depth_luma_minus8;
++	__u8	bit_depth_chroma_minus8;
++	__u8	log2_max_pic_order_cnt_lsb_minus4;
++	__u8	sps_max_dec_pic_buffering_minus1;
++	__u8	sps_max_num_reorder_pics;
++	__u8	sps_max_latency_increase_plus1;
++	__u8	log2_min_luma_coding_block_size_minus3;
++	__u8	log2_diff_max_min_luma_coding_block_size;
++	__u8	log2_min_luma_transform_block_size_minus2;
++	__u8	log2_diff_max_min_luma_transform_block_size;
++	__u8	max_transform_hierarchy_depth_inter;
++	__u8	max_transform_hierarchy_depth_intra;
++	__u8	pcm_sample_bit_depth_luma_minus1;
++	__u8	pcm_sample_bit_depth_chroma_minus1;
++	__u8	log2_min_pcm_luma_coding_block_size_minus3;
++	__u8	log2_diff_max_min_pcm_luma_coding_block_size;
++	__u8	num_short_term_ref_pic_sets;
++	__u8	num_long_term_ref_pics_sps;
++	__u8	chroma_format_idc;
++	__u8	sps_max_sub_layers_minus1;
++
++	__u8	reserved[6];
++	__u64	flags;
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED	(1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED		(1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED		(1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED			(1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT	(1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED			(1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED			(1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED		(1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED			(1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED		(1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	(1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED	(1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT	(1ULL << 19)
++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING			(1ULL << 20)
++
++/**
++ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set
++ *
++ * @pic_parameter_set_id: identifies the PPS for reference by other
++ *			  syntax elements
++ * @num_extra_slice_header_bits: specifies the number of extra slice header
++ *				 bits that are present in the slice header RBSP
++ *				 for coded pictures referring to the PPS.
++ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the
++ *                                        inferred value of num_ref_idx_l0_active_minus1
++ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the
++ *                                        inferred value of num_ref_idx_l1_active_minus1
++ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for
++ *		     each slice referring to the PPS
++ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding
++ *			    tree block size and the minimum luma coding block
++ *			    size of coding units that convey cu_qp_delta_abs
++ *			    and cu_qp_delta_sign_flag
++ * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb
++ * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr
++ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns
++ *			     partitioning the picture
++ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning
++ *			  the picture
++ * @column_width_minus1: this value plus 1 specifies the width of each tile column in
++ *			 units of coding tree blocks
++ * @row_height_minus1: this value plus 1 specifies the height of each tile row in
++ *		       units of coding tree blocks
++ * @pps_beta_offset_div2: specify the default deblocking parameter offsets for
++ *			  beta divided by 2
++ * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC
++ *			divided by 2
++ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of
++ *                                    the variable Log2ParMrgLevel
++ * @reserved: padding field. Should be zeroed by applications.
++ * @flags: see V4L2_HEVC_PPS_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_pps {
++	__u8	pic_parameter_set_id;
++	__u8	num_extra_slice_header_bits;
++	__u8	num_ref_idx_l0_default_active_minus1;
++	__u8	num_ref_idx_l1_default_active_minus1;
++	__s8	init_qp_minus26;
++	__u8	diff_cu_qp_delta_depth;
++	__s8	pps_cb_qp_offset;
++	__s8	pps_cr_qp_offset;
++	__u8	num_tile_columns_minus1;
++	__u8	num_tile_rows_minus1;
++	__u8	column_width_minus1[20];
++	__u8	row_height_minus1[22];
++	__s8	pps_beta_offset_div2;
++	__s8	pps_tc_offset_div2;
++	__u8	log2_parallel_merge_level_minus2;
++	__u8	reserved;
++	__u64	flags;
++};
++
++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE	0x01
++
++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME				0
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD			1
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD			2
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM			3
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP			4
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP			5
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM		6
++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING			7
++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING			8
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM	9
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP	10
++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM		11
++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP		12
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX		16
++
++/**
++ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry
++ *
++ * @timestamp: timestamp of the V4L2 capture buffer to use as reference.
++ * @flags: long term flag for the reference frame
++ * @field_pic: whether the reference is a field picture or a frame.
++ * @reserved: padding field. Should be zeroed by applications.
++ * @pic_order_cnt_val: the picture order count of the current picture.
++ */
++struct v4l2_hevc_dpb_entry {
++	__u64	timestamp;
++	__u8	flags;
++	__u8	field_pic;
++	__u16	reserved;
++	__s32	pic_order_cnt_val;
++};
++
++/**
++ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters
++ *
++ * @delta_luma_weight_l0: the difference of the weighting factor applied
++ *			  to the luma prediction value for list 0
++ * @luma_offset_l0: the additive offset applied to the luma prediction value
++ *		    for list 0
++ * @delta_chroma_weight_l0: the difference of the weighting factor applied
++ *			    to the chroma prediction values for list 0
++ * @chroma_offset_l0: the difference of the additive offset applied to
++ *		      the chroma prediction values for list 0
++ * @delta_luma_weight_l1: the difference of the weighting factor applied
++ *			  to the luma prediction value for list 1
++ * @luma_offset_l1: the additive offset applied to the luma prediction value
++ *		    for list 1
++ * @delta_chroma_weight_l1: the difference of the weighting factor applied
++ *			    to the chroma prediction values for list 1
++ * @chroma_offset_l1: the difference of the additive offset applied to
++ *		      the chroma prediction values for list 1
++ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for
++ *			    all luma weighting factors
++ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm
++ *				    of the denominator for all chroma
++ *				    weighting factors
++ */
++struct v4l2_hevc_pred_weight_table {
++	__s8	delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++	__s8	chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++	__s8	delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++	__s8	chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++	__u8	luma_log2_weight_denom;
++	__s8	delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA		(1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA		(1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED	(1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO			(1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT			(1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0		(1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT	(1ULL << 9)
++
++/**
++ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters
++ *
++ * This control is a dynamically sized 1-dimensional array,
++ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it.
++ *
++ * @bit_size: size (in bits) of the current slice data
++ * @data_byte_offset: offset (in bytes) to the video data in the current slice data
++ * @num_entry_point_offsets: specifies the number of entry point offset syntax
++ *			     elements in the slice header.
++ * @nal_unit_type: specifies the coding type of the slice (B, P or I)
++ * @nuh_temporal_id_plus1: this value minus 1 specifies a temporal identifier for the NAL unit
++ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{}
++ * @colour_plane_id: specifies the colour plane associated with the current slice
++ * @slice_pic_order_cnt: specifies the picture order count
++ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum
++ *                                reference index for reference picture list 0
++ *                                that may be used to decode the slice
++ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum
++ *                                reference index for reference picture list 1
++ *                                that may be used to decode the slice
++ * @collocated_ref_idx: specifies the reference index of the collocated picture used
++ *			for temporal motion vector prediction
++ * @five_minus_max_num_merge_cand: specifies the maximum number of merging
++ *				   motion vector prediction candidates supported in
++ *				   the slice subtracted from 5
++ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding
++ *		    blocks in the slice
++ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset
++ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset
++ * @slice_act_y_qp_offset: screen content extension parameters
++ * @slice_act_cb_qp_offset: screen content extension parameters
++ * @slice_act_cr_qp_offset: screen content extension parameters
++ * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2
++ * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2
++ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or
++ *		more fields
++ * @reserved0: padding field. Should be zeroed by applications.
++ * @slice_segment_addr: specifies the address of the first coding tree block in
++ *			the slice segment
++ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB
++ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB
++ * @short_term_ref_pic_set_size: specifies the size of short-term reference
++ *				 pictures set included in the SPS
++ * @long_term_ref_pic_set_size: specifies the size of long-term reference
++ *				pictures set included in the SPS
++ * @pred_weight_table: the prediction weight coefficients for inter-picture
++ *		       prediction
++ * @reserved1: padding field. Should be zeroed by applications.
++ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_slice_params {
++	__u32	bit_size;
++	__u32	data_byte_offset;
++	__u32	num_entry_point_offsets;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++	__u8	nal_unit_type;
++	__u8	nuh_temporal_id_plus1;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u8	slice_type;
++	__u8	colour_plane_id;
++	__s32	slice_pic_order_cnt;
++	__u8	num_ref_idx_l0_active_minus1;
++	__u8	num_ref_idx_l1_active_minus1;
++	__u8	collocated_ref_idx;
++	__u8	five_minus_max_num_merge_cand;
++	__s8	slice_qp_delta;
++	__s8	slice_cb_qp_offset;
++	__s8	slice_cr_qp_offset;
++	__s8	slice_act_y_qp_offset;
++	__s8	slice_act_cb_qp_offset;
++	__s8	slice_act_cr_qp_offset;
++	__s8	slice_beta_offset_div2;
++	__s8	slice_tc_offset_div2;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++	__u8	pic_struct;
++
++	__u8	reserved0[3];
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u32	slice_segment_addr;
++	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u16	short_term_ref_pic_set_size;
++	__u16	long_term_ref_pic_set_size;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++	struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++	__u8	reserved1[2];
++	__u64	flags;
++};
++
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC		0x1
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC		0x2
++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR  0x4
++
++/**
++ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters
++ *
++ * @pic_order_cnt_val: picture order count
++ * @short_term_ref_pic_set_size: specifies the size of short-term reference
++ *				 pictures set included in the SPS of the first slice
++ * @long_term_ref_pic_set_size: specifies the size of long-term reference
++ *				pictures set included in the SPS of the first slice
++ * @num_active_dpb_entries: the number of entries in dpb
++ * @num_poc_st_curr_before: the number of reference pictures in the short-term
++ *			    set that come before the current frame
++ * @num_poc_st_curr_after: the number of reference pictures in the short-term
++ *			   set that come after the current frame
++ * @num_poc_lt_curr: the number of reference pictures in the long-term set
++ * @poc_st_curr_before: provides the index of the short term before references
++ *			in DPB array
++ * @poc_st_curr_after: provides the index of the short term after references
++ *		       in DPB array
++ * @poc_lt_curr: provides the index of the long term references in DPB array
++ * @reserved: padding field. Should be zeroed by applications.
++ * @dpb: the decoded picture buffer, for meta-data about reference frames
++ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{}
++ */
++struct v4l2_ctrl_hevc_decode_params {
++	__s32	pic_order_cnt_val;
++	__u16	short_term_ref_pic_set_size;
++	__u16	long_term_ref_pic_set_size;
++	__u8	num_active_dpb_entries;
++	__u8	num_poc_st_curr_before;
++	__u8	num_poc_st_curr_after;
++	__u8	num_poc_lt_curr;
++	__u8	poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	reserved[4];
++	struct	v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u64	flags;
++};
++
++/**
++ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters
++ *
++ * @scaling_list_4x4: scaling list is used for the scaling process for
++ *		      transform coefficients. The values on each scaling
++ *		      list are expected in raster scan order
++ * @scaling_list_8x8: scaling list is used for the scaling process for
++ *		      transform coefficients. The values on each scaling
++ *		      list are expected in raster scan order
++ * @scaling_list_16x16:	scaling list is used for the scaling process for
++ *			transform coefficients. The values on each scaling
++ *			list are expected in raster scan order
++ * @scaling_list_32x32:	scaling list is used for the scaling process for
++ *			transform coefficients. The values on each scaling
++ *			list are expected in raster scan order
++ * @scaling_list_dc_coef_16x16:	scaling list is used for the scaling process
++ *				for transform coefficients. The values on each
++ *				scaling list are expected in raster scan order.
++ * @scaling_list_dc_coef_32x32:	scaling list is used for the scaling process
++ *				for transform coefficients. The values on each
++ *				scaling list are expected in raster scan order.
++ */
++struct v4l2_ctrl_hevc_scaling_matrix {
++	__u8	scaling_list_4x4[6][16];
++	__u8	scaling_list_8x8[6][64];
++	__u8	scaling_list_16x16[6][64];
++	__u8	scaling_list_32x32[2][64];
++	__u8	scaling_list_dc_coef_16x16[6];
++	__u8	scaling_list_dc_coef_32x32[2];
++};
++
++#endif
 diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c
 index 5af4b788d5..c7314a6af8 100644
 --- a/libavcodec/hevc_parser.c
@@ -19044,8 +19437,67 @@ index 5af4b788d5..c7314a6af8 100644
      if (ps->vps->vps_timing_info_present_flag) {
          num = ps->vps->vps_num_units_in_tick;
          den = ps->vps->vps_time_scale;
+diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
+index 4f6d985ae6..eefae71275 100644
+--- a/libavcodec/hevc_refs.c
++++ b/libavcodec/hevc_refs.c
+@@ -96,18 +96,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
+         if (!frame->rpl_buf)
+             goto fail;
+ 
+-        frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
+-        if (!frame->tab_mvf_buf)
+-            goto fail;
+-        frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
++        if (s->tab_mvf_pool) {
++            frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
++            if (!frame->tab_mvf_buf)
++                goto fail;
++            frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
++        }
+ 
+-        frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
+-        if (!frame->rpl_tab_buf)
+-            goto fail;
+-        frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
+-        frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
+-        for (j = 0; j < frame->ctb_count; j++)
+-            frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
++        if (s->rpl_tab_pool) {
++            frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
++            if (!frame->rpl_tab_buf)
++                goto fail;
++            frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
++            frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
++            for (j = 0; j < frame->ctb_count; j++)
++                frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
++        }
+ 
+         frame->frame->top_field_first  = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
+         frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
+@@ -276,14 +280,17 @@ static int init_slice_rpl(HEVCContext *s)
+     int ctb_count    = frame->ctb_count;
+     int ctb_addr_ts  = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
+     int i;
++    RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
+ 
+     if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab))
+         return AVERROR_INVALIDDATA;
+ 
+-    for (i = ctb_addr_ts; i < ctb_count; i++)
+-        frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
++    if (frame->rpl_tab) {
++        for (i = ctb_addr_ts; i < ctb_count; i++)
++            frame->rpl_tab[i] = tab;
++    }
+ 
+-    frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts];
++    frame->refPicList = tab->refPicList;
+ 
+     return 0;
+ }
 diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
-index 1eaeaf72f1..b6871ff2e2 100644
+index 1eaeaf72f1..ffef145b15 100644
 --- a/libavcodec/hevcdec.c
 +++ b/libavcodec/hevcdec.c
 @@ -332,6 +332,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
@@ -19115,7 +19567,43 @@ index 1eaeaf72f1..b6871ff2e2 100644
  #endif
          break;
      case AV_PIX_FMT_YUV444P:
-@@ -3230,7 +3258,14 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
+@@ -459,6 +487,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps,
+     if (!sps)
+         return 0;
+ 
++    // If hwaccel then we don't need all the s/w decode helper arrays
++    if (s->avctx->hwaccel) {
++        export_stream_params(s, sps);
++
++        s->avctx->pix_fmt = pix_fmt;
++        s->ps.sps = sps;
++        s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
++        return 0;
++    }
++
+     ret = pic_arrays_init(s, sps);
+     if (ret < 0)
+         goto fail;
+@@ -2809,11 +2847,13 @@ static int hevc_frame_start(HEVCContext *s)
+                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
+     int ret;
+ 
+-    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
+-    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
+-    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
+-    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
+-    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
++    if (s->horizontal_bs) {
++        memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
++        memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
++        memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
++        memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
++        memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
++    }
+ 
+     s->is_decoded        = 0;
+     s->first_nal_type    = s->nal_unit_type;
+@@ -3230,7 +3270,14 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
      s->ref = NULL;
      ret    = decode_nal_units(s, avpkt->data, avpkt->size);
      if (ret < 0)
@@ -19130,7 +19618,35 @@ index 1eaeaf72f1..b6871ff2e2 100644
  
      if (avctx->hwaccel) {
          if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
-@@ -3585,6 +3620,15 @@ AVCodec ff_hevc_decoder = {
+@@ -3273,15 +3320,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
+     if (ret < 0)
+         return ret;
+ 
+-    dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
+-    if (!dst->tab_mvf_buf)
+-        goto fail;
+-    dst->tab_mvf = src->tab_mvf;
++    if (src->tab_mvf_buf) {
++        dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
++        if (!dst->tab_mvf_buf)
++            goto fail;
++        dst->tab_mvf = src->tab_mvf;
++    }
+ 
+-    dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
+-    if (!dst->rpl_tab_buf)
+-        goto fail;
+-    dst->rpl_tab = src->rpl_tab;
++    if (src->rpl_tab_buf) {
++        dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
++        if (!dst->rpl_tab_buf)
++            goto fail;
++        dst->rpl_tab = src->rpl_tab;
++    }
+ 
+     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
+     if (!dst->rpl_buf)
+@@ -3585,6 +3636,15 @@ AVCodec ff_hevc_decoder = {
  #if CONFIG_HEVC_NVDEC_HWACCEL
                                 HWACCEL_NVDEC(hevc),
  #endif
@@ -49502,7 +50018,7 @@ index 0000000000..85c5b46d75
 +};
 +
 diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 02f23d954b..6ca83cc21b 100644
+index 02f23d954b..7f82ca3fa0 100644
 --- a/libavcodec/v4l2_buffers.c
 +++ b/libavcodec/v4l2_buffers.c
 @@ -21,6 +21,7 @@
@@ -49513,9 +50029,11 @@ index 02f23d954b..6ca83cc21b 100644
  #include <linux/videodev2.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
-@@ -30,56 +31,68 @@
+@@ -29,57 +30,82 @@
+ #include <poll.h>
  #include "libavcodec/avcodec.h"
  #include "libavcodec/internal.h"
++#include "libavutil/avassert.h"
  #include "libavutil/pixdesc.h"
 +#include "libavutil/hwcontext.h"
  #include "v4l2_context.h"
@@ -49557,21 +50075,32 @@ index 02f23d954b..6ca83cc21b 100644
  }
  
 -static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts)
-+static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts)
++static inline struct timeval tv_from_int(const int64_t t)
  {
 -    int64_t v4l2_pts;
--
++    return (struct timeval){
++        .tv_usec = t % USEC_PER_SEC,
++        .tv_sec  = t / USEC_PER_SEC
++    };
++}
+ 
 -    if (pts == AV_NOPTS_VALUE)
 -        pts = 0;
--
++static inline int64_t int_from_tv(const struct timeval t)
++{
++    return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec;
++}
+ 
++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts)
++{
      /* convert pts to v4l2 timebase */
 -    v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
+-    out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
+-    out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
 +    const int64_t v4l2_pts =
-+        out->context->no_pts_rescale ? pts :
 +        pts == AV_NOPTS_VALUE ? 0 :
 +            av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
-     out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
-     out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
++    out->buf.timestamp = tv_from_int(v4l2_pts);
  }
  
 -static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf)
@@ -49579,18 +50108,20 @@ index 02f23d954b..6ca83cc21b 100644
  {
 -    int64_t v4l2_pts;
 -
++    const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp);
++    return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE;
++#if 0
      /* convert pts back to encoder timebase */
 -    v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
-+    const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
-                         avbuf->buf.timestamp.tv_usec;
- 
--    return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
+-                        avbuf->buf.timestamp.tv_usec;
 +    return
 +        avbuf->context->no_pts_rescale ? v4l2_pts :
 +        v4l2_pts == 0 ? AV_NOPTS_VALUE :
 +            av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
++#endif
 +}
-+
+ 
+-    return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
 +static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length)
 +{
 +    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
@@ -49603,15 +50134,17 @@ index 02f23d954b..6ca83cc21b 100644
  }
  
  static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
-@@ -116,6 +129,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
+@@ -116,49 +142,176 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
      return AVCOL_PRI_UNSPECIFIED;
  }
  
+-static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
 +static void v4l2_set_color(V4L2Buffer *buf,
 +                           const enum AVColorPrimaries avcp,
 +                           const enum AVColorSpace avcs,
 +                           const enum AVColorTransferCharacteristic avxc)
-+{
+ {
+-    enum v4l2_quantization qt;
 +    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
 +    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
 +    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
@@ -49647,7 +50180,10 @@ index 02f23d954b..6ca83cc21b 100644
 +    default:
 +        break;
 +    }
-+
+ 
+-    qt = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
+-        buf->context->format.fmt.pix_mp.quantization :
+-        buf->context->format.fmt.pix.quantization;
 +    switch (avcs) {
 +    case AVCOL_SPC_RGB:
 +        cs = V4L2_COLORSPACE_SRGB;
@@ -49677,7 +50213,10 @@ index 02f23d954b..6ca83cc21b 100644
 +    default:
 +        break;
 +    }
-+
+ 
+-    switch (qt) {
+-    case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG;
+-    case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG;
 +    switch (xfer) {
 +    case AVCOL_TRC_BT709:
 +        xfer = V4L2_XFER_FUNC_709;
@@ -49691,10 +50230,11 @@ index 02f23d954b..6ca83cc21b 100644
 +    case AVCOL_TRC_SMPTE2084:
 +        xfer = V4L2_XFER_FUNC_SMPTE2084;
 +        break;
-+    default:
-+        break;
-+    }
-+
+     default:
+         break;
+     }
+ 
+-     return AVCOL_RANGE_UNSPECIFIED;
 +    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
 +        buf->context->format.fmt.pix_mp.colorspace = cs;
 +        buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr;
@@ -49704,15 +50244,58 @@ index 02f23d954b..6ca83cc21b 100644
 +        buf->context->format.fmt.pix.ycbcr_enc = ycbcr;
 +        buf->context->format.fmt.pix.xfer_func = xfer;
 +    }
-+}
-+
- static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
- {
-     enum v4l2_quantization qt;
-@@ -134,6 +246,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
-      return AVCOL_RANGE_UNSPECIFIED;
  }
  
+-static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
++static inline enum v4l2_quantization
++buf_quantization(const V4L2Buffer * const buf)
+ {
+-    enum v4l2_ycbcr_encoding ycbcr;
+-    enum v4l2_colorspace cs;
++    return V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
++        buf->context->format.fmt.pix_mp.quantization :
++        buf->context->format.fmt.pix.quantization;
++}
+ 
+-    cs = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
++static inline enum v4l2_colorspace
++buf_colorspace(const V4L2Buffer * const buf)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
+         buf->context->format.fmt.pix_mp.colorspace :
+         buf->context->format.fmt.pix.colorspace;
++}
+ 
+-    ycbcr = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
++static inline enum v4l2_ycbcr_encoding
++buf_ycbcr_enc(const V4L2Buffer * const buf)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
+         buf->context->format.fmt.pix_mp.ycbcr_enc:
+         buf->context->format.fmt.pix.ycbcr_enc;
++}
+ 
+-    switch(cs) {
+-    case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB;
++static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
++{
++    switch (buf_quantization(buf)) {
++    case V4L2_QUANTIZATION_LIM_RANGE:
++        return AVCOL_RANGE_MPEG;
++    case V4L2_QUANTIZATION_FULL_RANGE:
++        return AVCOL_RANGE_JPEG;
++    case V4L2_QUANTIZATION_DEFAULT:
++        // If YUV (which we assume for all video decode) then, from the header
++        // comments, range is limited unless CS is JPEG
++        return buf_colorspace(buf) == V4L2_COLORSPACE_JPEG ?
++            AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
++    default:
++        break;
++    }
++
++     return AVCOL_RANGE_UNSPECIFIED;
++}
++
 +static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr)
 +{
 +    const enum v4l2_quantization q =
@@ -49727,32 +50310,64 @@ index 02f23d954b..6ca83cc21b 100644
 +    }
 +}
 +
- static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
++static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
++{
++    switch (buf_colorspace(buf)) {
++    case V4L2_COLORSPACE_JPEG:  // JPEG -> SRGB
++    case V4L2_COLORSPACE_SRGB:
++        return AVCOL_SPC_RGB;
+     case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709;
+     case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC;
+     case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG;
+     case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M;
+     case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M;
+     case V4L2_COLORSPACE_BT2020:
+-        if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM)
+-            return AVCOL_SPC_BT2020_CL;
+-        else
+-             return AVCOL_SPC_BT2020_NCL;
++        return buf_ycbcr_enc(buf) == V4L2_YCBCR_ENC_BT2020_CONST_LUM ?
++            AVCOL_SPC_BT2020_CL : AVCOL_SPC_BT2020_NCL;
+     default:
+         break;
+     }
+@@ -168,17 +321,9 @@ static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
+ 
+ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf)
  {
-     enum v4l2_ycbcr_encoding ycbcr;
-@@ -210,73 +336,165 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf)
+-    enum v4l2_ycbcr_encoding ycbcr;
++    const enum v4l2_ycbcr_encoding ycbcr = buf_ycbcr_enc(buf);
+     enum v4l2_xfer_func xfer;
+-    enum v4l2_colorspace cs;
+-
+-    cs = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
+-        buf->context->format.fmt.pix_mp.colorspace :
+-        buf->context->format.fmt.pix.colorspace;
+-
+-    ycbcr = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
+-        buf->context->format.fmt.pix_mp.ycbcr_enc:
+-        buf->context->format.fmt.pix.ycbcr_enc;
++    const enum v4l2_colorspace cs = buf_colorspace(buf);
+ 
+     xfer = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ?
+         buf->context->format.fmt.pix_mp.xfer_func:
+@@ -210,73 +355,165 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf)
      return AVCOL_TRC_UNSPECIFIED;
  }
  
 -static void v4l2_free_buffer(void *opaque, uint8_t *unused)
 +static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf)
-+{
-+    return V4L2_FIELD_IS_INTERLACED(buf->buf.field);
-+}
-+
-+static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
  {
 -    V4L2Buffer* avbuf = opaque;
 -    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
-+    return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
++    return V4L2_FIELD_IS_INTERLACED(buf->buf.field);
 +}
  
 -    if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
 -        atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
-+static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff)
++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
 +{
-+    buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE :
-+        is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT;
++    return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
 +}
  
 -        if (s->reinit) {
@@ -49766,6 +50381,12 @@ index 02f23d954b..6ca83cc21b 100644
 -            else if (avbuf->context->streamon)
 -                ff_v4l2_buffer_enqueue(avbuf);
 -        }
++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff)
++{
++    buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE :
++        is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT;
++}
++
 +static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
 +{
 +    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
@@ -49791,7 +50412,8 @@ index 02f23d954b..6ca83cc21b 100644
 +        layer->nb_planes = 1;
 +
 +        break;
-+
+ 
+-        av_buffer_unref(&avbuf->context_ref);
 +    case AV_PIX_FMT_NV12:
 +    case AV_PIX_FMT_NV21:
 +
@@ -49810,8 +50432,7 @@ index 02f23d954b..6ca83cc21b 100644
 +        break;
 +
 +    case AV_PIX_FMT_YUV420P:
- 
--        av_buffer_unref(&avbuf->context_ref);
++
 +        layer->format = DRM_FORMAT_YUV420;
 +
 +        if (avbuf->num_planes > 1)
@@ -49863,7 +50484,7 @@ index 02f23d954b..6ca83cc21b 100644
  
 -    in->status = V4L2BUF_RET_USER;
 -    atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
-+        avbuf->status = V4L2BUF_AVAILABLE;
++        ff_v4l2_buffer_set_avail(avbuf);
  
 -    return 0;
 +        if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) {
@@ -49940,7 +50561,7 @@ index 02f23d954b..6ca83cc21b 100644
  
      if (plane >= out->num_planes)
          return AVERROR(EINVAL);
-@@ -284,32 +502,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i
+@@ -284,32 +521,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i
      length = out->plane_info[plane].length;
      bytesused = FFMIN(size+offset, length);
  
@@ -50013,7 +50634,7 @@ index 02f23d954b..6ca83cc21b 100644
      }
  
      /* fixup special cases */
-@@ -318,17 +561,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
+@@ -318,17 +580,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
      case AV_PIX_FMT_NV21:
          if (avbuf->num_planes > 1)
              break;
@@ -50037,7 +50658,7 @@ index 02f23d954b..6ca83cc21b 100644
          break;
  
      default:
-@@ -338,68 +581,95 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
+@@ -338,68 +600,127 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
      return 0;
  }
  
@@ -50059,6 +50680,38 @@ index 02f23d954b..6ca83cc21b 100644
 +{
 +    return i != 0  && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA));
 +}
++// Point a V4L2 DMABUF buffer at the single dmabuf object of a DRM_PRIME frame
++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
++{
++    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++
++    if (frame->format != AV_PIX_FMT_DRM_PRIME || !src || !frame->buf[0])
++        return AVERROR(EINVAL);
++
++    av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF);
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
++        // Only currently cope with single buffer types
++        if (out->buf.length != 1)
++            return AVERROR_PATCHWELCOME;
++        if (src->nb_objects != 1)
++            return AVERROR(EINVAL);
++
++        out->planes[0].m.fd = src->objects[0].fd;
++    }
++    else {
++        if (src->nb_objects != 1)
++            return AVERROR(EINVAL);
++
++        out->buf.m.fd      = src->objects[0].fd;
++    }
++
++    // No need to copy src AVDescriptor and if we did then we may confuse
++    // fd close on free. Instead keep a ref on the source buffer till we are done.
++    out->ref_buf = av_buffer_ref(frame->buf[0]);
++    // av_buffer_ref() returns NULL on failure: report it rather than queue an unreferenced fd
++    return out->ref_buf ? 0 : AVERROR(ENOMEM);
++}
 +
  static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
  {
@@ -50187,10 +50840,16 @@ index 02f23d954b..6ca83cc21b 100644
      return 0;
  }
  
-@@ -411,7 +681,16 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+@@ -409,16 +730,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+  *
+  ******************************************************************************/
  
- int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts)
  {
+-    v4l2_set_pts(out, frame->pts);
+-
+-    return v4l2_buffer_swframe_to_buf(frame, out);
 +    out->buf.flags = frame->key_frame ?
 +        (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
 +        (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
@@ -50199,12 +50858,17 @@ index 02f23d954b..6ca83cc21b 100644
 +    v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
 +    v4l2_set_color_range(out, frame->color_range);
 +    // PTS & interlace are buffer vars
-     v4l2_set_pts(out, frame->pts);
++    if (track_ts)
++        out->buf.timestamp = tv_from_int(track_ts);
++    else
++        v4l2_set_pts(out, frame->pts);
 +    v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first);
- 
-     return v4l2_buffer_swframe_to_buf(frame, out);
++
++    return frame->format == AV_PIX_FMT_DRM_PRIME ?
++        v4l2_buffer_primeframe_to_buf(frame, out) :
++        v4l2_buffer_swframe_to_buf(frame, out);
  }
-@@ -419,6 +698,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+ 
  int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
  {
      int ret;
@@ -50212,7 +50876,7 @@ index 02f23d954b..6ca83cc21b 100644
  
      av_frame_unref(frame);
  
-@@ -429,17 +709,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
+@@ -429,17 +765,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
  
      /* 2. get frame information */
      frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME);
@@ -50248,7 +50912,7 @@ index 02f23d954b..6ca83cc21b 100644
  
      /* 3. report errors upstream */
      if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) {
-@@ -452,15 +747,14 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
+@@ -452,15 +803,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
  
  int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
  {
@@ -50266,16 +50930,18 @@ index 02f23d954b..6ca83cc21b 100644
      pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused;
 -    pkt->data = pkt->buf->data;
 +    pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset;
++    pkt->flags = 0;
  
      if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
          pkt->flags |= AV_PKT_FLAG_KEY;
-@@ -475,31 +769,85 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
+@@ -475,31 +826,91 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
      return 0;
  }
  
 -int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
-+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-+                                    const void *extdata, size_t extlen)
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
++                                    const void *extdata, size_t extlen,
++                                    const int64_t timestamp)
  {
      int ret;
  
@@ -50291,7 +50957,11 @@ index 02f23d954b..6ca83cc21b 100644
 +    if (ret && ret != AVERROR(ENOMEM))
          return ret;
  
-     v4l2_set_pts(out, pkt->pts);
+-    v4l2_set_pts(out, pkt->pts);
++    if (timestamp)
++        out->buf.timestamp = tv_from_int(timestamp);
++    else
++        v4l2_set_pts(out, pkt->pts);
  
 -    if (pkt->flags & AV_PKT_FLAG_KEY)
 -        out->flags = V4L2_BUF_FLAG_KEYFRAME;
@@ -50301,12 +50971,11 @@ index 02f23d954b..6ca83cc21b 100644
  
 -    return 0;
 +    return ret;
- }
- 
--int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
++}
++
 +int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
 +{
-+    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0);
++    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0);
 +}
 +
 +
@@ -50326,19 +50995,23 @@ index 02f23d954b..6ca83cc21b 100644
 +            close(avbuf->drm_frame.objects[i].fd);
 +    }
 +
++    av_buffer_unref(&avbuf->ref_buf);
++
 +    ff_weak_link_unref(&avbuf->context_wl);
 +
 +    av_free(avbuf);
-+}
+ }
+ 
+-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
 +
-+
-+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx)
++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem)
  {
 -    V4L2Context *ctx = avbuf->context;
      int ret, i;
 +    V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
 +    AVBufferRef * bufref;
-+
+ 
+-    avbuf->buf.memory = V4L2_MEMORY_MMAP;
 +    *pbufref = NULL;
 +    if (avbuf == NULL)
 +        return AVERROR(ENOMEM);
@@ -50348,9 +51021,9 @@ index 02f23d954b..6ca83cc21b 100644
 +        av_free(avbuf);
 +        return AVERROR(ENOMEM);
 +    }
- 
++
 +    avbuf->context = ctx;
-     avbuf->buf.memory = V4L2_MEMORY_MMAP;
++    avbuf->buf.memory = mem;
      avbuf->buf.type = ctx->type;
      avbuf->buf.index = index;
  
@@ -50363,7 +51036,7 @@ index 02f23d954b..6ca83cc21b 100644
      if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
          avbuf->buf.length = VIDEO_MAX_PLANES;
          avbuf->buf.m.planes = avbuf->planes;
-@@ -507,7 +855,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
+@@ -507,7 +918,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
  
      ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf);
      if (ret < 0)
@@ -50372,7 +51045,16 @@ index 02f23d954b..6ca83cc21b 100644
  
      if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
          avbuf->num_planes = 0;
-@@ -527,25 +875,33 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
+@@ -520,6 +931,8 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
+         avbuf->num_planes = 1;
+ 
+     for (i = 0; i < avbuf->num_planes; i++) {
++        const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP &&
++            (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm);
+ 
+         avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
+             ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline :
+@@ -527,25 +940,29 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
  
          if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
              avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
@@ -50380,24 +51062,20 @@ index 02f23d954b..6ca83cc21b 100644
 -                                           PROT_READ | PROT_WRITE, MAP_SHARED,
 -                                           buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
 +
-+            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
-+                !buf_to_m2mctx(avbuf)->output_drm) {
++            if (want_mmap)
 +                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
 +                                               PROT_READ | PROT_WRITE, MAP_SHARED,
 +                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
-+            }
          } else {
              avbuf->plane_info[i].length = avbuf->buf.length;
 -            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
 -                                          PROT_READ | PROT_WRITE, MAP_SHARED,
 -                                          buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
 +
-+            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
-+                !buf_to_m2mctx(avbuf)->output_drm) {
++            if (want_mmap)
 +                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
 +                                               PROT_READ | PROT_WRITE, MAP_SHARED,
 +                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
-+            }
          }
  
 -        if (avbuf->plane_info[i].mm_addr == MAP_FAILED)
@@ -50417,7 +51095,7 @@ index 02f23d954b..6ca83cc21b 100644
      if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
          avbuf->buf.m.planes = avbuf->planes;
          avbuf->buf.length   = avbuf->num_planes;
-@@ -555,20 +911,51 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
+@@ -555,20 +972,51 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
          avbuf->buf.length    = avbuf->planes[0].length;
      }
  
@@ -50474,10 +51152,10 @@ index 02f23d954b..6ca83cc21b 100644
      return 0;
  }
 diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
-index 8dbc7fc104..7d5fadcd3d 100644
+index 8dbc7fc104..e64441ec9b 100644
 --- a/libavcodec/v4l2_buffers.h
 +++ b/libavcodec/v4l2_buffers.h
-@@ -27,25 +27,34 @@
+@@ -27,25 +27,38 @@
  #include <stdatomic.h>
  #include <linux/videodev2.h>
  
@@ -50514,10 +51192,14 @@ index 8dbc7fc104..7d5fadcd3d 100644
 -    atomic_uint context_refcount;
 +    /* DRM descriptor */
 +    AVDRMFrameDescriptor drm_frame;
++    /* For DRM_PRIME encode - need to keep a ref to the source buffer till we
++     * are done
++     */
++    AVBufferRef * ref_buf;
  
      /* keep track of the mmap address and mmap length */
      struct V4L2Plane_info {
-@@ -60,7 +69,6 @@ typedef struct V4L2Buffer {
+@@ -60,7 +73,6 @@ typedef struct V4L2Buffer {
      struct v4l2_buffer buf;
      struct v4l2_plane planes[VIDEO_MAX_PLANES];
  
@@ -50525,27 +51207,50 @@ index 8dbc7fc104..7d5fadcd3d 100644
      enum V4L2Buffer_status status;
  
  } V4L2Buffer;
-@@ -98,6 +106,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf);
+@@ -98,6 +110,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf);
   */
  int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
  
-+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-+                                    const void *extdata, size_t extlen);
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
++                                    const void *extdata, size_t extlen,
++                                    const int64_t timestamp);
 +
  /**
   * Extracts the data from an AVFrame to a V4L2Buffer
   *
-@@ -116,7 +127,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
+@@ -106,7 +122,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
+  *
+  * @returns 0 in case of success, a negative AVERROR code otherwise
+  */
+-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts);
+ 
+ /**
+  * Initializes a V4L2Buffer
+@@ -116,7 +132,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
   *
   * @returns 0 in case of success, a negative AVERROR code otherwise
   */
 -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
-+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx);
++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem);
  
  /**
   * Enqueues a V4L2Buffer
+@@ -127,5 +143,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
+  */
+ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf);
+ 
++static inline void
++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf)
++{
++    avbuf->status = V4L2BUF_AVAILABLE;   // buffer is no longer in the driver or with the user
++    av_buffer_unref(&avbuf->ref_buf);    // drop the source-buffer ref kept for DRM_PRIME encode, if any
++}
++
+ 
+ #endif // AVCODEC_V4L2_BUFFERS_H
 diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 29b144ed73..077c5223af 100644
+index 29b144ed73..c94b09b60f 100644
 --- a/libavcodec/v4l2_context.c
 +++ b/libavcodec/v4l2_context.c
 @@ -27,11 +27,13 @@
@@ -50562,49 +51267,191 @@ index 29b144ed73..077c5223af 100644
  
  struct v4l2_format_update {
      uint32_t v4l2_fmt;
-@@ -41,28 +43,18 @@ struct v4l2_format_update {
+@@ -41,26 +43,168 @@ struct v4l2_format_update {
      int update_avfmt;
  };
  
 -static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx)
-+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
++
++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
  {
-     return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
-         container_of(ctx, V4L2m2mContext, output) :
-         container_of(ctx, V4L2m2mContext, capture);
+-    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
+-        container_of(ctx, V4L2m2mContext, output) :
+-        container_of(ctx, V4L2m2mContext, capture);
++    return (int64_t)n;
  }
  
 -static inline AVCodecContext *logger(V4L2Context *ctx)
-+static inline AVCodecContext *logger(const V4L2Context *ctx)
++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
  {
-     return ctx_to_m2mctx(ctx)->avctx;
+-    return ctx_to_m2mctx(ctx)->avctx;
++    return (unsigned int)pts;
++}
++
++// FFmpeg requires us to propagate a number of vars from the coded pkt into
++// the decoded frame. The only thing that tracks like that in V4L2 stateful
++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
++// guarantees about PTS being unique or specified for every frame so replace
++// the supplied PTS with a simple incrementing number and keep a circular
++// buffer of all the things we want preserved (including the original PTS)
++// indexed by the tracking no.
++static int64_t
++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt)
++{
++    int64_t track_pts;
++
++    // Avoid 0: the buffer fill code treats a zero timestamp arg as "use the real PTS"
++    if (++x->track_no == 0)
++        x->track_no = 1;
++
++    track_pts = track_to_pts(avctx, x->track_no);
++
++    av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no);
++    x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
++        .discard          = 0,
++        .pending          = 1,   // cleared when the matching output is first seen
++        .pkt_size         = avpkt->size,
++        .pts              = avpkt->pts,
++        .dts              = avpkt->dts,
++        .reordered_opaque = avctx->reordered_opaque,
++        .pkt_pos          = avpkt->pos,
++        .pkt_duration     = avpkt->duration,
++        .track_pts        = track_pts   // checked on output to detect a reused/stale slot
++    };
++    return track_pts;   // caller passes this on as the V4L2 buffer timestamp
++}
++
++static int64_t
++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame)
++{
++    int64_t track_pts;
++    // Frame-input counterpart of xlat_pts_pkt_in (used by ff_v4l2_context_enqueue_frame)
++    // Avoid 0: the buffer fill code treats a zero track_ts as "use the real PTS"
++    if (++x->track_no == 0)
++        x->track_no = 1;
++
++    track_pts = track_to_pts(avctx, x->track_no);
++
++    av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no);
++    x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
++        .discard          = 0,
++        .pending          = 1,   // cleared when the matching output is first seen
++        .pkt_size         = 0,   // no source packet size for frame input
++        .pts              = frame->pts,
++        .dts              = AV_NOPTS_VALUE,   // no DTS is recorded for frame input
++        .reordered_opaque = frame->reordered_opaque,
++        .pkt_pos          = frame->pkt_pos,
++        .pkt_duration     = frame->pkt_duration,
++        .track_pts        = track_pts   // checked on output to detect a reused/stale slot
++    };
++    return track_pts;   // caller passes this on as the V4L2 buffer timestamp
++}
++
++// Replace the tracking value found in frame->pts with the values saved on input.
++// Returns -1 if we should discard the frame
++static int
++xlat_pts_frame_out(AVCodecContext *const avctx,
++             xlat_track_t * const x,
++             AVFrame *const frame)
++{
++    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
++    V4L2m2mTrackEl *const t = x->track_els + n;
++    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
++    {   // No timestamp, or slot n does not hold this tracking value: emit "unknown" values
++        av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
++               "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
++        frame->pts              = AV_NOPTS_VALUE;
++        frame->pkt_dts          = AV_NOPTS_VALUE;
++        frame->reordered_opaque = x->last_opaque;   // reuse the last successfully tracked opaque
++        frame->pkt_pos          = -1;
++        frame->pkt_duration     = 0;
++        frame->pkt_size         = -1;
++    }
++    else if (!t->discard)
++    {   // Slot matches and is not marked discard: restore the saved values
++        frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;   // real PTS only on first use of the slot
++        frame->pkt_dts          = t->dts;
++        frame->reordered_opaque = t->reordered_opaque;
++        frame->pkt_pos          = t->pkt_pos;
++        frame->pkt_duration     = t->pkt_duration;
++        frame->pkt_size         = t->pkt_size;
++
++        x->last_opaque = x->track_els[n].reordered_opaque;
++        if (frame->pts != AV_NOPTS_VALUE)
++            x->last_pts = frame->pts;
++        t->pending = 0;   // any further output with this tracking value gets no PTS
++    }
++    else
++    {   // Slot matches but is marked discard
++        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
++        return -1;
++    }
++
++    av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n",
++           frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n);
++    return 0;
  }
  
 -static inline unsigned int v4l2_get_width(struct v4l2_format *fmt)
--{
--    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
--}
--
--static inline unsigned int v4l2_get_height(struct v4l2_format *fmt)
--{
--    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
--}
--
- static AVRational v4l2_get_sar(V4L2Context *ctx)
++// Returns -1 if we should discard the packet
++static int
++xlat_pts_pkt_out(AVCodecContext *const avctx,
++             xlat_track_t * const x,
++             AVPacket *const pkt)
  {
-     struct AVRational sar = { 0, 1 };
-@@ -81,21 +73,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx)
+-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
++    unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE;
++    V4L2m2mTrackEl *const t = x->track_els + n;
++    if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts)
++    {
++        av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
++               "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
++        pkt->pts                = AV_NOPTS_VALUE;
++    }
++    else if (!t->discard)
++    {
++        pkt->pts                = t->pending ? t->pts : AV_NOPTS_VALUE;
++
++        x->last_opaque = x->track_els[n].reordered_opaque;
++        if (pkt->pts != AV_NOPTS_VALUE)
++            x->last_pts = pkt->pts;
++        t->pending = 0;
++    }
++    else
++    {
++        av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
++        return -1;
++    }
++
++    // * Would like something much better than this...xlat(offset + out_count)?
++    pkt->dts = pkt->pts;
++    av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n",
++           pkt->pts, t->track_pts, n);
++    return 0;
+ }
+ 
+-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt)
++
++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
+ {
+-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
++    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
++        container_of(ctx, V4L2m2mContext, output) :
++        container_of(ctx, V4L2m2mContext, capture);
++}
++
++static inline AVCodecContext *logger(const V4L2Context *ctx)
++{
++    return ctx_to_m2mctx(ctx)->avctx;
+ }
+ 
+ static AVRational v4l2_get_sar(V4L2Context *ctx)
+@@ -81,21 +225,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx)
      return sar;
  }
  
 -static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2)
 +static inline int ctx_buffers_alloced(const V4L2Context * const ctx)
-+{
-+    return ctx->bufrefs != NULL;
-+}
-+
-+// Width/Height changed or we don't have an alloc in the first place?
-+static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2)
  {
 -    struct v4l2_format *fmt1 = &ctx->format;
 -    int ret =  V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
@@ -50613,6 +51460,12 @@ index 29b144ed73..077c5223af 100644
 -        :
 -        fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
 -        fmt1->fmt.pix.height != fmt2->fmt.pix.height;
++    return ctx->bufrefs != NULL;
++}
++
++// Width/Height changed or we don't have an alloc in the first place?
++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2)
++{
 +    const struct v4l2_format *fmt1 = &ctx->format;
 +    int ret = !ctx_buffers_alloced(ctx) ||
 +        (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
@@ -50634,7 +51487,7 @@ index 29b144ed73..077c5223af 100644
  
      return ret;
  }
-@@ -153,90 +153,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd
+@@ -153,90 +305,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd
      }
  }
  
@@ -50799,7 +51652,7 @@ index 29b144ed73..077c5223af 100644
      return 1;
  }
  
-@@ -280,171 +300,275 @@ static int v4l2_stop_encode(V4L2Context *ctx)
+@@ -280,171 +452,282 @@ static int v4l2_stop_encode(V4L2Context *ctx)
      return 0;
  }
  
@@ -50885,16 +51738,18 @@ index 29b144ed73..077c5223af 100644
          }
 -        ctx->done = 1;
 -        return NULL;
-+    }
+     }
 +    atomic_fetch_sub(&ctx->q_count, 1);
 +
 +    avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
-+    avbuf->status = V4L2BUF_AVAILABLE;
++    ff_v4l2_buffer_set_avail(avbuf);
 +    avbuf->buf = buf;
 +    if (is_mp) {
 +        memcpy(avbuf->planes, planes, sizeof(planes));
 +        avbuf->buf.m.planes = avbuf->planes;
-     }
++    }
++    // Done with any attached buffer
++    av_buffer_unref(&avbuf->ref_buf);
  
 -start:
 -    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
@@ -51129,13 +51984,6 @@ index 29b144ed73..077c5223af 100644
 -                ctx->done = 1;
 -#endif
 +            continue;
-+        }
-+
-+        if ((pfd.revents & poll_cap) != 0) {
-+            ret = dq_buf(ctx, ppavbuf);
-+            if (ret == AVERROR(EPIPE))
-+                continue;
-+            return ret;
          }
  
 -        avbuf = &ctx->buffers[buf.index];
@@ -51144,6 +51992,13 @@ index 29b144ed73..077c5223af 100644
 -        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
 -            memcpy(avbuf->planes, planes, sizeof(planes));
 -            avbuf->buf.m.planes = avbuf->planes;
++        if ((pfd.revents & poll_cap) != 0) {
++            ret = dq_buf(ctx, ppavbuf);
++            if (ret == AVERROR(EPIPE))
++                continue;
++            return ret;
++        }
++
 +        if ((pfd.revents & poll_out) != 0) {
 +            if (is_cap)
 +                return AVERROR(EAGAIN);
@@ -51171,6 +52026,15 @@ index 29b144ed73..077c5223af 100644
 +    buf->sequence = 0;
 +
 +    return avbuf;
++}
++
++void
++ff_v4l2_dq_all(V4L2Context *const ctx)
++{
++    V4L2Buffer * avbuf;   // NOTE(review): uninitialised - assumes get_qbuf always writes it; confirm
++    do {
++        get_qbuf(ctx, &avbuf, 0);   // timeout 0; the return code is ignored, only avbuf is tested
++    } while (avbuf);   // repeat until get_qbuf hands back no buffer
  }
  
  static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
@@ -51179,14 +52043,12 @@ index 29b144ed73..077c5223af 100644
      int i;
  
      /* get back as many output buffers as possible */
-     if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
+-    if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
 -          do {
 -          } while (v4l2_dequeue_v4l2buf(ctx, timeout));
-+        V4L2Buffer * avbuf;
-+        do {
-+            get_qbuf(ctx, &avbuf, 0);
-+        } while (avbuf);
-     }
+-    }
++    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
++        ff_v4l2_dq_all(ctx);
  
      for (i = 0; i < ctx->num_buffers; i++) {
 -        if (ctx->buffers[i].status == V4L2BUF_AVAILABLE)
@@ -51197,7 +52059,7 @@ index 29b144ed73..077c5223af 100644
      }
  
      return NULL;
-@@ -452,25 +576,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
+@@ -452,25 +735,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
  
  static int v4l2_release_buffers(V4L2Context* ctx)
  {
@@ -51257,7 +52119,7 @@ index 29b144ed73..077c5223af 100644
  }
  
  static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt)
-@@ -499,6 +643,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm
+@@ -499,6 +802,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm
  
  static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
  {
@@ -51266,7 +52128,7 @@ index 29b144ed73..077c5223af 100644
      enum AVPixelFormat pixfmt = ctx->av_pix_fmt;
      struct v4l2_fmtdesc fdesc;
      int ret;
-@@ -517,6 +663,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
+@@ -517,6 +822,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
          if (ret)
              return AVERROR(EINVAL);
  
@@ -51280,7 +52142,7 @@ index 29b144ed73..077c5223af 100644
          pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO);
          ret = v4l2_try_raw_format(ctx, pixfmt);
          if (ret){
-@@ -569,18 +722,83 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p)
+@@ -569,30 +881,99 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p)
    *
    *****************************************************************************/
  
@@ -51295,7 +52157,7 @@ index 29b144ed73..077c5223af 100644
 +    for (i = 0; i < ctx->num_buffers; ++i) {
 +        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
 +        if (buf->status == V4L2BUF_IN_DRIVER)
-+            buf->status = V4L2BUF_AVAILABLE;
++            ff_v4l2_buffer_set_avail(buf);
 +    }
 +    atomic_store(&ctx->q_count, 0);
 +}
@@ -51355,6 +52217,8 @@ index 29b144ed73..077c5223af 100644
 +    {
 +        if (cmd == VIDIOC_STREAMOFF)
 +            flush_all_buffers_status(ctx);
++        else
++            ctx->first_buf = 1;
 +
 +        ctx->streamon = (cmd == VIDIOC_STREAMON);
 +        av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name,
@@ -51370,7 +52234,33 @@ index 29b144ed73..077c5223af 100644
  }
  
  int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
-@@ -608,7 +826,8 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
+ {
+-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    V4L2m2mContext *const s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
++    int64_t track_ts;
+     V4L2Buffer* avbuf;
+     int ret;
+ 
+     if (!frame) {
+         ret = v4l2_stop_encode(ctx);
+         if (ret)
+-            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
++            av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
+         s->draining= 1;
+         return 0;
+     }
+@@ -601,23 +982,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
+     if (!avbuf)
+         return AVERROR(ENOMEM);
+ 
+-    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf);
++    track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame);
++
++    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts);
+     if (ret)
+         return ret;
+ 
      return ff_v4l2_buffer_enqueue(avbuf);
  }
  
@@ -51379,25 +52269,29 @@ index 29b144ed73..077c5223af 100644
 +                                   const void * extdata, size_t extlen)
  {
      V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
      V4L2Buffer* avbuf;
-@@ -616,8 +835,9 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
+     int ret;
++    int64_t track_ts;
  
      if (!pkt->size) {
          ret = v4l2_stop_decode(ctx);
 +        // Log but otherwise ignore stop failure
          if (ret)
 -            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name);
-+            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
++            av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
          s->draining = 1;
          return 0;
      }
-@@ -626,8 +846,11 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
+@@ -626,8 +1013,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
      if (!avbuf)
          return AVERROR(EAGAIN);
  
 -    ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf);
 -    if (ret)
-+    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen);
++    track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt);
++
++    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts);
 +    if (ret == AVERROR(ENOMEM))
 +        av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
 +               __func__, pkt->size, avbuf->planes[0].length);
@@ -51405,9 +52299,12 @@ index 29b144ed73..077c5223af 100644
          return ret;
  
      return ff_v4l2_buffer_enqueue(avbuf);
-@@ -636,19 +859,10 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
+@@ -635,42 +1027,36 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
+ 
  int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
  {
++    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
      V4L2Buffer *avbuf;
 +    int rv;
  
@@ -51420,17 +52317,24 @@ index 29b144ed73..077c5223af 100644
 -    if (!avbuf) {
 -        if (ctx->done)
 -            return AVERROR_EOF;
--
++    do {
++        if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
++            return rv;
++        if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0)
++            return rv;
++    } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0);
+ 
 -        return AVERROR(EAGAIN);
 -    }
-+    if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
-+        return rv;
- 
-     return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
+-
+-    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
++   return 0;
  }
-@@ -656,19 +870,10 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
+ 
  int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
  {
++    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
      V4L2Buffer *avbuf;
 +    int rv;
  
@@ -51443,15 +52347,22 @@ index 29b144ed73..077c5223af 100644
 -    if (!avbuf) {
 -        if (ctx->done)
 -            return AVERROR_EOF;
--
++    do {
++        if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0)
++            return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
++        if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0)
++            return rv;
++    } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0);
+ 
 -        return AVERROR(EAGAIN);
 -    }
-+    if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0)
-+        return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
- 
-     return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
+-
+-    return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
++    return 0;
  }
-@@ -702,78 +907,160 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
+ 
+ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
+@@ -702,78 +1088,179 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
  
  int ff_v4l2_context_set_format(V4L2Context* ctx)
  {
@@ -51503,7 +52414,7 @@ index 29b144ed73..077c5223af 100644
  
 -int ff_v4l2_context_init(V4L2Context* ctx)
 +
-+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers)
++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem)
  {
 -    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
 +    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
@@ -51524,8 +52435,9 @@ index 29b144ed73..077c5223af 100644
  
      memset(&req, 0, sizeof(req));
 -    req.count = ctx->num_buffers;
+-    req.memory = V4L2_MEMORY_MMAP;
 +    req.count = req_buffers;
-     req.memory = V4L2_MEMORY_MMAP;
++    req.memory = mem;
      req.type = ctx->type;
 -    ret = ioctl(s->fd, VIDIOC_REQBUFS, &req);
 -    if (ret < 0) {
@@ -51560,7 +52472,7 @@ index 29b144ed73..077c5223af 100644
 +    }
 +
 +    for (i = 0; i < ctx->num_buffers; i++) {
-+        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx);
++        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem);
 +        if (ret) {
              av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret));
 -            goto error;
@@ -51589,13 +52501,13 @@ index 29b144ed73..077c5223af 100644
 +
 +int ff_v4l2_context_init(V4L2Context* ctx)
 +{
++    struct v4l2_queryctrl qctrl;
 +    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
 +    int ret;
 +
 +    // It is not valid to reinit a context without a previous release
 +    av_assert0(ctx->bufrefs == NULL);
- 
--    av_freep(&ctx->buffers);
++
 +    if (!v4l2_type_supported(ctx)) {
 +        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
 +        return AVERROR_PATCHWELCOME;
@@ -51604,7 +52516,8 @@ index 29b144ed73..077c5223af 100644
 +    ff_mutex_init(&ctx->lock, NULL);
 +    pthread_cond_init(&ctx->cond, NULL);
 +    atomic_init(&ctx->q_count, 0);
-+
+ 
+-    av_freep(&ctx->buffers);
 +    if (s->output_drm) {
 +        AVHWFramesContext *hwframes;
 +
@@ -51631,7 +52544,25 @@ index 29b144ed73..077c5223af 100644
 +        goto fail_unref_hwframes;
 +    }
 +
-+    ret = create_buffers(ctx, ctx->num_buffers);
++    memset(&qctrl, 0, sizeof(qctrl));
++    qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT;
++    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) {
++        ret = AVERROR(errno);
++        if (ret != AVERROR(EINVAL)) {
++            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret));
++            goto fail_unref_hwframes;
++        }
++        // Control unsupported - set default if wanted
++        if (ctx->num_buffers < 2)
++            ctx->num_buffers = 4;
++    }
++    else {
++        if (ctx->num_buffers < 2)
++            ctx->num_buffers = qctrl.minimum + 2;
++        ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum);
++    }
++
++    ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem);
 +    if (ret < 0)
 +        goto fail_unref_hwframes;
 +
@@ -51644,7 +52575,7 @@ index 29b144ed73..077c5223af 100644
      return ret;
  }
 diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 22a9532444..a56216e990 100644
+index 22a9532444..311b6f10a4 100644
 --- a/libavcodec/v4l2_context.h
 +++ b/libavcodec/v4l2_context.h
 @@ -31,6 +31,7 @@
@@ -51655,7 +52586,7 @@ index 22a9532444..a56216e990 100644
  #include "v4l2_buffers.h"
  
  typedef struct V4L2Context {
-@@ -70,11 +71,18 @@ typedef struct V4L2Context {
+@@ -70,28 +71,57 @@ typedef struct V4L2Context {
       */
      int width, height;
      AVRational sample_aspect_ratio;
@@ -51676,18 +52607,35 @@ index 22a9532444..a56216e990 100644
  
      /**
       * Readonly after init.
-@@ -92,6 +100,21 @@ typedef struct V4L2Context {
+      */
+     int num_buffers;
+ 
++    /**
++     * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF
++     */
++    enum v4l2_memory buf_mem;
++
+     /**
+      * Whether the stream has been started (VIDIOC_STREAMON has been sent).
+      */
+     int streamon;
+ 
++    /* 1st buffer after stream on */
++    int first_buf;
++
+     /**
+      *  Either no more buffers available or an unrecoverable error was notified
+      *  by the V4L2 kernel driver: once set the context has to be exited.
       */
      int done;
  
 +    int flag_last;
 +
 +    /**
-+     * PTS rescale not wanted
-+     * If the PTS is just a dummy frame count then rescale is
-+     * actively harmful
++     * If non-zero then use this rather than the "real" PTS when
++     * queueing a frame/pkt
 +     */
-+    int no_pts_rescale;
++    uint64_t track_ts;
 +
 +    AVBufferRef *frames_ref;
 +    atomic_int q_count;
@@ -51698,7 +52646,7 @@ index 22a9532444..a56216e990 100644
  } V4L2Context;
  
  /**
-@@ -156,7 +179,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
+@@ -156,7 +186,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
   * @param[in] ctx The V4L2Context to dequeue from.
   * @param[inout] f The AVFrame to dequeue to.
   * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
@@ -51709,7 +52657,7 @@ index 22a9532444..a56216e990 100644
   */
  int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
  
-@@ -170,7 +196,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
+@@ -170,7 +203,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
   * @param[in] pkt A pointer to an AVPacket.
   * @return 0 in case of success, a negative error otherwise.
   */
@@ -51718,11 +52666,43 @@ index 22a9532444..a56216e990 100644
  
  /**
   * Enqueues a buffer to a V4L2Context from an AVFrame
+@@ -183,4 +216,6 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt);
+  */
+ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f);
+ 
++void ff_v4l2_dq_all(V4L2Context *const ctx);
++
+ #endif // AVCODEC_V4L2_CONTEXT_H
 diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index e48b3a8ccf..32288e5a99 100644
+index e48b3a8ccf..2a9fe32776 100644
 --- a/libavcodec/v4l2_m2m.c
 +++ b/libavcodec/v4l2_m2m.c
-@@ -215,13 +215,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
+@@ -36,6 +36,14 @@
+ #include "v4l2_fmt.h"
+ #include "v4l2_m2m.h"
+ 
++static void
++xlat_init(xlat_track_t * const x)
++{
++    memset(x, 0, sizeof(*x));
++    x->last_pts = AV_NOPTS_VALUE;
++}
++
++
+ static inline int v4l2_splane_video(struct v4l2_capability *cap)
+ {
+     if (cap->capabilities & (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT) &&
+@@ -68,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe)
+ 
+     s->capture.done = s->output.done = 0;
+     s->capture.name = "capture";
++    s->capture.buf_mem = V4L2_MEMORY_MMAP;
+     s->output.name = "output";
++    s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
+     atomic_init(&s->refcount, 0);
+     sem_init(&s->refsync, 0, 0);
+ 
+@@ -215,13 +225,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
          av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
  
      /* 2. unmap the capture buffers (v4l2 and ffmpeg):
@@ -51736,7 +52716,7 @@ index e48b3a8ccf..32288e5a99 100644
      ff_v4l2_context_release(&s->capture);
  
      /* 3. get the new capture format */
-@@ -240,7 +234,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
+@@ -240,7 +244,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
  
      /* 5. complete reinit */
      s->draining = 0;
@@ -51744,7 +52724,7 @@ index e48b3a8ccf..32288e5a99 100644
  
      return 0;
  }
-@@ -274,7 +267,6 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s)
+@@ -274,7 +277,6 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s)
  
      /* start again now that we know the stream dimensions */
      s->draining = 0;
@@ -51752,7 +52732,7 @@ index e48b3a8ccf..32288e5a99 100644
  
      ret = ff_v4l2_context_get_format(&s->output, 0);
      if (ret) {
-@@ -328,7 +320,13 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
+@@ -328,7 +330,13 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
      ff_v4l2_context_release(&s->capture);
      sem_destroy(&s->refsync);
  
@@ -51767,7 +52747,7 @@ index e48b3a8ccf..32288e5a99 100644
  
      av_free(s);
  }
-@@ -338,17 +336,34 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
+@@ -338,17 +346,34 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
      V4L2m2mContext *s = priv->context;
      int ret;
  
@@ -51808,8 +52788,53 @@ index e48b3a8ccf..32288e5a99 100644
      av_buffer_unref(&priv->context_ref);
  
      return 0;
+@@ -392,28 +417,33 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv)
+     return v4l2_configure_contexts(s);
+ }
+ 
+-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
++int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps)
+ {
+-    *s = av_mallocz(sizeof(V4L2m2mContext));
+-    if (!*s)
++    V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext));
++
++    *pps = NULL;
++    if (!s)
+         return AVERROR(ENOMEM);
+ 
+-    priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext),
++    priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s),
+                                          &v4l2_m2m_destroy_context, NULL, 0);
+     if (!priv->context_ref) {
+-        av_freep(s);
++        av_free(s);
+         return AVERROR(ENOMEM);
+     }
+ 
+     /* assign the context */
+-    priv->context = *s;
+-    (*s)->priv = priv;
++    priv->context = s;
++    s->priv = priv;
+ 
+     /* populate it */
+-    priv->context->capture.num_buffers = priv->num_capture_buffers;
+-    priv->context->output.num_buffers  = priv->num_output_buffers;
+-    priv->context->self_ref = priv->context_ref;
+-    priv->context->fd = -1;
++    s->capture.num_buffers = priv->num_capture_buffers;
++    s->output.num_buffers  = priv->num_output_buffers;
++    s->self_ref = priv->context_ref;
++    s->fd = -1;
++
++    xlat_init(&s->xlat);
+ 
++    *pps = s;
+     return 0;
+ }
 diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 456281f48c..8544b528e1 100644
+index 456281f48c..ea39b0a757 100644
 --- a/libavcodec/v4l2_m2m.h
 +++ b/libavcodec/v4l2_m2m.h
 @@ -30,6 +30,7 @@
@@ -51820,7 +52845,7 @@ index 456281f48c..8544b528e1 100644
  #include "v4l2_context.h"
  
  #define container_of(ptr, type, member) ({ \
-@@ -38,7 +39,38 @@
+@@ -38,7 +39,37 @@
  
  #define V4L_M2M_DEFAULT_OPTS \
      { "num_output_buffers", "Number of buffers in the output context",\
@@ -51853,14 +52878,13 @@ index 456281f48c..8544b528e1 100644
 +typedef struct xlat_track_s {
 +    unsigned int track_no;
 +    int64_t last_pts;
-+    int64_t last_pkt_dts;
 +    int64_t last_opaque;
 +    V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
 +} xlat_track_t;
  
  typedef struct V4L2m2mContext {
      char devname[PATH_MAX];
-@@ -52,7 +84,6 @@ typedef struct V4L2m2mContext {
+@@ -52,7 +83,6 @@ typedef struct V4L2m2mContext {
      AVCodecContext *avctx;
      sem_t refsync;
      atomic_uint refcount;
@@ -51868,7 +52892,7 @@ index 456281f48c..8544b528e1 100644
  
      /* null frame/packet received */
      int draining;
-@@ -63,6 +94,33 @@ typedef struct V4L2m2mContext {
+@@ -63,6 +93,36 @@ typedef struct V4L2m2mContext {
  
      /* reference back to V4L2m2mPriv */
      void *priv;
@@ -51878,6 +52902,9 @@ index 456281f48c..8544b528e1 100644
 +    /* generate DRM frames */
 +    int output_drm;
 +
++    /* input frames are drmprime */
++    int input_drm;
++
 +    /* Frame tracking */
 +    xlat_track_t xlat;
 +    int pending_hw;
@@ -51891,7 +52918,7 @@ index 456281f48c..8544b528e1 100644
 +    /* Ext data sent */
 +    int extdata_sent;
 +    /* Ext data sent in packet - overrides ctx */
-+    uint8_t * extdata_data;
++    void * extdata_data;
 +    size_t extdata_size;
 +
 +#define FF_V4L2_QUIRK_REINIT_ALWAYS             1
@@ -51902,7 +52929,7 @@ index 456281f48c..8544b528e1 100644
  } V4L2m2mContext;
  
  typedef struct V4L2m2mPriv {
-@@ -73,6 +131,7 @@ typedef struct V4L2m2mPriv {
+@@ -73,6 +133,7 @@ typedef struct V4L2m2mPriv {
  
      int num_output_buffers;
      int num_capture_buffers;
@@ -51910,7 +52937,7 @@ index 456281f48c..8544b528e1 100644
  } V4L2m2mPriv;
  
  /**
-@@ -126,4 +185,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx);
+@@ -126,4 +187,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx);
   */
  int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx);
  
@@ -51938,7 +52965,7 @@ index 456281f48c..8544b528e1 100644
 +
  #endif /* AVCODEC_V4L2_M2M_H */
 diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 3e17e0fcac..dd383f31e5 100644
+index 3e17e0fcac..18f3bc7ff2 100644
 --- a/libavcodec/v4l2_m2m_dec.c
 +++ b/libavcodec/v4l2_m2m_dec.c
 @@ -23,6 +23,10 @@
@@ -51952,7 +52979,7 @@ index 3e17e0fcac..dd383f31e5 100644
  #include "libavutil/pixfmt.h"
  #include "libavutil/pixdesc.h"
  #include "libavutil/opt.h"
-@@ -30,75 +34,107 @@
+@@ -30,75 +34,264 @@
  #include "libavcodec/decode.h"
  #include "libavcodec/internal.h"
  
@@ -51969,18 +52996,22 @@ index 3e17e0fcac..dd383f31e5 100644
 +#define STATS_LAST_COUNT_MAX 64
 +#define STATS_INTERVAL_MAX (1 << 30)
 +
-+static int64_t pts_stats_guess(const pts_stats_t * const stats)
++#ifndef FF_API_BUFFER_SIZE_T
++#define FF_API_BUFFER_SIZE_T 1
++#endif
++
++#define DUMP_FAILED_EXTRADATA 0
++
++#if DUMP_FAILED_EXTRADATA
++static inline char hex1(unsigned int x)
  {
 -    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
 -    V4L2Context *const capture = &s->capture;
 -    V4L2Context *const output = &s->output;
 -    struct v4l2_selection selection = { 0 };
 -    int ret;
-+    if (stats->last_pts == AV_NOPTS_VALUE ||
-+            stats->last_interval == 0 ||
-+            stats->last_count >= STATS_LAST_COUNT_MAX)
-+        return AV_NOPTS_VALUE;
-+    return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval;
++    x &= 0xf;
++    return x <= 9 ? '0' + x : 'a' + x - 10;
 +}
  
 -    /* 1. start the output process */
@@ -51989,17 +53020,104 @@ index 3e17e0fcac..dd383f31e5 100644
 -        if (ret < 0) {
 -            av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n");
 -            return ret;
+-        }
++static inline char * hex2(char * s, unsigned int x)
++{
++    *s++ = hex1(x >> 4);
++    *s++ = hex1(x);
++    return s;
++}
++
++static inline char * hex4(char * s, unsigned int x)
++{
++    s = hex2(s, x >> 8);
++    s = hex2(s, x);
++    return s;
++}
++
++static inline char * dash2(char * s)
++{
++    *s++ = '-';
++    *s++ = '-';
++    return s;
++}
++
++static void
++data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len)
++{
++    size_t i;
++    s = hex4(s, offset);
++    m += offset;
++    for (i = 0; i != 8; ++i) {
++        *s++ = ' ';
++        s = len > i + offset ? hex2(s, *m++) : dash2(s);
++    }
++    *s++ = ' ';
++    *s++ = ':';
++    for (; i != 16; ++i) {
++        *s++ = ' ';
++        s = len > i + offset ? hex2(s, *m++) : dash2(s);
+     }
++    *s++ = 0;
++}
+ 
+-    if (capture->streamon)
+-        return 0;
++static void
++log_dump(void * logctx, int lvl, const void * const data, const size_t len)
++{
++    size_t i;
++    for (i = 0; i < len; i += 16) {
++        char buf[80];
++        data16(buf, i, data, len);
++        av_log(logctx, lvl, "%s\n", buf);
++    }
++}
++#endif
+ 
+-    /* 2. get the capture format */
+-    capture->format.type = capture->type;
+-    ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format);
+-    if (ret) {
+-        av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n");
+-        return ret;
++static int64_t pts_stats_guess(const pts_stats_t * const stats)
++{
++    if (stats->last_pts == AV_NOPTS_VALUE ||
++            stats->last_interval == 0 ||
++            stats->last_count >= STATS_LAST_COUNT_MAX)
++        return AV_NOPTS_VALUE;
++    return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval;
++}
++
 +static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
 +{
 +    if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {
 +        if (stats->last_count < STATS_LAST_COUNT_MAX)
 +            ++stats->last_count;
 +        return;
-+    }
-+
+     }
+ 
+-    /* 2.1 update the AVCodecContext */
+-    avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
+-    capture->av_pix_fmt = avctx->pix_fmt;
 +    if (stats->last_pts != AV_NOPTS_VALUE) {
 +        const int64_t interval = pts - stats->last_pts;
-+
+ 
+-    /* 3. set the crop parameters */
+-    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+-    selection.r.height = avctx->coded_height;
+-    selection.r.width = avctx->coded_width;
+-    ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
+-    if (!ret) {
+-        ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
+-        if (ret) {
+-            av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
+-        } else {
+-            av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height);
+-            /* update the size of the resulting frame */
+-            capture->height = selection.r.height;
+-            capture->width  = selection.r.width;
 +        if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
 +            stats->last_count >= STATS_LAST_COUNT_MAX) {
 +            if (stats->last_interval != 0)
@@ -52017,7 +53135,12 @@ index 3e17e0fcac..dd383f31e5 100644
          }
      }
  
--    if (capture->streamon)
+-    /* 4. init the capture context now that we have the capture format */
+-    if (!capture->buffers) {
+-        ret = ff_v4l2_context_init(capture);
+-        if (ret) {
+-            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
+-            return AVERROR(ENOMEM);
 +    stats->last_pts = pts;
 +    stats->last_count = 1;
 +}
@@ -52033,6 +53156,102 @@ index 3e17e0fcac..dd383f31e5 100644
 +    };
 +}
 +
++// If abdata == NULL then this just counts space required
++// Unpacks avcC if detected
++static int
++h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata)
++{
++    const uint8_t * const xdend = extradata + extrasize;
++    const uint8_t * p = extradata;
++    uint8_t * d = abdata;
++    unsigned int n;
++    unsigned int len;
++    const unsigned int hdrlen = 4;
++    unsigned int need_pps = 1;
++
++    if (extrasize < 8)
++        return AVERROR(EINVAL);
++
++    if (p[0] == 0 && p[1] == 0) {
++        // Assume a couple of leading zeros are good enough to indicate NAL
++        if (abdata)
++            memcpy(d, p, extrasize);
++        return extrasize;
++    }
++
++    // avcC starts with a 1
++    if (p[0] != 1)
++        return AVERROR(EINVAL);
++
++    p += 5;
++    n = *p++ & 0x1f;
++
++doxps:
++    while (n--) {
++        if (xdend - p < 2)
++            return AVERROR(EINVAL);
++        len = (p[0] << 8) | p[1];
++        p += 2;
++        if (xdend - p < (ptrdiff_t)len)
++            return AVERROR(EINVAL);
++        if (abdata) {
++            d[0] = 0;
++            d[1] = 0;
++            d[2] = 0;
++            d[3] = 1;
++            memcpy(d + 4, p, len);
+         }
++        d += len + hdrlen;
++        p += len;
++    }
++    if (need_pps) {
++        need_pps = 0;
++        if (p >= xdend)
++            return AVERROR(EINVAL);
++        n = *p++;
++        goto doxps;
+     }
+ 
+-    /* 5. start the capture process */
+-    ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
+-    if (ret) {
+-        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n");
++    return d - abdata;
++}
++
++static int
++copy_extradata(AVCodecContext * const avctx,
++               const void * const src_data, const int src_len,
++               void ** const pdst_data, size_t * const pdst_len)
++{
++    int len;
++
++    *pdst_len = 0;
++    av_freep(pdst_data);
++
++    if (avctx->codec_id == AV_CODEC_ID_H264)
++        len = h264_xd_copy(src_data, src_len, NULL);
++    else
++        len = src_len < 0 ? AVERROR(EINVAL) : src_len;
++
++    // Zero length is OK but we want to stop - -ve is error val
++    if (len <= 0)
++        return len;
++
++    if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
++        return AVERROR(ENOMEM);
++
++    if (avctx->codec_id == AV_CODEC_ID_H264)
++        h264_xd_copy(src_data, src_len, *pdst_data);
++    else
++        memcpy(*pdst_data, src_data, len);
++    *pdst_len = len;
++
++    return 0;
++}
++
++
++
 +static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)
 +{
 +    int ret;
@@ -52042,163 +53261,47 @@ index 3e17e0fcac..dd383f31e5 100644
 +    };
 +
 +    if (s->output.streamon)
-         return 0;
- 
--    /* 2. get the capture format */
--    capture->format.type = capture->type;
--    ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format);
--    if (ret) {
--        av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n");
++        return 0;
++
 +    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON);
 +    if (ret != 0) {
 +        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret));
          return ret;
      }
  
--    /* 2.1 update the AVCodecContext */
--    avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
--    capture->av_pix_fmt = avctx->pix_fmt;
--
--    /* 3. set the crop parameters */
--    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
--    selection.r.height = avctx->coded_height;
--    selection.r.width = avctx->coded_width;
--    ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
--    if (!ret) {
--        ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
--        if (ret) {
--            av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
--        } else {
--            av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height);
--            /* update the size of the resulting frame */
--            capture->height = selection.r.height;
--            capture->width  = selection.r.width;
--        }
 +    // STREAMON should do implicit START so this just for those that don't.
 +    // It is optional so don't worry if it fails
 +    if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) {
 +        ret = AVERROR(errno);
 +        av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret));
-     }
--
--    /* 4. init the capture context now that we have the capture format */
--    if (!capture->buffers) {
--        ret = ff_v4l2_context_init(capture);
--        if (ret) {
--            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
--            return AVERROR(ENOMEM);
--        }
++    }
 +    else {
 +        av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n");
-     }
++    }
 +    return 0;
 +}
- 
--    /* 5. start the capture process */
--    ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
--    if (ret) {
--        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n");
--        return ret;
--    }
++
 +static int v4l2_try_start(AVCodecContext *avctx)
 +{
 +    V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
 +    int ret;
- 
++
 +    /* 1. start the output process */
 +    if ((ret = check_output_streamon(avctx, s)) != 0)
 +        return ret;
      return 0;
  }
  
-@@ -133,52 +169,606 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s)
+@@ -133,52 +326,518 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s)
      return 0;
  }
  
 -static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
-+{
-+    return (int64_t)n;
-+}
-+
-+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
-+{
-+    return (unsigned int)pts;
-+}
-+
-+// FFmpeg requires us to propagate a number of vars from the coded pkt into
-+// the decoded frame. The only thing that tracks like that in V4L2 stateful
-+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
-+// guarantees about PTS being unique or specified for every frame so replace
-+// the supplied PTS with a simple incrementing number and keep a circular
-+// buffer of all the things we want preserved (including the original PTS)
-+// indexed by the tracking no.
 +static void
-+xlat_pts_in(AVCodecContext *const avctx, xlat_track_t *const x, AVPacket *const avpkt)
-+{
-+    int64_t track_pts;
-+
-+    // Avoid 0
-+    if (++x->track_no == 0)
-+        x->track_no = 1;
-+
-+    track_pts = track_to_pts(avctx, x->track_no);
-+
-+    av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no);
-+    x->last_pkt_dts = avpkt->dts;
-+    x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-+        .discard          = 0,
-+        .pending          = 1,
-+        .pkt_size         = avpkt->size,
-+        .pts              = avpkt->pts,
-+        .dts              = avpkt->dts,
-+        .reordered_opaque = avctx->reordered_opaque,
-+        .pkt_pos          = avpkt->pos,
-+        .pkt_duration     = avpkt->duration,
-+        .track_pts        = track_pts
-+    };
-+    avpkt->pts = track_pts;
-+}
-+
-+// Returns -1 if we should discard the frame
-+static int
-+xlat_pts_out(AVCodecContext *const avctx,
-+             xlat_track_t * const x,
++set_best_effort_pts(AVCodecContext *const avctx,
 +             pts_stats_t * const ps,
 +             AVFrame *const frame)
 +{
-+    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
-+    V4L2m2mTrackEl *const t = x->track_els + n;
-+    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
-+    {
-+        av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-+        frame->pts              = AV_NOPTS_VALUE;
-+        frame->pkt_dts          = x->last_pkt_dts;
-+        frame->reordered_opaque = x->last_opaque;
-+        frame->pkt_pos          = -1;
-+        frame->pkt_duration     = 0;
-+        frame->pkt_size         = -1;
-+    }
-+    else if (!t->discard)
-+    {
-+        frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;
-+        frame->pkt_dts          = x->last_pkt_dts;
-+        frame->reordered_opaque = t->reordered_opaque;
-+        frame->pkt_pos          = t->pkt_pos;
-+        frame->pkt_duration     = t->pkt_duration;
-+        frame->pkt_size         = t->pkt_size;
-+
-+        x->last_opaque = x->track_els[n].reordered_opaque;
-+        if (frame->pts != AV_NOPTS_VALUE)
-+            x->last_pts = frame->pts;
-+        t->pending = 0;
-+    }
-+    else
-+    {
-+        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-+        return -1;
-+    }
-+
 +    pts_stats_add(ps, frame->pts);
 +
 +#if FF_API_PKT_PTS
@@ -52207,10 +53310,15 @@ index 3e17e0fcac..dd383f31e5 100644
 +FF_ENABLE_DEPRECATION_WARNINGS
 +#endif
 +    frame->best_effort_timestamp = pts_stats_guess(ps);
-+    frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
-+    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n",
-+           frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n);
-+    return 0;
++    // If we can't guess from just PTS - try DTS
++    if (frame->best_effort_timestamp == AV_NOPTS_VALUE)
++        frame->best_effort_timestamp = frame->pkt_dts;
++
++    // We can't emulate what s/w does in a useful manner and using the
++    // "correct" answer seems to just confuse things.
++    frame->pkt_dts               = frame->pts;
++    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n",
++           frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
 +}
 +
 +static void
@@ -52224,13 +53332,6 @@ index 3e17e0fcac..dd383f31e5 100644
 +    x->last_pts = AV_NOPTS_VALUE;
 +}
 +
-+static void
-+xlat_init(xlat_track_t * const x)
-+{
-+    memset(x, 0, sizeof(*x));
-+    x->last_pts = AV_NOPTS_VALUE;
-+}
-+
 +static int
 +xlat_pending(const xlat_track_t * const x)
 +{
@@ -52309,8 +53410,11 @@ index 3e17e0fcac..dd383f31e5 100644
 +
 +        for (i = 0; i < 256; ++i) {
 +            uint8_t * side_data;
++#if FF_API_BUFFER_SIZE_T
++            int side_size;
++#else
 +            size_t side_size;
-+
++#endif
 +            ret = ff_decode_get_packet(avctx, &s->buf_pkt);
 +            if (ret != 0)
 +                break;
@@ -52319,13 +53423,8 @@ index 3e17e0fcac..dd383f31e5 100644
 +            side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
 +            if (side_data) {
 +                av_log(avctx, AV_LOG_DEBUG, "New extradata\n");
-+                av_freep(&s->extdata_data);
-+                if ((s->extdata_data = av_malloc(side_size ? side_size : 1)) == NULL) {
-+                    av_log(avctx, AV_LOG_ERROR, "Failed to alloc %zd bytes of extra data\n", side_size);
-+                    return AVERROR(ENOMEM);
-+                }
-+                memcpy(s->extdata_data, side_data, side_size);
-+                s->extdata_size = side_size;
++                if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0)
++                    av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret));
 +                s->extdata_sent = 0;
 +            }
 +
@@ -52381,20 +53480,18 @@ index 3e17e0fcac..dd383f31e5 100644
 +            av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret);
              return ret;
 +        }
++    }
 +
-+        xlat_pts_in(avctx, &s->xlat, &s->buf_pkt);
-     }
- 
--    if (s->draining)
--        goto dequeue;
 +    if (s->draining) {
 +        if (s->buf_pkt.size) {
 +            av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n");
 +            av_packet_unref(&s->buf_pkt);
 +        }
 +        return NQ_DRAINING;
-+    }
-+
+     }
+ 
+-    if (s->draining)
+-        goto dequeue;
 +    if (!s->buf_pkt.size)
 +        return NQ_NONE;
 +
@@ -52409,8 +53506,6 @@ index 3e17e0fcac..dd383f31e5 100644
 +        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
 +    else if (s->extdata_data)
 +        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size);
-+    else
-+        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, avctx->extradata, avctx->extradata_size);
  
 -        s->buf_pkt = avpkt;
 -        /* no input buffers available, continue dequeing */
@@ -52519,53 +53614,51 @@ index 3e17e0fcac..dd383f31e5 100644
 +        if (dst_rv != 0 && TRY_DQ(src_rv)) {
 +            // Pick a timeout depending on state
 +            const int t =
++                src_rv == NQ_Q_FULL ? -1 :
 +                src_rv == NQ_DRAINING ? 300 :
-+                prefer_dq ? 5 :
-+                src_rv == NQ_Q_FULL ? -1 : 0;
++                prefer_dq ? 5 : 0;
 +
-+            do {
-+                // Dequeue frame will unref any previous contents of frame
-+                // if it returns success so we don't need an explicit unref
-+                // when discarding
-+                // This returns AVERROR(EAGAIN) on timeout or if
-+                // there is room in the input Q and timeout == -1
-+                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
++            // Dequeue frame will unref any previous contents of frame
++            // if it returns success so we don't need an explicit unref
++            // when discarding
++            // This returns AVERROR(EAGAIN) on timeout or if
++            // there is room in the input Q and timeout == -1
++            dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
 +
-+                // Failure due to no buffer in Q?
-+                if (dst_rv == AVERROR(ENOSPC)) {
-+                    // Wait & retry
-+                    if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
-+                        dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
-+                    }
++            // Failure due to no buffer in Q?
++            if (dst_rv == AVERROR(ENOSPC)) {
++                // Wait & retry
++                if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
++                    dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
 +                }
++            }
 +
-+                // Adjust dynamic pending threshold
-+                if (dst_rv == 0) {
-+                    if (--s->pending_hw < PENDING_HW_MIN)
-+                        s->pending_hw = PENDING_HW_MIN;
++            // Adjust dynamic pending threshold
++            if (dst_rv == 0) {
++                if (--s->pending_hw < PENDING_HW_MIN)
++                    s->pending_hw = PENDING_HW_MIN;
++                s->pending_n = 0;
++
++                set_best_effort_pts(avctx, &s->pts_stat, frame);
++            }
++            else if (dst_rv == AVERROR(EAGAIN)) {
++                if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) {
++                    s->pending_hw = pending * 16 + PENDING_HW_OFFSET;
 +                    s->pending_n = 0;
 +                }
-+                else if (dst_rv == AVERROR(EAGAIN)) {
-+                    if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) {
-+                        s->pending_hw = pending * 16 + PENDING_HW_OFFSET;
-+                        s->pending_n = 0;
-+                    }
-+                }
++            }
 +
-+                if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
-+                    av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF");
-+                    dst_rv = AVERROR_EOF;
-+                    s->capture.done = 1;
-+                }
-+                else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-+                    av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
-+                           s->draining, s->capture.done);
-+                else if (dst_rv && dst_rv != AVERROR(EAGAIN))
-+                    av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
-+                           s->draining, s->capture.done, dst_rv);
-+
-+                // Go again if we got a frame that we need to discard
-+            } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame));
++            if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {  // dequeue timed out while draining
++                av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF\n");
++                dst_rv = AVERROR_EOF;  // report the timeout to the caller as end of stream
++                s->capture.done = 1;
++            }
++            else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
++                av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
++                       s->draining, s->capture.done);
++            else if (dst_rv && dst_rv != AVERROR(EAGAIN))
++                av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
++                       s->draining, s->capture.done, dst_rv);
 +        }
 +
 +        ++i;
@@ -52748,7 +53841,7 @@ index 3e17e0fcac..dd383f31e5 100644
  }
  
  static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-@@ -186,12 +776,30 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+@@ -186,12 +845,29 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
      V4L2Context *capture, *output;
      V4L2m2mContext *s;
      V4L2m2mPriv *priv = avctx->priv_data;
@@ -52772,14 +53865,13 @@ index 3e17e0fcac..dd383f31e5 100644
      if (ret < 0)
          return ret;
  
-+    xlat_init(&s->xlat);
 +    pts_stats_init(&s->pts_stat, avctx, "decoder");
 +    s->pending_hw = PENDING_HW_MIN;
 +
      capture = &s->capture;
      output = &s->output;
  
-@@ -199,34 +807,129 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+@@ -199,34 +875,136 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
       * by the v4l2 driver; this event will trigger a full pipeline reconfig and
       * the proper values will be retrieved from the kernel driver.
       */
@@ -52793,12 +53885,10 @@ index 3e17e0fcac..dd383f31e5 100644
      output->av_codec_id = avctx->codec_id;
      output->av_pix_fmt  = AV_PIX_FMT_NONE;
 +    output->min_buf_size = max_coded_size(avctx);
-+    output->no_pts_rescale = 1;
  
      capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
      capture->av_pix_fmt = avctx->pix_fmt;
 +    capture->min_buf_size = 0;
-+    capture->no_pts_rescale = 1;
 +
 +    /* the client requests the codec to generate DRM frames:
 +     *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
@@ -52839,7 +53929,15 @@ index 3e17e0fcac..dd383f31e5 100644
          av_log(avctx, AV_LOG_ERROR, "can't configure decoder\n");
 -        s->self_ref = NULL;
 -        av_buffer_unref(&priv->context_ref);
--
++        return ret;
++    }
+ 
++    if (avctx->extradata &&
++        (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret));
++#if DUMP_FAILED_EXTRADATA
++        log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size);
++#endif
          return ret;
      }
  
@@ -52918,7 +54016,7 @@ index 3e17e0fcac..dd383f31e5 100644
  }
  
  #define OFFSET(x) offsetof(V4L2m2mPriv, x)
-@@ -235,10 +938,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx)
+@@ -235,10 +1013,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx)
  static const AVOption options[] = {
      V4L_M2M_DEFAULT_OPTS,
      { "num_capture_buffers", "Number of buffers in the capture context",
@@ -52936,7 +54034,7 @@ index 3e17e0fcac..dd383f31e5 100644
  #define M2MDEC_CLASS(NAME) \
      static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
          .class_name = #NAME "_v4l2m2m_decoder", \
-@@ -259,9 +968,15 @@ static const AVOption options[] = {
+@@ -259,9 +1043,15 @@ static const AVOption options[] = {
          .init           = v4l2_decode_init, \
          .receive_frame  = v4l2_receive_frame, \
          .close          = v4l2_decode_close, \
@@ -52953,6 +54051,408 @@ index 3e17e0fcac..dd383f31e5 100644
          .wrapper_name   = "v4l2m2m", \
      }
  
+diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
+index 32321f392f..08d0c092bd 100644
+--- a/libavcodec/v4l2_m2m_enc.c
++++ b/libavcodec/v4l2_m2m_enc.c
+@@ -24,6 +24,8 @@
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
+ #include <search.h>
++#include <drm_fourcc.h>
++
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/internal.h"
+ #include "libavutil/pixdesc.h"
+@@ -37,6 +39,34 @@
+ #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x
+ #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x
+ 
++// P030 should be defined in drm_fourcc.h and hopefully will be sometime
++// in the future but until then...
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
++#endif
++
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++#ifndef V4L2_CID_CODEC_BASE
++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in videodev2.h and hopefully will be sometime in the future but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
+ static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den)
+ {
+     struct v4l2_streamparm parm = { 0 };
+@@ -147,15 +177,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p)
+ static int v4l2_check_b_frame_support(V4L2m2mContext *s)
+ {
+     if (s->avctx->max_b_frames)
+-        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n");
++        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames);
+ 
+-    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0);
++    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1);
+     v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0);
+     if (s->avctx->max_b_frames == 0)
+         return 0;
+ 
+     avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding");
+-
+     return AVERROR_PATCHWELCOME;
+ }
+ 
+@@ -270,13 +299,186 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s)
+     return 0;
+ }
+ 
++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame)
++{
++    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++    // Fills *format (fourcc, width, height, stride) from the DRM descriptor; returns 0 or AVERROR(EINVAL)
++    const uint32_t drm_fmt = src->layers[0].format;
++    // Treat INVALID as LINEAR
++    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
++        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
++    uint32_t pix_fmt = 0;
++    uint32_t w = 0;
++    uint32_t h = 0;
++    uint32_t bpl = src->layers[0].planes[0].pitch;
++
++    // We really don't expect multiple layers
++    // All formats that we currently cope with are single object
++    // A zero pitch is rejected as well: it is the divisor for the linear plane heights below
++    if (src->nb_layers != 1 || src->nb_objects != 1 || bpl == 0)
++        return AVERROR(EINVAL);
++
++    switch (drm_fmt) {
++        case DRM_FORMAT_YUV420:
++            if (mod == DRM_FORMAT_MOD_LINEAR) {
++                if (src->layers[0].nb_planes != 3)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_YUV420;
++                h = src->layers[0].planes[1].offset / bpl;  // lines of plane 0 before plane 1 starts
++                w = bpl;
++            }
++            break;
++
++        case DRM_FORMAT_NV12:
++            if (mod == DRM_FORMAT_MOD_LINEAR) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_NV12;
++                h = src->layers[0].planes[1].offset / bpl;  // lines of plane 0 before plane 1 starts
++                w = bpl;
++            }
++            else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_NV12_COL128;
++                w = bpl;
++                h = src->layers[0].planes[1].offset / 128;
++                bpl = fourcc_mod_broadcom_param(mod);  // stride comes from the modifier parameter
++            }
++            break;
++
++        case DRM_FORMAT_P030:
++            if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt =  V4L2_PIX_FMT_NV12_10_COL128;
++                w = bpl / 2;  // Matching lie to how we construct this
++                h = src->layers[0].planes[1].offset / 128;
++                bpl = fourcc_mod_broadcom_param(mod);  // stride comes from the modifier parameter
++            }
++            break;
++
++        default:
++            break;
++    }
++
++    if (!pix_fmt)  // unsupported format/modifier pair or unexpected plane count
++        return AVERROR(EINVAL);
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->plane_fmt[0].bytesperline = bpl;
++        pix->num_planes = 1;
++    }
++    else {
++        struct v4l2_pix_format *const pix = &format->fmt.pix;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->bytesperline = bpl;
++    }
++
++    return 0;
++}
++
++// Do we have similar enough formats to be usable?  Returns 1 when type, pixelformat and bytesperline match, else 0
++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b)
++{
++    if (a->type != b->type)
++        return 0;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) {
++        const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp;
++        const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp;
++        unsigned int i;
++        if (pa->pixelformat != pb->pixelformat ||
++            pa->num_planes != pb->num_planes)
++            return 0;
++        for (i = 0; i != pa->num_planes; ++i) {  // stride must agree on every plane
++            if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline)
++                return 0;
++        }
++    }
++    else {
++        const struct v4l2_pix_format *const pa = &a->fmt.pix;
++        const struct v4l2_pix_format *const pb = &b->fmt.pix;
++        if (pa->pixelformat != pb->pixelformat ||
++            pa->bytesperline != pb->bytesperline)
++            return 0;
++    }
++    return 1;  // width, height and the remaining fields are not compared
++}
++
++
+ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+ {
+     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+     V4L2Context *const output = &s->output;
+ 
++    ff_v4l2_dq_all(output);
++
++    // Signal EOF if needed
++    if (!frame) {
++        return ff_v4l2_context_enqueue_frame(output, frame);
++    }
++
++    if (s->input_drm && !output->streamon) {
++        int rv;
++        struct v4l2_format req_format = {.type = output->format.type};
++
++        // Set format when we first get a buffer
++        if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n");
++            return rv;
++        }
++
++        ff_v4l2_context_release(output);
++
++        output->format = req_format;
++
++        if ((rv = ff_v4l2_context_set_format(output)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n");
++            return rv;
++        }
++
++        if (!fmt_eq(&req_format, &output->format)) {
++            av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n");
++            return AVERROR(EINVAL);
++        }
++
++        output->selection.top = frame->crop_top;
++        output->selection.left = frame->crop_left;
++        output->selection.width = av_frame_cropped_width(frame);
++        output->selection.height = av_frame_cropped_height(frame);
++
++        if ((rv = ff_v4l2_context_init(output)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n");
++            return rv;
++        }
++
++        {
++            struct v4l2_selection selection = {
++                .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
++                .target = V4L2_SEL_TGT_CROP,
++                .r = output->selection
++            };
++            if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0)  // failure is only warned about, not fatal
++                av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n",
++                       selection.r.width, selection.r.height, selection.r.left, selection.r.top,
++                       av_err2str(AVERROR(errno)));
++            else  // only claim success when the ioctl did not fail
++                av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n",
++                       selection.r.width, selection.r.height, selection.r.left, selection.r.top);
++        }
++    }
++
+ #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME
+-    if (frame && frame->pict_type == AV_PICTURE_TYPE_I)
++    if (frame->pict_type == AV_PICTURE_TYPE_I)
+         v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1);
+ #endif
+ 
+@@ -290,6 +492,8 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+     V4L2Context *const output = &s->output;
+     int ret;
+ 
++    ff_v4l2_dq_all(output);
++
+     if (s->draining)
+         goto dequeue;
+ 
+@@ -310,7 +514,87 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+     }
+ 
+ dequeue:
+-    return ff_v4l2_context_dequeue_packet(capture, avpkt);
++    ret = ff_v4l2_context_dequeue_packet(capture, avpkt);
++    ff_v4l2_dq_all(output);
++    if (ret)
++        return ret;
++
++    if (capture->first_buf == 1) {
++        uint8_t * data;
++        const int len = avpkt->size;
++
++        // 1st buffer after streamon should be SPS/PPS
++        capture->first_buf = 2;
++
++        // Clear both possible stores so there is no chance of confusion
++        av_freep(&s->extdata_data);
++        s->extdata_size = 0;
++        av_freep(&avctx->extradata);
++        avctx->extradata_size = 0;
++        // Zero-filled allocation so the AV_INPUT_BUFFER_PADDING_SIZE tail is never left uninitialised
++        if ((data = av_mallocz(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
++            goto fail_no_mem;
++
++        memcpy(data, avpkt->data, len);
++        av_packet_unref(avpkt);
++
++        // We need to copy the header, but keep local if not global
++        if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) {
++            avctx->extradata = data;
++            avctx->extradata_size = len;
++        }
++        else {
++            s->extdata_data = data;
++            s->extdata_size = len;
++        }
++
++        ret = ff_v4l2_context_dequeue_packet(capture, avpkt);  // header consumed: fetch the next packet to return
++        ff_v4l2_dq_all(output);
++        if (ret)
++            return ret;
++    }
++
++    // First frame must be key so mark as such even if encoder forgot
++    if (capture->first_buf == 2) {
++        avpkt->flags |= AV_PKT_FLAG_KEY;
++
++        // Add any extradata to the 1st packet we emit as we cannot create it at init
++        if (avctx->extradata_size > 0 && avctx->extradata) {
++            void * const side = av_packet_new_side_data(avpkt,
++                                           AV_PKT_DATA_NEW_EXTRADATA,
++                                           avctx->extradata_size);
++            if (!side)
++                goto fail_no_mem;
++
++            memcpy(side, avctx->extradata, avctx->extradata_size);
++        }
++    }
++
++    // Add SPS/PPS to the start of every key frame if non-global headers
++    if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) {
++        const size_t newlen = s->extdata_size + avpkt->size;
++        AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE);
++
++        if (buf == NULL)
++            goto fail_no_mem;
++
++        memcpy(buf->data, s->extdata_data, s->extdata_size);
++        memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size);
++        memset(buf->data + newlen, 0, AV_INPUT_BUFFER_PADDING_SIZE);  // padding after the payload must not be left uninitialised
++        av_buffer_unref(&avpkt->buf);  // drop the old buffer only after its payload has been copied
++        avpkt->buf = buf;
++        avpkt->data = buf->data;
++        avpkt->size = newlen;
++    }
++
++//    av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret);
++    capture->first_buf = 0;
++    return 0;
++
++fail_no_mem:
++    ret = AVERROR(ENOMEM);
++    av_packet_unref(avpkt);
++    return ret;
+ }
+ 
+ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+@@ -322,6 +606,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+     uint32_t v4l2_fmt_output;
+     int ret;
+ 
++    av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt);
++
+     ret = ff_v4l2_m2m_create_context(priv, &s);
+     if (ret < 0)
+         return ret;
+@@ -329,13 +615,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+     capture = &s->capture;
+     output  = &s->output;
+ 
++    s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME);
++
+     /* common settings output/capture */
+     output->height = capture->height = avctx->height;
+     output->width = capture->width = avctx->width;
+ 
+     /* output context */
+     output->av_codec_id = AV_CODEC_ID_RAWVIDEO;
+-    output->av_pix_fmt = avctx->pix_fmt;
++    output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt :
++            avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt :
++            AV_PIX_FMT_YUV420P;
+ 
+     /* capture context */
+     capture->av_codec_id = avctx->codec_id;
+@@ -354,7 +644,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+         v4l2_fmt_output = output->format.fmt.pix.pixelformat;
+ 
+     pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO);
+-    if (pix_fmt_output != avctx->pix_fmt) {
++    if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) {
+         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output);
+         av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name);
+         return AVERROR(EINVAL);
+@@ -372,9 +662,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx)
+ #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+ 
+ #define V4L_M2M_CAPTURE_OPTS \
+-    V4L_M2M_DEFAULT_OPTS,\
++    { "num_output_buffers", "Number of buffers in the output context",\
++        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\
+     { "num_capture_buffers", "Number of buffers in the capture context", \
+-        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS }
++        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS }
+ 
+ static const AVOption mpeg4_options[] = {
+     V4L_M2M_CAPTURE_OPTS,
 diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c
 new file mode 100644
 index 0000000000..5b3fb958fa
@@ -53531,13 +55031,15 @@ index 0000000000..cfa94d55c4
 +
 diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h
 new file mode 100644
-index 0000000000..0baef36535
+index 0000000000..956d9234f1
 --- /dev/null
 +++ b/libavcodec/v4l2_req_devscan.h
-@@ -0,0 +1,21 @@
+@@ -0,0 +1,23 @@
 +#ifndef _DEVSCAN_H_
 +#define _DEVSCAN_H_
 +
++#include <stdint.h>
++
 +struct devscan;
 +struct decdev;
 +enum v4l2_buf_type;
@@ -53830,13 +55332,15 @@ index 0000000000..ae6c648369
 +
 diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h
 new file mode 100644
-index 0000000000..8d909c4297
+index 0000000000..cfb17e801d
 --- /dev/null
 +++ b/libavcodec/v4l2_req_dmabufs.h
-@@ -0,0 +1,38 @@
+@@ -0,0 +1,40 @@
 +#ifndef DMABUFS_H
 +#define DMABUFS_H
 +
++#include <stddef.h>
++
 +struct dmabufs_ctl;
 +struct dmabuf_h;
 +
@@ -53899,20 +55403,27 @@ index 0000000000..dcc8d95632
 +#define HEVC_CTRLS_VERSION 3
 +#include "v4l2_req_hevc_vx.c"
 +
+diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c
+new file mode 100644
+index 0000000000..c35579d8e0
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v4.c
+@@ -0,0 +1,3 @@
++#define HEVC_CTRLS_VERSION 4
++#include "v4l2_req_hevc_vx.c"
++
 diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
 new file mode 100644
-index 0000000000..55c41ae679
+index 0000000000..9ff5592e61
 --- /dev/null
 +++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -0,0 +1,1228 @@
+@@ -0,0 +1,1365 @@
 +// File included by v4l2_req_hevc_v* - not compiled on its own
 +
 +#include "decode.h"
 +#include "hevcdec.h"
 +#include "hwconfig.h"
 +
-+#include "v4l2_request_hevc.h"
-+
 +#if HEVC_CTRLS_VERSION == 1
 +#include "hevc-ctrls-v1.h"
 +
@@ -53923,10 +55434,37 @@ index 0000000000..55c41ae679
 +#include "hevc-ctrls-v2.h"
 +#elif HEVC_CTRLS_VERSION == 3
 +#include "hevc-ctrls-v3.h"
++#elif HEVC_CTRLS_VERSION == 4
++#include <linux/v4l2-controls.h>
++#if !defined(V4L2_CID_STATELESS_HEVC_SPS)
++#include "hevc-ctrls-v4.h"
++#endif
 +#else
 +#error Unknown HEVC_CTRLS_VERSION
 +#endif
 +
++#ifndef V4L2_CID_STATELESS_HEVC_SPS
++#define V4L2_CID_STATELESS_HEVC_SPS                     V4L2_CID_MPEG_VIDEO_HEVC_SPS
++#define V4L2_CID_STATELESS_HEVC_PPS                     V4L2_CID_MPEG_VIDEO_HEVC_PPS
++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS            V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS
++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX          V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX
++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS           V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS
++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE             V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE
++#define V4L2_CID_STATELESS_HEVC_START_CODE              V4L2_CID_MPEG_VIDEO_HEVC_START_CODE
++
++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED
++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED
++#define V4L2_STATELESS_HEVC_START_CODE_NONE             V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE
++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B          V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B
++#endif
++
++// Should be in videodev2 but we might not have a good enough one
++#ifndef V4L2_PIX_FMT_HEVC_SLICE
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++#endif
++
++#include "v4l2_request_hevc.h"
++
 +#include "libavutil/hwcontext_drm.h"
 +
 +#include <semaphore.h>
@@ -53962,11 +55500,16 @@ index 0000000000..55c41ae679
 +    struct v4l2_ctrl_hevc_slice_params * slice_params;
 +    struct slice_info * slices;
 +
++    size_t num_offsets;
++    size_t alloced_offsets;
++    uint32_t *offsets;
++
 +} V4L2MediaReqDescriptor;
 +
 +struct slice_info {
 +    const uint8_t * ptr;
 +    size_t len; // bytes
++    size_t n_offsets;
 +};
 +
 +// Handy container for accumulating controls before setting
@@ -54125,7 +55668,7 @@ index 0000000000..55c41ae679
 +    if (rd->num_slices >= rd->alloced_slices) {
 +        struct v4l2_ctrl_hevc_slice_params * p2;
 +        struct slice_info * s2;
-+        size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2;
++        size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2;
 +
 +        p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2));
 +        if (p2 == NULL)
@@ -54143,6 +55686,23 @@ index 0000000000..55c41ae679
 +    return 0;
 +}
 +
++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets)
++{   // Appends n values to rd->offsets (each stored minus 1), growing the array as needed; returns 0 or AVERROR(ENOMEM)
++    if (rd->num_offsets + n > rd->alloced_offsets) {
++        size_t n2 = rd->alloced_offsets == 0 ? 128 : rd->alloced_offsets * 2;  // grow from the offsets capacity, not the slice capacity
++        void * p2;
++        while (rd->num_offsets + n > n2)  // keep doubling until the new entries fit
++            n2 *= 2;
++        if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL)
++            return AVERROR(ENOMEM);  // rd->offsets is left untouched on failure
++        rd->offsets = p2;
++        rd->alloced_offsets = n2;
++    }
++    for (size_t i = 0; i != n; ++i)
++        rd->offsets[rd->num_offsets++] = offsets[i] - 1;
++    return 0;
++}
++
 +static unsigned int
 +fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries)
 +{
@@ -54164,9 +55724,13 @@ index 0000000000..55c41ae679
 +#endif
 +            entry->field_pic = frame->frame->interlaced_frame;
 +
++#if HEVC_CTRLS_VERSION <= 3
 +            /* TODO: Interleaved: Get the POC for each field. */
 +            entry->pic_order_cnt[0] = frame->poc;
 +            entry->pic_order_cnt[1] = frame->poc;
++#else
++            entry->pic_order_cnt_val = frame->poc;
++#endif
 +        }
 +    }
 +    return n;
@@ -54192,8 +55756,11 @@ index 0000000000..55c41ae679
 +
 +    *slice_params = (struct v4l2_ctrl_hevc_slice_params) {
 +        .bit_size = bit_size,
++#if HEVC_CTRLS_VERSION <= 3
 +        .data_bit_offset = bit_offset,
-+
++#else
++        .data_byte_offset = bit_offset / 8 + 1,
++#endif
 +        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
 +        .slice_segment_addr = sh->slice_segment_addr,
 +
@@ -54276,6 +55843,7 @@ index 0000000000..55c41ae679
 +    fill_pred_table(h, &slice_params->pred_weight_table);
 +
 +    slice_params->num_entry_point_offsets = sh->num_entry_point_offsets;
++#if HEVC_CTRLS_VERSION <= 3
 +    if (slice_params->num_entry_point_offsets > 256) {
 +        slice_params->num_entry_point_offsets = 256;
 +        av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
@@ -54283,6 +55851,7 @@ index 0000000000..55c41ae679
 +
 +    for (i = 0; i < slice_params->num_entry_point_offsets; i++)
 +        slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
++#endif
 +}
 +
 +#if HEVC_CTRLS_VERSION >= 2
@@ -54658,51 +56227,66 @@ index 0000000000..55c41ae679
 +#if HEVC_CTRLS_VERSION >= 2
 +    struct v4l2_ctrl_hevc_decode_params * const dec,
 +#endif
-+    struct v4l2_ctrl_hevc_slice_params * const slices,
-+    const unsigned int slice_no,
-+    const unsigned int slice_count)
++    struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count,
++    void * const offsets, const size_t offset_count)
 +{
 +    int rv;
++#if HEVC_CTRLS_VERSION >= 2
++    unsigned int n = 3;
++#else
++    unsigned int n = 2;
++#endif
 +
-+    struct v4l2_ext_control control[] = {
++    struct v4l2_ext_control control[6] = {
 +        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
++            .id = V4L2_CID_STATELESS_HEVC_SPS,
 +            .ptr = &controls->sps,
 +            .size = sizeof(controls->sps),
 +        },
 +        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
++            .id = V4L2_CID_STATELESS_HEVC_PPS,
 +            .ptr = &controls->pps,
 +            .size = sizeof(controls->pps),
 +        },
 +#if HEVC_CTRLS_VERSION >= 2
 +        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS,
++            .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
 +            .ptr = dec,
 +            .size = sizeof(*dec),
 +        },
 +#endif
-+        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
-+            .ptr = slices + slice_no,
-+            .size = sizeof(*slices) * slice_count,
-+        },
-+        // Optional
-+        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
-+            .ptr = &controls->scaling_matrix,
-+            .size = sizeof(controls->scaling_matrix),
-+        },
 +    };
 +
-+    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control,
-+            controls->has_scaling ?
-+                FF_ARRAY_ELEMS(control) :
-+                FF_ARRAY_ELEMS(control) - 1);
++    if (slices)
++        control[n++] = (struct v4l2_ext_control) {
++            .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
++            .ptr = slices,
++            .size = sizeof(*slices) * slice_count,
++        };
++
++    if (controls->has_scaling)
++        control[n++] = (struct v4l2_ext_control) {
++            .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
++            .ptr = &controls->scaling_matrix,
++            .size = sizeof(controls->scaling_matrix),
++        };
++
++#if HEVC_CTRLS_VERSION >= 4
++    if (offsets)
++        control[n++] = (struct v4l2_ext_control) {
++            .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS,
++            .ptr = offsets,
++            .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count,
++        };
++#endif
++
++    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n);
 +
 +    return rv;
 +}
 +
++// This only works because we started out from a single coded frame buffer
++// that will remain intact until after end_frame
 +static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
 +{
 +    const HEVCContext * const h = avctx->priv_data;
@@ -54711,18 +56295,45 @@ index 0000000000..55c41ae679
 +    int bcount = get_bits_count(&h->HEVClc->gb);
 +    uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount;
 +
++    const unsigned int n = rd->num_slices;
++    const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices;
++
 +    int rv;
 +    struct slice_info * si;
 +
++    // This looks dodgy but we know that FFmpeg has parsed this from a buffer
++    // that contains the entire frame including the start code
++    if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) {
++        buffer -= 3;
++        size += 3;
++        boff += 24;
++        if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) {
++            av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n",
++                   buffer[0], buffer[1], buffer[2]);
++        }
++    }
++
++    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) {
++        if (rd->slices == NULL) {
++            if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL)
++                return AVERROR(ENOMEM);
++            rd->slices->ptr = buffer;
++            rd->num_slices = 1;
++        }
++        rd->slices->len = buffer - rd->slices->ptr + size;
++        return 0;
++    }
++
 +    if ((rv = slice_add(rd)) != 0)
 +        return rv;
 +
-+    si = rd->slices + rd->num_slices - 1;
++    si = rd->slices + n;
 +    si->ptr = buffer;
 +    si->len = size;
++    si->n_offsets = rd->num_offsets;
 +
-+    if (ctx->multi_slice && rd->num_slices > 1) {
-+        struct slice_info *const si0 = rd->slices;
++    if (n != block_start) {
++        struct slice_info *const si0 = rd->slices + block_start;
 +        const size_t offset = (buffer - si0->ptr);
 +        boff += offset * 8;
 +        size += offset;
@@ -54730,12 +56341,15 @@ index 0000000000..55c41ae679
 +    }
 +
 +#if HEVC_CTRLS_VERSION >= 2
-+    if (rd->num_slices == 1)
++    if (n == 0)
 +        fill_decode_params(h, &rd->dec);
-+    fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff);
++    fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff);
 +#else
-+    fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff);
++    fill_slice_params(h, rd->slice_params + n, size * 8, boff);
 +#endif
++    if (ctx->max_offsets != 0 &&
++        (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0)
++        return rv;
 +
 +    return 0;
 +}
@@ -54761,10 +56375,13 @@ index 0000000000..55c41ae679
 +{
 +    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
 +
++    const int is_last = (j == rd->num_slices);
 +    struct slice_info *const si = rd->slices + i;
 +    struct media_request * req = NULL;
 +    struct qent_src * src = NULL;
 +    MediaBufsStatus stat;
++    void * offsets = rd->offsets + rd->slices[i].n_offsets;
++    size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets;
 +
 +    if ((req = media_request_get(ctx->mpool)) == NULL) {
 +        av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__);
@@ -54776,8 +56393,8 @@ index 0000000000..55c41ae679
 +#if HEVC_CTRLS_VERSION >= 2
 +                     &rd->dec,
 +#endif
-+                     rd->slice_params,
-+                     i, j - i)) {
++                     rd->slice_params + i, j - i,
++                     offsets, n_offsets)) {
 +        av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__);
 +        goto fail1;
 +    }
@@ -54797,13 +56414,9 @@ index 0000000000..55c41ae679
 +        goto fail2;
 +    }
 +
-+#warning ANNEX_B start code
-+//        if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
-+//        }
-+
 +    stat = mediabufs_start_request(ctx->mbufs, &req, &src,
 +                                   i == 0 ? rd->qe_dst : NULL,
-+                                   j == rd->num_slices);
++                                   is_last);
 +
 +    if (stat != MEDIABUFS_STATUS_SUCCESS) {
 +        av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__);
@@ -54868,18 +56481,11 @@ index 0000000000..55c41ae679
 +    }
 +
 +    // Send as slices
-+    if (ctx->multi_slice)
-+    {
-+        if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0)
++    for (i = 0; i < rd->num_slices; i += ctx->max_slices) {
++        const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices);
++        if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0)
 +            goto fail;
 +    }
-+    else
-+    {
-+        for (i = 0; i != rd->num_slices; ++i) {
-+            if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0)
-+                goto fail;
-+        }
-+    }
 +
 +    // Set the drm_prime desriptor
 +    drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
@@ -54894,6 +56500,12 @@ index 0000000000..55c41ae679
 +    return rv;
 +}
 +
++static inline int  // non-zero iff v lies within the control's inclusive [minimum, maximum]
++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
++{
++    return !(v < c->minimum || v > c->maximum);
++}
++
 +// Initial check & init
 +static int
 +probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
@@ -54905,17 +56517,19 @@ index 0000000000..55c41ae679
 +
 +    // Check for var slice array
 +    struct v4l2_query_ext_ctrl qc[] = {
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX },
++        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS },
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_SPS },
++        { .id = V4L2_CID_STATELESS_HEVC_PPS },
++        { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX },
 +#if HEVC_CTRLS_VERSION >= 2
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS },
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS },
 +#endif
 +    };
 +    // Order & size must match!
 +    static const size_t ctrl_sizes[] = {
 +        sizeof(struct v4l2_ctrl_hevc_slice_params),
++        sizeof(int32_t),
 +        sizeof(struct v4l2_ctrl_hevc_sps),
 +        sizeof(struct v4l2_ctrl_hevc_pps),
 +        sizeof(struct v4l2_ctrl_hevc_scaling_matrix),
@@ -54933,11 +56547,22 @@ index 0000000000..55c41ae679
 +        return AVERROR(EINVAL);
 +#endif
 +
-+    if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) {
-+        av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION);
++    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls);
++    i = 0;
++#if HEVC_CTRLS_VERSION >= 4
++    // Skip slice check if no slice mode
++    if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++        i = 1;
++#else
++    // Fail frame mode silently for anything prior to V4
++    if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
 +        return AVERROR(EINVAL);
-+    }
-+    for (i = 0; i != noof_ctrls; ++i) {
++#endif
++    for (; i != noof_ctrls; ++i) {
++        if (qc[i].type == 0) {
++            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id);
++            return AVERROR(EINVAL);
++        }
 +        if (ctrl_sizes[i] != (size_t)qc[i].elem_size) {
 +            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n",
 +                   HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size);
@@ -54947,12 +56572,11 @@ index 0000000000..55c41ae679
 +
 +    fill_sps(&ctrl_sps, sps);
 +
-+    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
++    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
 +        av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n");
 +        return AVERROR(EINVAL);
 +    }
 +
-+    ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0;
 +    return 0;
 +}
 +
@@ -54963,38 +56587,63 @@ index 0000000000..55c41ae679
 +    int ret;
 +
 +    struct v4l2_query_ext_ctrl querys[] = {
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, },
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, },
++#if HEVC_CTRLS_VERSION >= 4
++        { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, },
++#endif
 +    };
 +
 +    struct v4l2_ext_control ctrls[] = {
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
 +    };
 +
 +    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
 +
-+    ctx->decode_mode = querys[0].default_value;
++    ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) ||
++                       querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ?
++        1 : querys[2].dims[0];
++    av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices);
 +
-+    if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED &&
-+        ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode);
++#if HEVC_CTRLS_VERSION >= 4
++    ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ?
++        0 : querys[3].dims[0];
++    av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets);
++#else
++    ctx->max_offsets = 0;
++#endif
++
++    if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED ||
++        querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)
++        ctx->decode_mode = querys[0].default_value;
++    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED))
++        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED;
++    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED;
++    else {
++        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__);
 +        return AVERROR(EINVAL);
 +    }
 +
-+    ctx->start_code = querys[1].default_value;
-+    if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE &&
-+        ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code);
++    if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE ||
++        querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)
++        ctx->start_code = querys[1].default_value;
++    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B))
++        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
++    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
++        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
++    else {
++        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__);
 +        return AVERROR(EINVAL);
 +    }
 +
-+    ctx->max_slices = querys[2].elems;
-+    if (ctx->max_slices > MAX_SLICES) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices);
-+        return AVERROR(EINVAL);
-+    }
++    // If we are in slice mode & START_CODE_NONE supported then pick that
++    // as it doesn't require the slightly dodgy look backwards in our raw buffer
++    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED &&
++        ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
++        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
 +
 +    ctrls[0].value = ctx->decode_mode;
 +    ctrls[1].value = ctx->start_code;
@@ -55018,6 +56667,7 @@ index 0000000000..55c41ae679
 +
 +    av_freep(&rd->slices);
 +    av_freep(&rd->slice_params);
++    av_freep(&rd->offsets);
 +
 +    av_free(rd);
 +}
@@ -57293,10 +58943,14 @@ index 0000000000..e1182cb2fc
 +#endif /* POLLQUEUE_H_ */
 diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h
 new file mode 100644
-index 0000000000..cb4bd164b4
+index 0000000000..a31cc1f4ec
 --- /dev/null
 +++ b/libavcodec/v4l2_req_utils.h
-@@ -0,0 +1,22 @@
+@@ -0,0 +1,27 @@
++#ifndef AVCODEC_V4L2_REQ_UTILS_H
++#define AVCODEC_V4L2_REQ_UTILS_H
++
++#include <stdint.h>
 +#include "libavutil/log.h"
 +
 +#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
@@ -57319,12 +58973,13 @@ index 0000000000..cb4bd164b4
 +    return tbuf;
 +}
 +
++#endif
 diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
 new file mode 100644
-index 0000000000..0ae14db90b
+index 0000000000..27b1b8dd6d
 --- /dev/null
 +++ b/libavcodec/v4l2_request_hevc.c
-@@ -0,0 +1,311 @@
+@@ -0,0 +1,315 @@
 +/*
 + * This file is part of FFmpeg.
 + *
@@ -57536,7 +59191,11 @@ index 0000000000..0ae14db90b
 +        goto fail4;
 +    }
 +
-+    if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) {
++    if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) {
++        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n");
++        ctx->fns = &V2(ff_v4l2_req_hevc, 4);
++    }
++    else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) {
 +        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n");
 +        ctx->fns = &V2(ff_v4l2_req_hevc, 3);
 +    }
@@ -57638,13 +59297,14 @@ index 0000000000..0ae14db90b
 +};
 diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h
 new file mode 100644
-index 0000000000..b2cb8c8584
+index 0000000000..99c90064ea
 --- /dev/null
 +++ b/libavcodec/v4l2_request_hevc.h
 @@ -0,0 +1,102 @@
 +#ifndef AVCODEC_V4L2_REQUEST_HEVC_H
 +#define AVCODEC_V4L2_REQUEST_HEVC_H
 +
++#include <stdint.h>
 +#include <drm_fourcc.h>
 +#include "v4l2_req_decode_q.h"
 +
@@ -57689,8 +59349,6 @@ index 0000000000..b2cb8c8584
 +#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY	0x0800
 +#endif
 +
-+#define MAX_SLICES 128
-+
 +#define VCAT(name, version) name##_v##version
 +#define V2(n,v) VCAT(n, v)
 +#define V(n) V2(n, HEVC_CTRLS_VERSION)
@@ -57707,10 +59365,10 @@ index 0000000000..b2cb8c8584
 +
 +    unsigned int timestamp;  // ?? maybe uint64_t
 +
-+    int multi_slice;
 +    int decode_mode;
 +    int start_code;
-+    int max_slices;
++    unsigned int max_slices;    // 0 => not wanted (frame mode)
++    unsigned int max_offsets;   // 0 => not wanted
 +
 +    req_decode_q decode_q;
 +
@@ -57742,6 +59400,7 @@ index 0000000000..b2cb8c8584
 +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
 +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
 +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4);
 +
 +#endif
 diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
@@ -60049,7 +61708,7 @@ index 5123540653..eb1e755982 100644
  OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
                                                  opencl/unsharp.o
 diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
-index 1183e40267..174e3f7ef7 100644
+index 1183e40267..4be094ecc9 100644
 --- a/libavfilter/allfilters.c
 +++ b/libavfilter/allfilters.c
 @@ -204,6 +204,7 @@ extern AVFilter ff_vf_dedot;
@@ -60060,7 +61719,15 @@ index 1183e40267..174e3f7ef7 100644
  extern AVFilter ff_vf_deinterlace_vaapi;
  extern AVFilter ff_vf_dejudder;
  extern AVFilter ff_vf_delogo;
-@@ -414,6 +415,7 @@ extern AVFilter ff_vf_transpose_opencl;
+@@ -356,6 +357,7 @@ extern AVFilter ff_vf_scale;
+ extern AVFilter ff_vf_scale_cuda;
+ extern AVFilter ff_vf_scale_npp;
+ extern AVFilter ff_vf_scale_qsv;
++extern AVFilter ff_vf_scale_v4l2m2m;
+ extern AVFilter ff_vf_scale_vaapi;
+ extern AVFilter ff_vf_scale_vulkan;
+ extern AVFilter ff_vf_scale2ref;
+@@ -414,6 +416,7 @@ extern AVFilter ff_vf_transpose_opencl;
  extern AVFilter ff_vf_transpose_vaapi;
  extern AVFilter ff_vf_trim;
  extern AVFilter ff_vf_unpremultiply;
@@ -60221,10 +61888,10 @@ index bf30f54177..eb5dfa22f8 100644
          case AVMEDIA_TYPE_AUDIO:
 diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
 new file mode 100644
-index 0000000000..d1c714b805
+index 0000000000..4ed510cd19
 --- /dev/null
 +++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -0,0 +1,1282 @@
+@@ -0,0 +1,1994 @@
 +/*
 + * This file is part of FFmpeg.
 + *
@@ -60262,6 +61929,8 @@ index 0000000000..d1c714b805
 +#include <sys/mman.h>
 +#include <unistd.h>
 +
++#include "config.h"
++
 +#include "libavutil/avassert.h"
 +#include "libavutil/avstring.h"
 +#include "libavutil/common.h"
@@ -60279,33 +61948,49 @@ index 0000000000..d1c714b805
 +#include "avfilter.h"
 +#include "formats.h"
 +#include "internal.h"
++#include "scale_eval.h"
 +#include "video.h"
 +
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in drm_fourcc.h; hopefully they will be sometime in the future, but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
 +typedef struct V4L2Queue V4L2Queue;
 +typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared;
 +
-+typedef struct V4L2PlaneInfo {
-+    int bytesperline;
-+    size_t length;
-+} V4L2PlaneInfo;
++// Which filter a context is running as; values start at 1, so 0 matches neither
++typedef enum filter_type_v4l2_e {
++    FILTER_V4L2_DEINTERLACE = 1,
++    FILTER_V4L2_SCALE       = 2,
++} filter_type_v4l2_t;
 +
 +typedef struct V4L2Buffer {
 +    int enqueued;
 +    int reenqueue;
-+    int fd;
 +    struct v4l2_buffer buffer;
 +    AVFrame frame;
 +    struct v4l2_plane planes[VIDEO_MAX_PLANES];
 +    int num_planes;
-+    V4L2PlaneInfo plane_info[VIDEO_MAX_PLANES];
 +    AVDRMFrameDescriptor drm_frame;
 +    V4L2Queue *q;
 +} V4L2Buffer;
 +
 +typedef struct V4L2Queue {
 +    struct v4l2_format format;
++    struct v4l2_selection sel;  // zeroed and given the same buffer type as format by init_format()
 +    int num_buffers;
 +    V4L2Buffer *buffers;
++    const char * name;  // NOTE(review): presumably a label for log output - not assigned in the visible code, confirm
 +    DeintV4L2M2MContextShared *ctx;
 +} V4L2Queue;
 +
@@ -60338,11 +62023,18 @@ index 0000000000..d1c714b805
 +
 +typedef struct DeintV4L2M2MContextShared {
 +    void * logctx;  // For logging - will be NULL when done
++    filter_type_v4l2_t filter_type;
 +
 +    int fd;
 +    int done;
 +    int width;
 +    int height;
++
++    // from options
++    int output_width;
++    int output_height;
++    enum AVPixelFormat output_format;
++
 +    int orig_width;
 +    int orig_height;
 +    atomic_uint refcount;
@@ -60361,8 +62053,64 @@ index 0000000000..d1c714b805
 +    const AVClass *class;
 +
 +    DeintV4L2M2MContextShared *shared;
++
++    char * w_expr;
++    char * h_expr;
++    char * output_format_string;  // NOTE(review): presumably the textual form of the output format option - confirm
++
++    int force_original_aspect_ratio;
++    int force_divisible_by;
++
++    char *colour_primaries_string;
++    char *colour_transfer_string;
++    char *colour_matrix_string;
++    int   colour_range;
++    char *chroma_location_string;
++
++    enum AVColorPrimaries colour_primaries;
++    enum AVColorTransferCharacteristic colour_transfer;
++    enum AVColorSpace colour_matrix;
++    enum AVChromaLocation chroma_location;
 +} DeintV4L2M2MContext;
 +
++// Map an AVPixelFormat to the matching V4L2 fourcc.
++// This only lists the formats we know we can cope with;
++// anything else yields 0.
++static uint32_t
++fmt_av_to_v4l2(const enum AVPixelFormat avfmt)
++{
++    uint32_t pixfmt = 0;
++
++    switch (avfmt) {
++    case AV_PIX_FMT_YUV420P: pixfmt = V4L2_PIX_FMT_YUV420;      break;
++    case AV_PIX_FMT_NV12:    pixfmt = V4L2_PIX_FMT_NV12;        break;
++#if CONFIG_SAND
++    case AV_PIX_FMT_RPI4_8:  // maps to the same fourcc as SAND128
++    case AV_PIX_FMT_SAND128: pixfmt = V4L2_PIX_FMT_NV12_COL128; break;
++#endif
++    default:                 break;
++    }
++    return pixfmt;
++}
++
++// Map a V4L2 fourcc to an AVPixelFormat; NV12_COL128 always comes back as
++// AV_PIX_FMT_RPI4_8. Unlisted fourccs yield AV_PIX_FMT_NONE.
++static enum AVPixelFormat
++fmt_v4l2_to_av(const uint32_t pixfmt)
++{
++    enum AVPixelFormat avfmt = AV_PIX_FMT_NONE;
++
++    switch (pixfmt) {
++    case V4L2_PIX_FMT_YUV420:      avfmt = AV_PIX_FMT_YUV420P; break;
++    case V4L2_PIX_FMT_NV12:        avfmt = AV_PIX_FMT_NV12;    break;
++#if CONFIG_SAND
++    case V4L2_PIX_FMT_NV12_COL128: avfmt = AV_PIX_FMT_RPI4_8;  break;
++#endif
++    default:                       break;
++    }
++    return avfmt;
++}
++
 +static unsigned int pts_stats_interval(const pts_stats_t * const stats)
 +{
 +    return stats->last_interval;
@@ -60528,6 +62276,39 @@ index 0000000000..d1c714b805
 +    return 0;
 +}
 +
++// bytesperline of plane plane_n; single-planar formats have one value and ignore plane_n
++static inline uint32_t fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n)
++{
++    return !V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix.bytesperline : fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline;
++}
++
++// Height from whichever member of the format union fmt->type selects
++static inline uint32_t fmt_height(const struct v4l2_format * const fmt)
++{
++    return !V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix.height : fmt->fmt.pix_mp.height;
++}
++
++// Width from whichever member of the format union fmt->type selects
++static inline uint32_t fmt_width(const struct v4l2_format * const fmt)
++{
++    return !V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix.width : fmt->fmt.pix_mp.width;
++}
++
++// Fourcc from whichever member of the format union fmt->type selects
++static inline uint32_t fmt_pixelformat(const struct v4l2_format * const fmt)
++{
++    return !V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix.pixelformat : fmt->fmt.pix_mp.pixelformat;
++}
++
++// Zero a queue's format and selection state, then tag both with the buffer type
++static void init_format(V4L2Queue * const q, const uint32_t format_type)
++{
++    memset(&q->sel, 0, sizeof(q->sel));
++    q->sel.type = format_type;
++    memset(&q->format, 0, sizeof(q->format));
++    q->format.type = format_type;
++}
++
 +static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx)
 +{
 +    struct v4l2_capability cap;
@@ -60538,78 +62319,99 @@ index 0000000000..d1c714b805
 +    if (ret < 0)
 +        return ret;
 +
-+    if (!(cap.capabilities & V4L2_CAP_STREAMING))
++    if (ctx->filter_type == FILTER_V4L2_SCALE &&
++        strcmp("bcm2835-codec-isp", cap.card) != 0)
++    {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n");
 +        return AVERROR(EINVAL);
++    }
 +
-+    if (cap.capabilities & V4L2_CAP_VIDEO_M2M) {
-+        ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-+        ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-+
-+        return 0;
++    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n");
++        return AVERROR(EINVAL);
 +    }
 +
 +    if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) {
-+        ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
-+        ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
-+
-+        return 0;
++        init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
++        init_format(&ctx->output,  V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
++    }
++    else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) {
++        init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE);
++        init_format(&ctx->output,  V4L2_BUF_TYPE_VIDEO_OUTPUT);
++    }
++    else {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n");
++        return AVERROR(EINVAL);
 +    }
 +
-+    return AVERROR(EINVAL);
++    return 0;
 +}
 +
-+static int deint_v4l2m2m_try_format(V4L2Queue *queue)
++// Just use for probe - doesn't modify q format
++static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt)
 +{
-+    struct v4l2_format *fmt        = &queue->format;
++    struct v4l2_format fmt         = {.type = queue->format.type};
 +    DeintV4L2M2MContextShared *ctx = queue->ctx;
 +    int ret, field;
++    // Pick YUV to test with if not otherwise specified
++    uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt);
++    enum AVPixelFormat r_avfmt;
 +
-+    ret = ioctl(ctx->fd, VIDIOC_G_FMT, fmt);
++
++    ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt);
 +    if (ret)
 +        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret);
 +
-+    if (V4L2_TYPE_IS_OUTPUT(fmt->type))
++    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type))
 +        field = V4L2_FIELD_INTERLACED_TB;
 +    else
 +        field = V4L2_FIELD_NONE;
 +
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        fmt->fmt.pix_mp.pixelformat = V4L2_PIX_FMT_YUV420;
-+        fmt->fmt.pix_mp.field = field;
-+        fmt->fmt.pix_mp.width = ctx->width;
-+        fmt->fmt.pix_mp.height = ctx->height;
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
++        fmt.fmt.pix_mp.pixelformat = pixelformat;
++        fmt.fmt.pix_mp.field = field;
++        fmt.fmt.pix_mp.width = width;
++        fmt.fmt.pix_mp.height = height;
 +    } else {
-+        fmt->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420;
-+        fmt->fmt.pix.field = field;
-+        fmt->fmt.pix.width = ctx->width;
-+        fmt->fmt.pix.height = ctx->height;
++        fmt.fmt.pix.pixelformat = pixelformat;
++        fmt.fmt.pix.field = field;
++        fmt.fmt.pix.width = width;
++        fmt.fmt.pix.height = height;
 +    }
 +
-+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__,
-+		 fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height,
-+		 fmt->fmt.pix_mp.pixelformat,
-+		 fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline);
++    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__,
++         fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
++         fmt.fmt.pix_mp.pixelformat,
++         fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
 +
-+    ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, fmt);
++    ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt);
 +    if (ret)
 +        return AVERROR(EINVAL);
 +
-+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__,
-+		 fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height,
-+		 fmt->fmt.pix_mp.pixelformat,
-+		 fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline);
++    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__,
++         fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
++         fmt.fmt.pix_mp.pixelformat,
++         fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
 +
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        if (fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 ||
-+            fmt->fmt.pix_mp.field != field) {
-+            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
++    r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt));
++    if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src");
++        return AVERROR(EINVAL);
++    }
++    if (r_avfmt == AV_PIX_FMT_NONE) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src");
++        return AVERROR(EINVAL);
++    }
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
++        if (fmt.fmt.pix_mp.field != field) {
++            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type);
 +
 +            return AVERROR(EINVAL);
 +        }
 +    } else {
-+        if (fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 ||
-+            fmt->fmt.pix.field != field) {
-+            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
++        if (fmt.fmt.pix.field != field) {
++            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type);
 +
 +            return AVERROR(EINVAL);
 +        }
@@ -60618,51 +62420,412 @@ index 0000000000..d1c714b805
 +    return 0;
 +}
 +
-+static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, int height, int pitch, int ysize)
++static int
++do_s_fmt(V4L2Queue * const q)  // apply q->format with VIDIOC_S_FMT, then q->sel with VIDIOC_S_SELECTION
 +{
-+    struct v4l2_format *fmt        = &queue->format;
-+    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    DeintV4L2M2MContextShared * const ctx = q->ctx;
++    const uint32_t pixelformat = fmt_pixelformat(&q->format);  // requested fourcc, compared again after S_FMT
 +    int ret;
 +
-+    struct v4l2_selection sel = {
-+        .type = fmt->type,
-+        .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS,
-+    };
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        fmt->fmt.pix_mp.field = field;
-+        fmt->fmt.pix_mp.width = width;
-+        fmt->fmt.pix_mp.height = ysize / pitch;
-+        fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch;
-+        fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1);
-+    } else {
-+        fmt->fmt.pix.field = field;
-+        fmt->fmt.pix.width = width;
-+        fmt->fmt.pix.height = height;
-+        fmt->fmt.pix.sizeimage = 0;
-+        fmt->fmt.pix.bytesperline = 0;
++    ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format);
++    if (ret) {
++        ret = AVERROR(errno);
++        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret));
++        return ret;
 +    }
 +
-+    ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt);
-+    if (ret)
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret);
++    if (pixelformat != fmt_pixelformat(&q->format)) {  // q->format now holds a different fourcc than was requested
++        av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format)));
++        return AVERROR(EINVAL);
++    }
 +
-+    ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel);
-+    if (ret)
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_SELECTION failed: %d\n", ret);
++    q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE,  // NOTE(review): ends in ',' (comma operator); both assignments still run
++    q->sel.flags  = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE;
 +
-+    sel.r.width = width;
-+    sel.r.height = height;
-+    sel.r.left = 0;
-+    sel.r.top = 0;
-+    sel.target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE,
-+    sel.flags = V4L2_SEL_FLAG_LE;
++    ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel);
++    if (ret) {
++        ret = AVERROR(errno);
++        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret));
++    }  // a selection failure is only logged as a warning; the function still returns 0 below
 +
-+    ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel);
-+    if (ret)
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_SELECTION failed: %d\n", ret);
++    return 0;
++}
 +
-+    return ret;
++// Fill fmt's V4L2 colorspace / ycbcr_enc / xfer_func from libav primaries (avcp), matrix (avcs) and transfer (avxc)
++static void set_fmt_color(struct v4l2_format *const fmt,
++                          const enum AVColorPrimaries avcp,
++                          const enum AVColorSpace avcs,
++                          const enum AVColorTransferCharacteristic avxc)
++{
++    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
++    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
++    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
++
++    switch (avcp) {  // primaries give a first guess at the colorspace (and the encoding for 709 / 470M)
++    case AVCOL_PRI_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        ycbcr = V4L2_YCBCR_ENC_709;
++        break;
++    case AVCOL_PRI_BT470M:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        ycbcr = V4L2_YCBCR_ENC_601;
++        break;
++    case AVCOL_PRI_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_PRI_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_PRI_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_PRI_BT2020:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    case AVCOL_PRI_SMPTE428:
++    case AVCOL_PRI_SMPTE431:
++    case AVCOL_PRI_SMPTE432:
++    case AVCOL_PRI_EBU3213:
++    case AVCOL_PRI_RESERVED:
++    case AVCOL_PRI_FILM:
++    case AVCOL_PRI_UNSPECIFIED:
++    default:
++        break;
++    }
++
++    switch (avcs) {  // a recognised matrix overrides the colorspace chosen from the primaries
++    case AVCOL_SPC_RGB:
++        cs = V4L2_COLORSPACE_SRGB;
++        break;
++    case AVCOL_SPC_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        break;
++    case AVCOL_SPC_FCC:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        break;
++    case AVCOL_SPC_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_SPC_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_SPC_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_SPC_BT2020_CL:
++        cs = V4L2_COLORSPACE_BT2020;
++        ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
++        break;
++    case AVCOL_SPC_BT2020_NCL:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    default:
++        break;
++    }
++
++    switch (avxc) {  // transfer function comes from the libav TRC; unmapped values keep the V4L2 default
++    case AVCOL_TRC_BT709:
++        xfer = V4L2_XFER_FUNC_709;
++        break;
++    case AVCOL_TRC_IEC61966_2_1:
++        xfer = V4L2_XFER_FUNC_SRGB;
++        break;
++    case AVCOL_TRC_SMPTE240M:
++        xfer = V4L2_XFER_FUNC_SMPTE240M;
++        break;
++    case AVCOL_TRC_SMPTE2084:
++        xfer = V4L2_XFER_FUNC_SMPTE2084;
++        break;
++    default:
++        break;
++    }
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        fmt->fmt.pix_mp.colorspace = cs;
++        fmt->fmt.pix_mp.ycbcr_enc = ycbcr;
++        fmt->fmt.pix_mp.xfer_func = xfer;
++    } else {
++        fmt->fmt.pix.colorspace = cs;
++        fmt->fmt.pix.ycbcr_enc = ycbcr;
++        fmt->fmt.pix.xfer_func = xfer;
++    }
++}
++
++// Translate the libav colour range avcr into fmt's V4L2 quantization field
++static void set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr)
++{
++    enum v4l2_quantization quant = V4L2_QUANTIZATION_DEFAULT;
++    if (avcr == AVCOL_RANGE_MPEG)
++        quant = V4L2_QUANTIZATION_LIM_RANGE;
++    else if (avcr == AVCOL_RANGE_JPEG)
++        quant = V4L2_QUANTIZATION_FULL_RANGE;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        fmt->fmt.pix_mp.quantization = quant;
++    else
++        fmt->fmt.pix.quantization = quant;
++}
++
++// Derive libav colour primaries from fmt: the ycbcr_enc field is consulted
++// first, then the colorspace; UNSPECIFIED if neither gives an answer
++static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt)
++{
++    const int mplane = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++    const enum v4l2_colorspace cs = mplane ? fmt->fmt.pix_mp.colorspace : fmt->fmt.pix.colorspace;
++    const enum v4l2_ycbcr_encoding enc = mplane ? fmt->fmt.pix_mp.ycbcr_enc : fmt->fmt.pix.ycbcr_enc;
++
++    // A recognised Y'CbCr encoding wins over the colorspace
++    switch (enc) {
++    case V4L2_YCBCR_ENC_709:
++    case V4L2_YCBCR_ENC_XV709:
++        return AVCOL_PRI_BT709;
++    case V4L2_YCBCR_ENC_601:
++    case V4L2_YCBCR_ENC_XV601:
++        return AVCOL_PRI_BT470M;
++    default:
++        break;
++    }
++
++    switch (cs) {
++    case V4L2_COLORSPACE_470_SYSTEM_BG:
++        return AVCOL_PRI_BT470BG;
++    case V4L2_COLORSPACE_SMPTE170M:
++        return AVCOL_PRI_SMPTE170M;
++    case V4L2_COLORSPACE_SMPTE240M:
++        return AVCOL_PRI_SMPTE240M;
++    case V4L2_COLORSPACE_BT2020:
++        return AVCOL_PRI_BT2020;
++    default:
++        return AVCOL_PRI_UNSPECIFIED;
++    }
++}
++
++// Derive the libav colour matrix (AVColorSpace) from fmt's V4L2 colorspace;
++// ycbcr_enc is only used to tell BT2020 constant from non-constant luminance
++static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt)
++{
++    const int mplane = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++    const enum v4l2_colorspace cs = mplane ? fmt->fmt.pix_mp.colorspace : fmt->fmt.pix.colorspace;
++    const enum v4l2_ycbcr_encoding enc = mplane ? fmt->fmt.pix_mp.ycbcr_enc : fmt->fmt.pix.ycbcr_enc;
++
++    switch (cs) {
++    case V4L2_COLORSPACE_SRGB:
++        return AVCOL_SPC_RGB;
++    case V4L2_COLORSPACE_REC709:
++        return AVCOL_SPC_BT709;
++    case V4L2_COLORSPACE_470_SYSTEM_M:
++        return AVCOL_SPC_FCC;
++    case V4L2_COLORSPACE_470_SYSTEM_BG:
++        return AVCOL_SPC_BT470BG;
++    case V4L2_COLORSPACE_SMPTE170M:
++        return AVCOL_SPC_SMPTE170M;
++    case V4L2_COLORSPACE_SMPTE240M:
++        return AVCOL_SPC_SMPTE240M;
++    case V4L2_COLORSPACE_BT2020:
++        if (enc == V4L2_YCBCR_ENC_BT2020_CONST_LUM)
++            return AVCOL_SPC_BT2020_CL;
++        return AVCOL_SPC_BT2020_NCL;
++    default:
++        break;
++    }
++
++    return AVCOL_SPC_UNSPECIFIED;
++}
++
++// Derive the libav transfer characteristic from fmt, trying xfer_func, then colorspace, then ycbcr_enc
++static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt)
++{
++    const int mplane = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++    const enum v4l2_xfer_func xfer = mplane ? fmt->fmt.pix_mp.xfer_func : fmt->fmt.pix.xfer_func;
++    const enum v4l2_colorspace cs = mplane ? fmt->fmt.pix_mp.colorspace : fmt->fmt.pix.colorspace;
++    const enum v4l2_ycbcr_encoding enc = mplane ? fmt->fmt.pix_mp.ycbcr_enc : fmt->fmt.pix.ycbcr_enc;
++
++    // 1) an explicit transfer function
++    switch (xfer) {
++    case V4L2_XFER_FUNC_709:
++        return AVCOL_TRC_BT709;
++    case V4L2_XFER_FUNC_SRGB:
++        return AVCOL_TRC_IEC61966_2_1;
++    default:
++        break;
++    }
++
++    // 2) otherwise whatever the colorspace maps to
++    switch (cs) {
++    case V4L2_COLORSPACE_470_SYSTEM_M:
++        return AVCOL_TRC_GAMMA22;
++    case V4L2_COLORSPACE_470_SYSTEM_BG:
++        return AVCOL_TRC_GAMMA28;
++    case V4L2_COLORSPACE_SMPTE170M:
++        return AVCOL_TRC_SMPTE170M;
++    case V4L2_COLORSPACE_SMPTE240M:
++        return AVCOL_TRC_SMPTE240M;
++    default:
++        break;
++    }
++
++    // 3) finally the XV709 / XV601 encodings
++    switch (enc) {
++    case V4L2_YCBCR_ENC_XV709:
++    case V4L2_YCBCR_ENC_XV601:
++        return AVCOL_TRC_BT1361_ECG;
++    default:
++        break;
++    }
++
++    return AVCOL_TRC_UNSPECIFIED;
++}
++
++// Read fmt's V4L2 quantization and report it as a libav colour range
++// (anything other than limited / full range maps to UNSPECIFIED)
++static enum AVColorRange get_color_range(const struct v4l2_format *const fmt)
++{
++    enum v4l2_quantization quant;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type))
++        quant = fmt->fmt.pix_mp.quantization;
++    else
++        quant = fmt->fmt.pix.quantization;
++
++    if (quant == V4L2_QUANTIZATION_LIM_RANGE)
++        return AVCOL_RANGE_MPEG;
++    if (quant == V4L2_QUANTIZATION_FULL_RANGE)
++        return AVCOL_RANGE_JPEG;
++    return AVCOL_RANGE_UNSPECIFIED;
++}
++
++// Fill q->format and q->sel from a DRM_PRIME source frame; AVERROR(EINVAL) if its layout is unsupported
++static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame)
++{
++    struct v4l2_format *const format = &q->format;
++    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++
++    const uint32_t drm_fmt = src->layers[0].format;
++    // Treat INVALID as LINEAR
++    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
++        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
++    uint32_t pix_fmt = 0;
++    uint32_t w = 0;
++    uint32_t h = 0;
++    uint32_t bpl = src->layers[0].planes[0].pitch;
++
++    // We really don't expect multiple layers; all formats that we currently cope with are single object
++    // A zero pitch is rejected too: the linear cases below derive the height by dividing by it
++    if (src->nb_layers != 1 || src->nb_objects != 1 || bpl == 0)
++        return AVERROR(EINVAL);
++
++    switch (drm_fmt) {
++        case DRM_FORMAT_YUV420:
++            if (mod == DRM_FORMAT_MOD_LINEAR) {
++                if (src->layers[0].nb_planes != 3)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_YUV420;
++                h = src->layers[0].planes[1].offset / bpl;
++                w = bpl;
++            }
++            break;
++
++        case DRM_FORMAT_NV12:
++            if (mod == DRM_FORMAT_MOD_LINEAR) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_NV12;
++                h = src->layers[0].planes[1].offset / bpl;
++                w = bpl;
++            }
++#if CONFIG_SAND
++            else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_NV12_COL128;
++                w = bpl;
++                h = src->layers[0].planes[1].offset / 128;
++                bpl = fourcc_mod_broadcom_param(mod);
++            }
++#endif
++            break;
++
++        case DRM_FORMAT_P030:
++#if CONFIG_SAND
++            if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_NV12_10_COL128;
++                w = bpl / 2;  // Matching lie to how we construct this
++                h = src->layers[0].planes[1].offset / 128;
++                bpl = fourcc_mod_broadcom_param(mod);
++            }
++#endif
++            break;
++
++        default:
++            break;
++    }
++
++    if (!pix_fmt)
++        return AVERROR(EINVAL);
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->plane_fmt[0].bytesperline = bpl;
++        pix->num_planes = 1;
++    }
++    else {
++        struct v4l2_pix_format *const pix = &format->fmt.pix;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->bytesperline = bpl;
++    }
++
++    set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc);
++    set_fmt_color_range(format, frame->color_range);
++
++    q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right);
++    q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom);
++    q->sel.r.left = frame->crop_left;
++    q->sel.r.top = frame->crop_top;
++
++    return 0;
++}
++
++
++// Build the destination format and selection rectangle in queue, then apply them with do_s_fmt()
++static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height)
++{
++    struct v4l2_format * const dst_fmt = &queue->format;
++    // Buffer dimensions are rounded up to 16 in case the next stage of the
++    // filter chain has alignment requirements (also RPi deinterlace setup is
++    // bust and this fixes it); the selection below keeps the exact size
++    const uint32_t aligned_w = FFALIGN(width, 16);
++    const uint32_t aligned_h = FFALIGN(height, 16);
++
++    memset(&dst_fmt->fmt, 0, sizeof(dst_fmt->fmt));
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(dst_fmt->type)) {
++        struct v4l2_pix_format_mplane * const mp = &dst_fmt->fmt.pix_mp;
++        mp->pixelformat = pixelformat;
++        mp->field = field;
++        mp->width = aligned_w;
++        mp->height = aligned_h;
++    } else {
++        struct v4l2_pix_format * const sp = &dst_fmt->fmt.pix;
++        sp->pixelformat = pixelformat;
++        sp->field = field;
++        sp->width = aligned_w;
++        sp->height = aligned_h;
++    }
++
++    set_fmt_color(dst_fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer);
++    set_fmt_color_range(dst_fmt, priv->colour_range);
++
++    queue->sel.r = (struct v4l2_rect){ .left = 0, .top = 0, .width = width, .height = height };
++    return do_s_fmt(queue);
 +}
 +
 +static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node)
@@ -60674,16 +62837,22 @@ index 0000000000..d1c714b805
 +        return AVERROR(errno);
 +
 +    ret = deint_v4l2m2m_prepare_context(ctx);
-+    if (ret)
++    if (ret) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n");
 +        goto fail;
++    }
 +
-+    ret = deint_v4l2m2m_try_format(&ctx->capture);
-+    if (ret)
++    ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format);
++    if (ret) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n");
 +        goto fail;
++    }
 +
-+    ret = deint_v4l2m2m_try_format(&ctx->output);
-+    if (ret)
++    ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE);
++    if (ret) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n");
 +        goto fail;
++    }
 +
 +    return 0;
 +
@@ -60744,11 +62913,118 @@ index 0000000000..d1c714b805
 +    return 0;
 +}
 +
-+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
++// Mark all AV_DRM_MAX_PLANES object slots of d as holding no fd (-1)
++static void drm_frame_init(AVDRMFrameDescriptor * const d)
++{
++    AVDRMObjectDescriptor *obj;
++
++    for (obj = d->objects; obj != d->objects + AV_DRM_MAX_PLANES; ++obj)
++        obj->fd = -1;
++}
++
++// Close every open fd among the first nb_objects objects of d and reset it to -1
++static void drm_frame_uninit(AVDRMFrameDescriptor * const d)
++{
++    for (unsigned int idx = 0; idx != d->nb_objects; ++idx) {
++        AVDRMObjectDescriptor * const obj = &d->objects[idx];
++        if (obj->fd == -1)
++            continue;
++        close(obj->fd);
++        obj->fd = -1;
++    }
++}
++
++// Release an array of n V4L2Buffers: close the fds held in each drm_frame, free
++// the array and NULL the caller's pointer. A NULL array is a no-op.
++static void avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n)
++{
++    V4L2Buffer* const bufs = *ppavbufs;
++    V4L2Buffer* cur;
++
++    if (!bufs)
++        return;
++
++    // Clear the caller's pointer before tearing the array down
++    *ppavbufs = NULL;
++
++    for (cur = bufs; cur != bufs + n; ++cur)
++        drm_frame_uninit(&cur->drm_frame);
++    av_free(bufs);
++}
++
++static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf)
 +{
 +    struct v4l2_exportbuffer expbuf;
 +    int i, ret;
++    uint64_t mod = DRM_FORMAT_MOD_LINEAR;
 +
++    AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame;
++    AVDRMLayerDescriptor * const layer = &drm_desc->layers[0];
++    const struct v4l2_format *const fmt = &q->format;
++    const uint32_t height = fmt_height(fmt);
++    ptrdiff_t bpl0;
++
++    /* fill the DRM frame descriptor */
++    drm_desc->nb_layers = 1;
++    layer->nb_planes = avbuf->num_planes;
++
++    for (int i = 0; i < avbuf->num_planes; i++) {
++        layer->planes[i].object_index = i;
++        layer->planes[i].offset = 0;
++        layer->planes[i].pitch = fmt_bpl(fmt, i);
++    }
++    bpl0 = layer->planes[0].pitch;
++
++    switch (fmt_pixelformat(fmt)) {
++#if CONFIG_SAND
++        case V4L2_PIX_FMT_NV12_COL128:
++            mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0);
++            layer->format = V4L2_PIX_FMT_NV12;
++
++            if (avbuf->num_planes > 1)
++                break;
++
++            layer->nb_planes = 2;
++            layer->planes[1].object_index = 0;
++            layer->planes[1].offset = height * 128;
++            layer->planes[0].pitch = fmt_width(fmt);
++            layer->planes[1].pitch = layer->planes[0].pitch;
++            break;
++#endif
++
++        case DRM_FORMAT_NV12:
++            layer->format = V4L2_PIX_FMT_NV12;
++
++            if (avbuf->num_planes > 1)
++                break;
++
++            layer->nb_planes = 2;
++            layer->planes[1].object_index = 0;
++            layer->planes[1].offset = bpl0 * height;
++            layer->planes[1].pitch = bpl0;
++            break;
++
++        case V4L2_PIX_FMT_YUV420:
++            layer->format = DRM_FORMAT_YUV420;
++
++            if (avbuf->num_planes > 1)
++                break;
++
++            layer->nb_planes = 3;
++            layer->planes[1].object_index = 0;
++            layer->planes[1].offset = bpl0 * height;
++            layer->planes[1].pitch = bpl0 / 2;
++            layer->planes[2].object_index = 0;
++            layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4);
++            layer->planes[2].pitch = bpl0 / 2;
++            break;
++
++        default:
++            drm_desc->nb_layers = 0;
++            return AVERROR(EINVAL);
++    }
++
++    drm_desc->nb_objects = 0;
 +    for (i = 0; i < avbuf->num_planes; i++) {
 +        memset(&expbuf, 0, sizeof(expbuf));
 +
@@ -60760,19 +63036,11 @@ index 0000000000..d1c714b805
 +        if (ret < 0)
 +            return AVERROR(errno);
 +
-+        avbuf->fd = expbuf.fd;
-+
-+        if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type)) {
-+            /* drm frame */
-+            avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length;
-+            avbuf->drm_frame.objects[i].fd = expbuf.fd;
-+            avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        } else {
-+            /* drm frame */
-+            avbuf->drm_frame.objects[0].size = avbuf->buffer.length;
-+            avbuf->drm_frame.objects[0].fd = expbuf.fd;
-+            avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        }
++        drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ?
++            avbuf->buffer.m.planes[i].length : avbuf->buffer.length;
++        drm_desc->objects[i].fd = expbuf.fd;
++        drm_desc->objects[i].format_modifier = mod;
++        drm_desc->nb_objects = i + 1;
 +    }
 +
 +    return 0;
@@ -60783,7 +63051,7 @@ index 0000000000..d1c714b805
 +    struct v4l2_format *fmt = &queue->format;
 +    DeintV4L2M2MContextShared *ctx = queue->ctx;
 +    struct v4l2_requestbuffers req;
-+    int ret, i, j, multiplanar;
++    int ret, i, multiplanar;
 +    uint32_t memory;
 +
 +    memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ?
@@ -60812,10 +63080,9 @@ index 0000000000..d1c714b805
 +    }
 +
 +    for (i = 0; i < queue->num_buffers; i++) {
-+        V4L2Buffer *buf = &queue->buffers[i];
++        V4L2Buffer * const buf = &queue->buffers[i];
 +
 +        buf->enqueued = 0;
-+        buf->fd = -1;
 +        buf->q = queue;
 +
 +        buf->buffer.type = fmt->type;
@@ -60827,6 +63094,12 @@ index 0000000000..d1c714b805
 +            buf->buffer.m.planes = buf->planes;
 +        }
 +
++        drm_frame_init(&buf->drm_frame);
++    }
++
++    for (i = 0; i < queue->num_buffers; i++) {
++        V4L2Buffer * const buf = &queue->buffers[i];
++
 +        ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer);
 +        if (ret < 0) {
 +            ret = AVERROR(errno);
@@ -60834,29 +63107,14 @@ index 0000000000..d1c714b805
 +            goto fail;
 +        }
 +
-+        if (multiplanar)
-+            buf->num_planes = buf->buffer.length;
-+        else
-+            buf->num_planes = 1;
-+
-+        for (j = 0; j < buf->num_planes; j++) {
-+            V4L2PlaneInfo *info = &buf->plane_info[j];
-+
-+            if (multiplanar) {
-+                info->bytesperline = fmt->fmt.pix_mp.plane_fmt[j].bytesperline;
-+                info->length = buf->buffer.m.planes[j].length;
-+            } else {
-+                info->bytesperline = fmt->fmt.pix.bytesperline;
-+                info->length = buf->buffer.length;
-+            }
-+        }
++        buf->num_planes = multiplanar ? buf->buffer.length : 1;
 +
 +        if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) {
 +            ret = deint_v4l2m2m_enqueue_buffer(buf);
 +            if (ret)
 +                goto fail;
 +
-+            ret = v4l2_buffer_export_drm(buf);
++            ret = v4l2_buffer_export_drm(queue, buf);
 +            if (ret)
 +                goto fail;
 +        }
@@ -60865,12 +63123,8 @@ index 0000000000..d1c714b805
 +    return 0;
 +
 +fail:
-+    for (i = 0; i < queue->num_buffers; i++)
-+        if (queue->buffers[i].fd >= 0)
-+            close(queue->buffers[i].fd);
-+    av_free(queue->buffers);
-+    queue->buffers = NULL;
-+
++    avbufs_delete(&queue->buffers, queue->num_buffers);
++    queue->num_buffers = 0;
 +    return ret;
 +}
 +
@@ -61057,7 +63311,6 @@ index 0000000000..d1c714b805
 +    if (atomic_fetch_sub(&ctx->refcount, 1) == 1) {
 +        V4L2Queue *capture = &ctx->capture;
 +        V4L2Queue *output  = &ctx->output;
-+        int i;
 +
 +        av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__);
 +
@@ -61066,12 +63319,7 @@ index 0000000000..d1c714b805
 +            deint_v4l2m2m_streamoff(output);
 +        }
 +
-+        if (capture->buffers)
-+            for (i = 0; i < capture->num_buffers; i++) {
-+                capture->buffers[i].q = NULL;
-+                if (capture->buffers[i].fd >= 0)
-+                    close(capture->buffers[i].fd);
-+            }
++        avbufs_delete(&capture->buffers, capture->num_buffers);
 +
 +        deint_v4l2m2m_unref_queued(output);
 +
@@ -61103,84 +63351,15 @@ index 0000000000..d1c714b805
 +    deint_v4l2m2m_destroy_context(ctx);
 +}
 +
-+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height)
-+{
-+    int av_pix_fmt = AV_PIX_FMT_YUV420P;
-+    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
-+    AVDRMLayerDescriptor *layer;
-+
-+    /* fill the DRM frame descriptor */
-+    drm_desc->nb_objects = avbuf->num_planes;
-+    drm_desc->nb_layers = 1;
-+
-+    layer = &drm_desc->layers[0];
-+    layer->nb_planes = avbuf->num_planes;
-+
-+    for (int i = 0; i < avbuf->num_planes; i++) {
-+        layer->planes[i].object_index = i;
-+        layer->planes[i].offset = 0;
-+        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
-+    }
-+
-+    switch (av_pix_fmt) {
-+    case AV_PIX_FMT_YUYV422:
-+
-+        layer->format = DRM_FORMAT_YUYV;
-+        layer->nb_planes = 1;
-+
-+        break;
-+
-+    case AV_PIX_FMT_NV12:
-+    case AV_PIX_FMT_NV21:
-+
-+        layer->format = av_pix_fmt == AV_PIX_FMT_NV12 ?
-+            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
-+
-+        if (avbuf->num_planes > 1)
-+            break;
-+
-+        layer->nb_planes = 2;
-+
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
-+            height;
-+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
-+        break;
-+
-+    case AV_PIX_FMT_YUV420P:
-+
-+        layer->format = DRM_FORMAT_YUV420;
-+
-+        if (avbuf->num_planes > 1)
-+            break;
-+
-+        layer->nb_planes = 3;
-+
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
-+            height;
-+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
-+
-+        layer->planes[2].object_index = 0;
-+        layer->planes[2].offset = layer->planes[1].offset +
-+            ((avbuf->plane_info[0].bytesperline *
-+              height) >> 2);
-+        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
-+        break;
-+
-+    default:
-+        drm_desc->nb_layers = 0;
-+        break;
-+    }
-+
-+    return (uint8_t *) drm_desc;
-+}
-+
 +// timeout in ms
 +static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout)
 +{
 +    DeintV4L2M2MContextShared *ctx = queue->ctx;
 +    V4L2Buffer* avbuf;
++    enum AVColorPrimaries color_primaries;
++    enum AVColorSpace colorspace;
++    enum AVColorTransferCharacteristic color_trc;
++    enum AVColorRange color_range;
 +
 +    av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
 +
@@ -61191,8 +63370,6 @@ index 0000000000..d1c714b805
 +    }
 +
 +    // Fill in PTS and anciliary info from src frame
-+    // we will want to overwrite some fields as only the pts/dts
-+    // fields are updated with new timing in this fn
 +    pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame);
 +
 +    frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame,
@@ -61205,18 +63382,36 @@ index 0000000000..d1c714b805
 +
 +    atomic_fetch_add(&ctx->refcount, 1);
 +
-+    frame->data[0] = (uint8_t *)v4l2_get_drm_frame(avbuf, ctx->orig_height);
++    frame->data[0] = (uint8_t *)&avbuf->drm_frame;
 +    frame->format = AV_PIX_FMT_DRM_PRIME;
 +    if (ctx->hw_frames_ctx)
 +        frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
-+    frame->height = ctx->height;
-+    frame->width = ctx->width;
++    frame->height = ctx->output_height;
++    frame->width = ctx->output_width;
 +
-+    // Not interlaced now
-+    frame->interlaced_frame = 0;
-+    frame->top_field_first = 0;
-+    // Pkt duration halved
-+    frame->pkt_duration /= 2;
++    color_primaries = get_color_primaries(&ctx->capture.format);
++    colorspace      = get_color_space(&ctx->capture.format);
++    color_trc       = get_color_trc(&ctx->capture.format);
++    color_range     = get_color_range(&ctx->capture.format);
++
++    // If the color parameters are unspecified by V4L2 then leave alone as they
++    // will have been copied from src
++    if (color_primaries != AVCOL_PRI_UNSPECIFIED)
++        frame->color_primaries = color_primaries;
++    if (colorspace != AVCOL_SPC_UNSPECIFIED)
++        frame->colorspace = colorspace;
++    if (color_trc != AVCOL_TRC_UNSPECIFIED)
++        frame->color_trc = color_trc;
++    if (color_range != AVCOL_RANGE_UNSPECIFIED)
++        frame->color_range = color_range;
++
++    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) {
++        // Not interlaced now
++        frame->interlaced_frame = 0;   // *** Fill in from dst buffer?
++        frame->top_field_first = 0;
++        // Pkt duration halved
++        frame->pkt_duration /= 2;
++    }
 +
 +    if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) {
 +        av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n");
@@ -61238,14 +63433,36 @@ index 0000000000..d1c714b805
 +    ctx->height = avctx->inputs[0]->h;
 +    ctx->width = avctx->inputs[0]->w;
 +
-+    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d\n", __func__, ctx->width, ctx->height);
++    if (ctx->filter_type == FILTER_V4L2_SCALE) {
++        if ((ret = ff_scale_eval_dimensions(priv,
++                                            priv->w_expr, priv->h_expr,
++                                            inlink, outlink,
++                                            &ctx->output_width, &ctx->output_height)) < 0)
++            return ret;
++
++        ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height,
++                                   priv->force_original_aspect_ratio, priv->force_divisible_by);
++    }
++    else {
++        ctx->output_width  = ctx->width;
++        ctx->output_height = ctx->height;
++    }
++
++    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__,
++           ctx->width, ctx->height, ctx->output_width, ctx->output_height,
++           inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den);
 +
 +    outlink->time_base           = inlink->time_base;
-+    outlink->w                   = inlink->w;
-+    outlink->h                   = inlink->h;
-+    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
++    outlink->w                   = ctx->output_width;
++    outlink->h                   = ctx->output_height;
 +    outlink->format              = inlink->format;
-+    outlink->frame_rate = (AVRational) {1, 0};  // Deny knowledge of frame rate
++    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0)
++        outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den};
++
++    if (inlink->sample_aspect_ratio.num)
++        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
++    else
++        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 +
 +    ret = deint_v4l2m2m_find_device(ctx);
 +    if (ret)
@@ -61263,13 +63480,37 @@ index 0000000000..d1c714b805
 +{
 +    static const enum AVPixelFormat pixel_formats[] = {
 +        AV_PIX_FMT_DRM_PRIME,
-+        AV_PIX_FMT_YUV420P,
++//        AV_PIX_FMT_YUV420P,
 +        AV_PIX_FMT_NONE,
 +    };
 +
 +    return ff_set_common_formats(avctx, ff_make_format_list(pixel_formats));
 +}
 +
++// Map a single-object DRM descriptor to a V4L2 fourcc; 0 means "not supported"
++static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc)
++{
++    const uint64_t mod = drm_desc->objects[0].format_modifier;
++    // An INVALID modifier is treated the same as LINEAR
++    const int linear = (mod == DRM_FORMAT_MOD_INVALID || mod == DRM_FORMAT_MOD_LINEAR);
++    const uint32_t drm_fourcc = drm_desc->layers[0].format;
++
++    // Only currently support single object things
++    if (drm_desc->nb_objects != 1)
++        return 0;
++
++    if (drm_fourcc == DRM_FORMAT_YUV420 && linear)
++        return V4L2_PIX_FMT_YUV420;
++    if (drm_fourcc == DRM_FORMAT_NV12 && linear)
++        return V4L2_PIX_FMT_NV12;
++#if CONFIG_SAND
++    if (drm_fourcc == DRM_FORMAT_NV12 &&
++        fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128)
++        return V4L2_PIX_FMT_NV12_COL128;
++#endif
++    return 0;
++}
++
 +static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
 +{
 +    AVFilterContext *avctx         = link->dst;
@@ -61285,41 +63526,71 @@ index 0000000000..d1c714b805
 +           avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out);
 +
 +    if (ctx->field_order == V4L2_FIELD_ANY) {
-+        AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)in->data[0];
++        const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0];
++        uint32_t pixelformat = desc_pixelformat(drm_desc);
++
++        if (pixelformat == 0) {
++            av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n",
++                   av_fourcc2str(drm_desc->layers[0].format),
++                   drm_desc->nb_objects, drm_desc->objects[0].format_modifier);
++            return AVERROR(EINVAL);
++        }
++
 +        ctx->orig_width = drm_desc->layers[0].planes[0].pitch;
 +        ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width;
 +
-+        av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%d,%d)\n", __func__, ctx->width, ctx->height,
++        av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height,
 +           drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset);
 +
++        if ((ret = set_src_fmt(output, in)) != 0) {
++            av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n",
++                   av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier);
++            return ret;
++        }
++
++        ret = do_s_fmt(output);
++        if (ret) {
++            av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n");
++            return ret;
++        }
++
++        if (ctx->output_format != AV_PIX_FMT_NONE)
++           pixelformat = fmt_av_to_v4l2(ctx->output_format);
++        ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height);
++        if (ret) {
++            av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n");
++            return ret;
++        }
++
++        ret = deint_v4l2m2m_allocate_buffers(capture);
++        if (ret) {
++            av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n");
++            return ret;
++        }
++
++        ret = deint_v4l2m2m_streamon(capture);
++        if (ret) {
++            av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret));
++            return ret;
++        }
++
++        ret = deint_v4l2m2m_allocate_buffers(output);
++        if (ret) {
++            av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n");
++            return ret;
++        }
++
++        ret = deint_v4l2m2m_streamon(output);
++        if (ret) {
++            av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret));
++            return ret;
++        }
++
 +        if (in->top_field_first)
 +            ctx->field_order = V4L2_FIELD_INTERLACED_TB;
 +        else
 +            ctx->field_order = V4L2_FIELD_INTERLACED_BT;
 +
-+        ret = deint_v4l2m2m_set_format(output, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_set_format(capture, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_allocate_buffers(capture);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_streamon(capture);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_allocate_buffers(output);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_streamon(output);
-+        if (ret)
-+            return ret;
 +    }
 +
 +    ret = deint_v4l2m2m_enqueue_frame(output, in);
@@ -61395,28 +63666,31 @@ index 0000000000..d1c714b805
 +        return 0;
 +    }
 +
-+    {
++    recycle_q(&s->output);
++    n = count_enqueued(&s->output);
++
++    while (n < 6) {
 +        AVFrame * frame;
 +        int rv;
 +
-+        recycle_q(&s->output);
-+        n = count_enqueued(&s->output);
-+
-+        while (n < 6) {
-+            if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
-+                av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
-+                return rv;
-+            }
-+
-+            if (frame == NULL) {
-+                av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
-+                break;
-+            }
-+
-+            deint_v4l2m2m_filter_frame(inlink, frame);
-+            av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
-+            ++n;
++        if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
++            av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
++            return rv;
 +        }
++
++        if (frame == NULL) {
++            av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
++            break;
++        }
++
++        rv = deint_v4l2m2m_filter_frame(inlink, frame);
++        av_frame_free(&frame);
++
++        if (rv != 0)
++            return rv;
++
++        av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
++        ++n;
 +    }
 +
 +    if (n < 6) {
@@ -61435,7 +63709,7 @@ index 0000000000..d1c714b805
 +    return did_something ? 0 : FFERROR_NOT_READY;
 +}
 +
-+static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
++static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type)
 +{
 +    DeintV4L2M2MContext * const priv = avctx->priv;
 +    DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared));
@@ -61446,11 +63720,14 @@ index 0000000000..d1c714b805
 +    }
 +    priv->shared = ctx;
 +    ctx->logctx = priv;
++    ctx->filter_type = filter_type;
 +    ctx->fd = -1;
 +    ctx->output.ctx = ctx;
 +    ctx->output.num_buffers = 8;
++    ctx->output.name = "OUTPUT";
 +    ctx->capture.ctx = ctx;
 +    ctx->capture.num_buffers = 12;
++    ctx->capture.name = "CAPTURE";
 +    ctx->done = 0;
 +    ctx->field_order = V4L2_FIELD_ANY;
 +
@@ -61458,9 +63735,52 @@ index 0000000000..d1c714b805
 +
 +    atomic_init(&ctx->refcount, 1);
 +
++    if (priv->output_format_string) {
++        ctx->output_format = av_get_pix_fmt(priv->output_format_string);
++        if (ctx->output_format == AV_PIX_FMT_NONE) {
++            av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string);
++            return AVERROR(EINVAL);
++        }
++        if (fmt_av_to_v4l2(ctx->output_format) == 0) {
++            av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format));
++            return AVERROR(EINVAL);
++        }
++    } else {
++        // Use the input format once that is configured.
++        ctx->output_format = AV_PIX_FMT_NONE;
++    }
++
++#define STRING_OPTION(var_name, func_name, default_value) do { \
++        if (priv->var_name ## _string) { \
++            int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \
++            if (var < 0) { \
++                av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \
++                return AVERROR(EINVAL); \
++            } \
++            priv->var_name = var; \
++        } else { \
++            priv->var_name = default_value; \
++        } \
++    } while (0)
++
++    STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED);
++    STRING_OPTION(colour_transfer,  color_transfer,  AVCOL_TRC_UNSPECIFIED);
++    STRING_OPTION(colour_matrix,    color_space,     AVCOL_SPC_UNSPECIFIED);
++    STRING_OPTION(chroma_location,  chroma_location, AVCHROMA_LOC_UNSPECIFIED);
++
 +    return 0;
 +}
 +
++static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
++{
++    return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE);
++}
++
++static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx)
++{
++    return common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE);
++}
++
 +static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
 +{
 +    DeintV4L2M2MContext *priv = avctx->priv;
@@ -61478,6 +63798,51 @@ index 0000000000..d1c714b805
 +
 +AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m);
 +
++#define OFFSET(x) offsetof(DeintV4L2M2MContext, x)
++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
++
++static const AVOption scale_v4l2m2m_options[] = {
++    { "w", "Output video width",
++      OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
++    { "h", "Output video height",
++      OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
++    { "format", "Output video format (software format of hardware frames)",
++      OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
++      // These colour properties match the ones of the same name in vf_scale.
++      { "out_color_matrix", "Output colour matrix coefficient set",
++      OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS },
++    { "out_range", "Output colour range",
++      OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED },
++      AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" },
++        { "full",    "Full range",
++          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
++        { "limited", "Limited range",
++          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
++        { "jpeg",    "Full range",
++          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
++        { "mpeg",    "Limited range",
++          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
++        { "tv",      "Limited range",
++          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" },
++        { "pc",      "Full range",
++          0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" },
++    // These colour properties match the ones in the VAAPI scaler
++    { "out_color_primaries", "Output colour primaries",
++      OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING,
++      { .str = NULL }, .flags = FLAGS },
++    { "out_color_transfer", "Output colour transfer characteristics",
++      OFFSET(colour_transfer_string),  AV_OPT_TYPE_STRING,
++      { .str = NULL }, .flags = FLAGS },
++    { "out_chroma_location", "Output chroma sample location",
++      OFFSET(chroma_location_string),  AV_OPT_TYPE_STRING,
++      { .str = NULL }, .flags = FLAGS },
++    { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" },
++    { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS },
++    { NULL },
++};
++
++AVFILTER_DEFINE_CLASS(scale_v4l2m2m);
++
 +static const AVFilterPad deint_v4l2m2m_inputs[] = {
 +    {
 +        .name         = "default",
@@ -61507,6 +63872,20 @@ index 0000000000..d1c714b805
 +    .priv_class     = &deinterlace_v4l2m2m_class,
 +    .activate       = deint_v4l2m2m_activate,
 +};
++
++AVFilter ff_vf_scale_v4l2m2m = {
++    .name           = "scale_v4l2m2m",
++    .description    = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"),
++    .priv_size      = sizeof(DeintV4L2M2MContext),
++    .init           = &scale_v4l2m2m_init,
++    .uninit         = &deint_v4l2m2m_uninit,
++    .query_formats  = &deint_v4l2m2m_query_formats,
++    .inputs         = deint_v4l2m2m_inputs,
++    .outputs        = deint_v4l2m2m_outputs,
++    .priv_class     = &scale_v4l2m2m_class,
++    .activate       = deint_v4l2m2m_activate,
++};
++
 diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c
 new file mode 100644
 index 0000000000..fbea56dd09
@@ -61747,6 +64126,97 @@ index 0000000000..fbea56dd09
 +    .outputs       = avfilter_vf_unsand_outputs,
 +};
 +
+diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
+index eaed02bc92..181fedcd20 100644
+--- a/libavformat/matroskaenc.c
++++ b/libavformat/matroskaenc.c
+@@ -58,6 +58,9 @@
+  * Info, Tracks, Chapters, Attachments, Tags (potentially twice) and Cues */
+ #define MAX_SEEKHEAD_ENTRIES 7
+ 
++/* Reserved size for H264 headers if not extant at init time */
++#define MAX_H264_HEADER_SIZE 1024
++
+ #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \
+                               !(mkv)->is_live)
+ 
+@@ -720,8 +723,12 @@ static int mkv_write_native_codecprivate(AVFormatContext *s, AVIOContext *pb,
+     case AV_CODEC_ID_WAVPACK:
+         return put_wv_codecpriv(dyn_cp, par);
+     case AV_CODEC_ID_H264:
+-        return ff_isom_write_avcc(dyn_cp, par->extradata,
+-                                  par->extradata_size);
++        if (par->extradata_size)
++            return ff_isom_write_avcc(dyn_cp, par->extradata,
++                                      par->extradata_size);
++        else
++            put_ebml_void(pb, MAX_H264_HEADER_SIZE);
++        break;
+     case AV_CODEC_ID_HEVC:
+         return ff_isom_write_hvcc(dyn_cp, par->extradata,
+                                   par->extradata_size, 0);
+@@ -2233,7 +2240,9 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt)
+         break;
+     // FIXME: Remove the following once libaom starts propagating extradata during init()
+     //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012
++    // H264 V4L2 has a similar issue
+     case AV_CODEC_ID_AV1:
++    case AV_CODEC_ID_H264:
+         if (side_data_size && mkv->track.bc && !par->extradata_size) {
+             AVIOContext *dyn_cp;
+             uint8_t *codecpriv;
+@@ -2241,7 +2250,10 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt)
+             ret = avio_open_dyn_buf(&dyn_cp);
+             if (ret < 0)
+                 return ret;
+-            ff_isom_write_av1c(dyn_cp, side_data, side_data_size);
++            if (par->codec_id == AV_CODEC_ID_H264)
++                ff_isom_write_avcc(dyn_cp, side_data, side_data_size);
++            else
++                ff_isom_write_av1c(dyn_cp, side_data, side_data_size);
+             codecpriv_size = avio_get_dyn_buf(dyn_cp, &codecpriv);
+             if ((ret = dyn_cp->error) < 0 ||
+                 !codecpriv_size && (ret = AVERROR_INVALIDDATA)) {
+@@ -2249,8 +2261,25 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt)
+                 return ret;
+             }
+             avio_seek(mkv->track.bc, track->codecpriv_offset, SEEK_SET);
+-            // Do not write the OBUs as we don't have space saved for them
+-            put_ebml_binary(mkv->track.bc, MATROSKA_ID_CODECPRIVATE, codecpriv, 4);
++            if (par->codec_id == AV_CODEC_ID_H264) {
++                int filler;
++                // Up to 6 bytes for header and the filler must be at least 2
++                if (codecpriv_size > MAX_H264_HEADER_SIZE - 8) {
++                    av_log(s, AV_LOG_ERROR, "H264 header size %d > %d bytes\n", codecpriv_size, MAX_H264_HEADER_SIZE - 8);
++                    ffio_free_dyn_buf(&dyn_cp); // error exit skips the common free below
++                    return AVERROR_INVALIDDATA;
++                }
++                put_ebml_binary(mkv->track.bc, MATROSKA_ID_CODECPRIVATE, codecpriv, codecpriv_size);
++                filler = MAX_H264_HEADER_SIZE - (avio_tell(mkv->track.bc) - track->codecpriv_offset);
++                if (filler < 2) {
++                    av_log(s, AV_LOG_ERROR, "Unexpected SPS/PPS filler length: %d\n", filler);
++                    ffio_free_dyn_buf(&dyn_cp); // error exit skips the common free below
++                    return AVERROR_BUG;
++                }
++                put_ebml_void(mkv->track.bc, filler);
++            } else { // Do not write the OBUs as we don't have space saved for them
++                put_ebml_binary(mkv->track.bc, MATROSKA_ID_CODECPRIVATE, codecpriv, 4);
++            }
+             ffio_free_dyn_buf(&dyn_cp);
+             ret = ff_alloc_extradata(par, side_data_size);
+             if (ret < 0)
+diff --git a/libavformat/movenc.c b/libavformat/movenc.c
+index 5d8dc4fd5d..97fabf260c 100644
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -5767,6 +5767,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
+     if (trk->par->codec_id == AV_CODEC_ID_MP4ALS ||
+             trk->par->codec_id == AV_CODEC_ID_AAC ||
+             trk->par->codec_id == AV_CODEC_ID_AV1 ||
++            trk->par->codec_id == AV_CODEC_ID_H264 ||
+             trk->par->codec_id == AV_CODEC_ID_FLAC) {
+         int side_size = 0;
+         uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
 diff --git a/libavformat/utils.c b/libavformat/utils.c
 index 7185fbfd71..c7b0553903 100644
 --- a/libavformat/utils.c
@@ -61886,10 +64356,10 @@ index 5613813ba8..ab8bcfcf34 100644
 +
 diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
 new file mode 100644
-index 0000000000..cdcf71ee67
+index 0000000000..2f07d9674c
 --- /dev/null
 +++ b/libavutil/aarch64/rpi_sand_neon.S
-@@ -0,0 +1,676 @@
+@@ -0,0 +1,781 @@
 +/*
 +Copyright (c) 2021 Michael Eiler
 +
@@ -62140,228 +64610,6 @@ index 0000000000..cdcf71ee67
 +    ret
 +endfunc
 +
-+//void ff_rpi_sand30_lines_to_planar_y16(
-+//  uint8_t * dest,             // [x0]
-+//  unsigned int dst_stride,    // [w1] -> assumed to be equal to _w
-+//  const uint8_t * src,        // [x2]
-+//  unsigned int src_stride1,   // [w3] -> 128
-+//  unsigned int src_stride2,   // [w4]
-+//  unsigned int _x,            // [w5]
-+//  unsigned int y,             // [w6]
-+//  unsigned int _w,            // [w7]
-+//  unsigned int h);            // [sp, #0]
-+
-+function ff_rpi_sand30_lines_to_planar_y16, export=1
-+    stp x19, x20, [sp, #-48]!
-+    stp x21, x22, [sp, #16]
-+    stp x23, x24, [sp, #32]
-+
-+    // w6 = argument h
-+    ldr w6, [sp, #48]
-+
-+    // slice_inc = ((stride2 - 1) * stride1)
-+    mov w5, w4
-+    sub w5, w5, #1
-+    lsl w5, w5, #7
-+
-+    // total number of bytes per row = (width / 3) * 4
-+    mov w8, w7
-+    mov w9, #3
-+    udiv w8, w8, w9
-+    lsl w8, w8, #2
-+
-+    // number of full 128 byte blocks to be processed
-+    mov w9, #96
-+    udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96
-+
-+    // w10 = number of full integers to process (4 bytes)
-+    // w11 = remaning zero to two 10bit values still to copy over
-+    mov w12, #96
-+    mul w12, w9, w12
-+    sub w12, w7, w12  // width - blocks*96 = remaining points per row
-+    mov w11, #3
-+    udiv w10, w12, w11 // full integers to process = w12 / 3 
-+    mul w11, w10, w11  // #integers *3
-+    sub w11, w12, w11  // remaining 0-2 points = remaining points - integers*3
-+
-+    // increase w9 by one if w10+w11 is not zero, and decrease the row count by one
-+    // this is to efficiently copy incomplete blocks at the end of the rows
-+    // the last row is handled explicitly to avoid writing out of bounds
-+    add w22, w10, w11
-+    cmp w22, #0
-+    cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise
-+    add w9, w9, w22
-+    sub w6, w6, #1
-+
-+    // store the number of bytes in w20 which we copy too much for every row
-+    // when the width of the frame is not a multiple of 96 (128bytes storing 96 10bit values)
-+    mov w20, #96*2
-+    mul w20, w20, w9
-+    sub w20, w1, w20
-+
-+    mov w23, #0 // flag to check whether the last line had already been processed
-+    
-+    // bitmask to clear the uppper 6bits of the result values
-+    mov x19, #0x03ff03ff03ff03ff
-+    dup v22.2d, x19
-+
-+    // row counter = 0
-+    eor w12, w12, w12
-+row_loop_y16:
-+    cmp w12, w6               // jump to row_loop_y16_fin if we processed all rows
-+    bge row_loop_y16_fin
-+
-+    mov x13, x2               // row src
-+    eor w14, w14, w14         // full block counter
-+block_loop_y16:
-+    cmp w14, w9
-+    bge block_loop_y16_fin
-+
-+    // load 64 bytes
-+    ld1 { v0.4s,  v1.4s, v2.4s, v3.4s }, [x13], #64
-+   
-+    // process v0 and v1
-+    xtn v16.4h, v0.4s
-+    ushr v0.4s, v0.4s, #10
-+    xtn v17.4h, v0.4s
-+    ushr v0.4s, v0.4s, #10
-+    xtn v18.4h, v0.4s
-+   
-+    xtn2 v16.8h, v1.4s
-+    and v16.16b, v16.16b, v22.16b
-+    ushr v1.4s, v1.4s, #10
-+    xtn2 v17.8h, v1.4s
-+    and v17.16b, v17.16b, v22.16b
-+    ushr v1.4s, v1.4s, #10
-+    xtn2 v18.8h, v1.4s
-+    and v18.16b, v18.16b, v22.16b
-+
-+    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
-+
-+    // process v2 and v3
-+    xtn v23.4h, v2.4s
-+    ushr v2.4s, v2.4s, #10
-+    xtn v24.4h, v2.4s
-+    ushr v2.4s, v2.4s, #10
-+    xtn v25.4h, v2.4s
-+    
-+    xtn2 v23.8h, v3.4s
-+    and v23.16b, v23.16b, v22.16b
-+    ushr v3.4s, v3.4s, #10
-+    xtn2 v24.8h, v3.4s
-+    and v24.16b, v24.16b, v22.16b
-+    ushr v3.4s, v3.4s, #10
-+    xtn2 v25.8h, v3.4s
-+    and v25.16b, v25.16b, v22.16b
-+
-+    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
-+
-+    // load the second half of the block -> 64 bytes into registers v4-v7
-+    ld1 { v4.4s,  v5.4s,  v6.4s,  v7.4s }, [x13], #64
-+    
-+    // process v4 and v5
-+    xtn v16.4h, v4.4s
-+    ushr v4.4s, v4.4s, #10
-+    xtn v17.4h, v4.4s
-+    ushr v4.4s, v4.4s, #10
-+    xtn v18.4h, v4.4s
-+   
-+    xtn2 v16.8h, v5.4s 
-+    and v16.16b, v16.16b, v22.16b
-+    ushr v5.4s, v5.4s, #10
-+    xtn2 v17.8h, v5.4s
-+    and v17.16b, v17.16b, v22.16b
-+    ushr v5.4s, v5.4s, #10
-+    xtn2 v18.8h, v5.4s
-+    and v18.16b, v18.16b, v22.16b
-+
-+    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
-+
-+    // v6 and v7
-+    xtn v23.4h, v6.4s
-+    ushr v6.4s, v6.4s, #10
-+    xtn v24.4h, v6.4s
-+    ushr v6.4s, v6.4s, #10
-+    xtn v25.4h, v6.4s
-+   
-+    xtn2 v23.8h, v7.4s 
-+    and v23.16b, v23.16b, v22.16b
-+    ushr v7.4s, v7.4s, #10
-+    xtn2 v24.8h, v7.4s
-+    and v24.16b, v24.16b, v22.16b
-+    ushr v7.4s, v7.4s, #10
-+    xtn2 v25.8h, v7.4s
-+    and v25.16b, v25.16b, v22.16b
-+
-+    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
-+ 
-+    add x13, x13, x5          // row src += slice_inc
-+    add w14, w14, #1
-+    b block_loop_y16
-+block_loop_y16_fin:
-+
-+    
-+
-+
-+    add x2, x2, #128          // src += stride1 (start of the next row)
-+    add x0, x0, w20, sxtw     // subtract the bytes we copied too much from dst
-+    add w12, w12, #1
-+    b row_loop_y16
-+row_loop_y16_fin:
-+
-+    // check whether we have incomplete blocks at the end of every row
-+    // in that case decrease row block count by one
-+    // change height back to it's original value (meaning increase it by 1)
-+    // and jump back to another iteration of row_loop_y16
-+
-+    cmp w23, #1
-+    beq row_loop_y16_fin2 // don't continue here if we already processed the last row
-+    add w6, w6, #1    // increase height to the original value
-+    sub w9, w9, w22   // block count - 1 or 0, depending on the remaining bytes count
-+    mov w23, #1
-+    b row_loop_y16
-+row_loop_y16_fin2:
-+
-+    sub x0, x0, w20, sxtw // with the last row we didn't actually move the dst ptr to far ahead, therefore readd the diference
-+
-+    // now we've got to handle the last block in the last row
-+    eor w12, w12, w12 // w12 = 0 = counter
-+integer_loop_y16:
-+    cmp w12, w10
-+    bge integer_loop_y16_fin
-+    ldr w14, [x13], #4
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    lsr w14, w14, #10
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    lsr w14, w14, #10
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    add w12, w12, #1
-+    b integer_loop_y16
-+integer_loop_y16_fin:
-+
-+final_values_y16:
-+    // remaining point count = w11
-+    ldr w14, [x13], #4
-+    cmp w11, #0
-+    beq final_values_y16_fin
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    cmp w11, #1
-+    beq final_values_y16_fin
-+    lsr w14, w14, #10
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+final_values_y16_fin:
-+
-+    ldp x23, x24, [sp, #32]
-+    ldp x21, x22, [sp, #16]
-+    ldp x19, x20, [sp], #48
-+    ret
-+endfunc
-+
 +//void ff_rpi_sand30_lines_to_planar_c16(
 +//  uint8_t * dst_u,            // [x0]
 +//  unsigned int dst_stride_u,  // [w1] == _w*2
@@ -62566,12 +64814,339 @@ index 0000000000..cdcf71ee67
 +//  unsigned int _w,
 +//  unsigned int h);
 +
++// void ff_rpi_sand30_lines_to_planar_y16(
++//   uint8_t * dest,            : x0
++//   unsigned int dst_stride,   : w1
++//   const uint8_t * src,       : x2
++//   unsigned int src_stride1,  : w3, always 128
++//   unsigned int src_stride2,  : w4
++//   unsigned int _x,           : w5
++//   unsigned int y,            : w6
++//   unsigned int _w,           : w7
++//   unsigned int h);           : [sp, #0]
++//
++// Assumes that we are starting on a stripe boundary and that overreading
++// within the stripe is OK. However it does respect the dest size for writes.
++
++function ff_rpi_sand30_lines_to_planar_y16, export=1
++                lsl             w4,  w4,  #7
++                sub             w4,  w4,  #64
++                sub             w1,  w1,  w7, lsl #1
++                uxtw            x6,  w6
++                add             x8,  x2,  x6, lsl #7
++                ldr             w6,  [sp, #0]
++
++10:
++                mov             x2,  x8
++                mov             w5,  w7
++1:
++                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
++                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
++
++                subs            w5,  w5,  #96
++
++                // v0, v1
++
++                shrn            v18.4h,  v0.4s,   #14
++                xtn             v16.4h,  v0.4s
++                shrn            v17.4h,  v0.4s,   #10
++
++                shrn2           v18.8h,  v1.4s,   #14
++                xtn2            v16.8h,  v1.4s
++                shrn2           v17.8h,  v1.4s,   #10
++
++                ushr            v18.8h,  v18.8h,  #6
++                bic             v16.8h,  #0xfc,   lsl #8
++                bic             v17.8h,  #0xfc,   lsl #8
++
++                // v2, v3
++
++                shrn            v21.4h,  v2.4s,   #14
++                xtn             v19.4h,  v2.4s
++                shrn            v20.4h,  v2.4s,   #10
++
++                shrn2           v21.8h,  v3.4s,   #14
++                xtn2            v19.8h,  v3.4s
++                shrn2           v20.8h,  v3.4s,   #10
++
++                ushr            v21.8h,  v21.8h,  #6
++                bic             v19.8h,  #0xfc,   lsl #8
++                bic             v20.8h,  #0xfc,   lsl #8
++
++                // v4, v5
++
++                shrn            v24.4h,  v4.4s,   #14
++                xtn             v22.4h,  v4.4s
++                shrn            v23.4h,  v4.4s,   #10
++
++                shrn2           v24.8h,  v5.4s,   #14
++                xtn2            v22.8h,  v5.4s
++                shrn2           v23.8h,  v5.4s,   #10
++
++                ushr            v24.8h,  v24.8h,  #6
++                bic             v22.8h,  #0xfc,   lsl #8
++                bic             v23.8h,  #0xfc,   lsl #8
++
++                // v6, v7
++
++                shrn            v27.4h,  v6.4s,   #14
++                xtn             v25.4h,  v6.4s
++                shrn            v26.4h,  v6.4s,   #10
++
++                shrn2           v27.8h,  v7.4s,   #14
++                xtn2            v25.8h,  v7.4s
++                shrn2           v26.8h,  v7.4s,   #10
++
++                ushr            v27.8h,  v27.8h,  #6
++                bic             v25.8h,  #0xfc,   lsl #8
++                bic             v26.8h,  #0xfc,   lsl #8
++
++                blt             2f
++
++                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
++                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
++                st3             {v22.8h, v23.8h, v24.8h}, [x0], #48
++                st3             {v25.8h, v26.8h, v27.8h}, [x0], #48
++
++                bne             1b
++
++11:
++                subs            w6,  w6,  #1
++                add             x0,  x0,  w1,  uxtw
++                add             x8,  x8,  #128
++                bne             10b
++
++                ret
++
++// Partial final write
++2:
++                cmp             w5,  #48-96
++                blt             1f
++                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
++                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
++                beq             11b
++                mov             v16.16b, v22.16b
++                mov             v17.16b, v23.16b
++                sub             w5,  w5,  #48
++                mov             v18.16b, v24.16b
++                mov             v19.16b, v25.16b
++                mov             v20.16b, v26.16b
++                mov             v21.16b, v27.16b
++1:
++                cmp             w5,  #24-96
++                blt             1f
++                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
++                beq             11b
++                mov             v16.16b, v19.16b
++                mov             v17.16b, v20.16b
++                sub             w5,  w5,  #24
++                mov             v18.16b, v21.16b
++1:
++                cmp             w5,  #12-96
++                blt             1f
++                st3             {v16.4h, v17.4h, v18.4h}, [x0], #24
++                beq             11b
++                mov             v16.2d[0], v16.2d[1]
++                sub             w5,  w5,  #12
++                mov             v17.2d[0], v17.2d[1]
++                mov             v18.2d[0], v18.2d[1]
++1:
++                cmp             w5,  #6-96
++                blt             1f
++                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
++                st3             {v16.h, v17.h, v18.h}[1], [x0], #6
++                beq             11b
++                mov             v16.2s[0], v16.2s[1]
++                sub             w5,  w5,  #6
++                mov             v17.2s[0], v17.2s[1]
++                mov             v18.2s[0], v18.2s[1]
++1:
++                cmp             w5,  #3-96
++                blt             1f
++                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
++                beq             11b
++                mov             v16.4h[0], v16.4h[1]
++                sub             w5,  w5,  #3
++                mov             v17.4h[0], v17.4h[1]
++1:
++                cmp             w5,  #2-96
++                blt             1f
++                st2             {v16.h, v17.h}[0], [x0], #4
++                b               11b
++1:
++                st1             {v16.h}[0], [x0], #2
++                b               11b
++
++endfunc
++
++// void ff_rpi_sand30_lines_to_planar_y8(
++//   uint8_t * dest,            : x0
++//   unsigned int dst_stride,   : w1
++//   const uint8_t * src,       : x2
++//   unsigned int src_stride1,  : w3, always 128
++//   unsigned int src_stride2,  : w4
++//   unsigned int _x,           : w5
++//   unsigned int y,            : w6
++//   unsigned int _w,           : w7
++//   unsigned int h);           : [sp, #0]
++//
++// Assumes that we are starting on a stripe boundary and that overreading
++// within the stripe is OK. However it does respect the dest size for writes.
++
++function ff_rpi_sand30_lines_to_planar_y8, export=1
++                lsl             w4,  w4,  #7
++                sub             w4,  w4,  #64
++                sub             w1,  w1,  w7
++                uxtw            x6,  w6
++                add             x8,  x2,  x6, lsl #7
++                ldr             w6,  [sp, #0]
++
++10:
++                mov             x2,  x8
++                mov             w5,  w7
++1:
++                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
++                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
++
++                subs            w5,  w5,  #96
++
++                // v0, v1
++
++                shrn            v18.4h,  v0.4s,   #16
++                xtn             v16.4h,  v0.4s
++                shrn            v17.4h,  v0.4s,   #12
++
++                shrn2           v18.8h,  v1.4s,   #16
++                xtn2            v16.8h,  v1.4s
++                shrn2           v17.8h,  v1.4s,   #12
++
++                shrn            v18.8b,  v18.8h,  #6
++                shrn            v16.8b,  v16.8h,  #2
++                xtn             v17.8b,  v17.8h
++
++                // v2, v3
++
++                shrn            v21.4h,  v2.4s,   #16
++                xtn             v19.4h,  v2.4s
++                shrn            v20.4h,  v2.4s,   #12
++
++                shrn2           v21.8h,  v3.4s,   #16
++                xtn2            v19.8h,  v3.4s
++                shrn2           v20.8h,  v3.4s,   #12
++
++                shrn2           v18.16b, v21.8h,  #6
++                shrn2           v16.16b, v19.8h,  #2
++                xtn2            v17.16b, v20.8h
++
++                // v4, v5
++
++                shrn            v24.4h,  v4.4s,   #16
++                xtn             v22.4h,  v4.4s
++                shrn            v23.4h,  v4.4s,   #12
++
++                shrn2           v24.8h,  v5.4s,   #16
++                xtn2            v22.8h,  v5.4s
++                shrn2           v23.8h,  v5.4s,   #12
++
++                shrn            v21.8b,  v24.8h,  #6
++                shrn            v19.8b,  v22.8h,  #2
++                xtn             v20.8b,  v23.8h
++
++                // v6, v7
++
++                shrn            v27.4h,  v6.4s,   #16
++                xtn             v25.4h,  v6.4s
++                shrn            v26.4h,  v6.4s,   #12
++
++                shrn2           v27.8h,  v7.4s,   #16
++                xtn2            v25.8h,  v7.4s
++                shrn2           v26.8h,  v7.4s,   #12
++
++                shrn2           v21.16b, v27.8h,  #6
++                shrn2           v19.16b, v25.8h,  #2
++                xtn2            v20.16b, v26.8h
++
++                blt             2f
++
++                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
++                st3             {v19.16b, v20.16b, v21.16b}, [x0], #48
++
++                bne             1b
++
++11:
++                subs            w6,  w6,  #1
++                add             x0,  x0,  w1,  uxtw
++                add             x8,  x8,  #128
++                bne             10b
++
++                ret
++
++// Partial final write
++2:
++                cmp             w5,  #48-96
++                blt             1f
++                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
++                beq             11b
++                mov             v16.16b, v22.16b
++                mov             v17.16b, v23.16b
++                sub             w5,  w5,  #48
++                mov             v18.16b, v24.16b
++1:
++                cmp             w5,  #24-96
++                blt             1f
++                st3             {v16.8b, v17.8b, v18.8b}, [x0], #24
++                beq             11b
++                mov             v16.2d[0], v16.2d[1]
++                sub             w5,  w5,  #24
++                mov             v17.2d[0], v17.2d[1]
++                mov             v18.2d[0], v18.2d[1]
++1:
++                cmp             w5,  #12-96
++                blt             1f
++                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[2], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[3], [x0], #3
++                beq             11b
++                mov             v16.2s[0], v16.2s[1]
++                sub             w5,  w5,  #12
++                mov             v17.2s[0], v17.2s[1]
++                mov             v18.2s[0], v18.2s[1]
++1:
++                cmp             w5,  #6-96
++                blt             1f
++                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
++                beq             11b
++                mov             v16.4h[0], v16.4h[1]
++                sub             w5,  w5,  #6
++                mov             v17.4h[0], v17.4h[1]
++                mov             v18.4h[0], v18.4h[1]
++1:
++                cmp             w5,  #3-96
++                blt             1f
++                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
++                beq             11b
++                mov             v16.8b[0], v16.8b[1]
++                sub             w5,  w5,  #3
++                mov             v17.8b[0], v17.8b[1]
++1:
++                cmp             w5,  #2-96
++                blt             1f
++                st2             {v16.b, v17.b}[0], [x0], #2
++                b               11b
++1:
++                st1             {v16.b}[0], [x0], #1
++                b               11b
++
++endfunc
++
 diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h
 new file mode 100644
-index 0000000000..b3aa481ea4
+index 0000000000..2a56135bc3
 --- /dev/null
 +++ b/libavutil/aarch64/rpi_sand_neon.h
-@@ -0,0 +1,55 @@
+@@ -0,0 +1,59 @@
 +/*
 +Copyright (c) 2021 Michael Eiler
 +
@@ -62623,6 +65198,10 @@ index 0000000000..b3aa481ea4
 +  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1,
 +  unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
 +
++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,  // x0, w1
++  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,      // x2, w3 (not read by the asm), w4
++  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);            // w5 (not read by the asm), w6, w7, [sp, #0]
++
 +#ifdef __cplusplus
 +}
 +#endif
@@ -62638,10 +65217,10 @@ index 5da44b0542..b74b7c4e2f 100644
 +             arm/rpi_sand_neon.o                                        \
 diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S
 new file mode 100644
-index 0000000000..80890fe985
+index 0000000000..60e697f681
 --- /dev/null
 +++ b/libavutil/arm/rpi_sand_neon.S
-@@ -0,0 +1,768 @@
+@@ -0,0 +1,925 @@
 +/*
 +Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
 +All rights reserved.
@@ -63004,7 +65583,6 @@ index 0000000000..80890fe985
 +                ldr             r6,  [sp, #36]
 +                ldr             r7,  [sp, #32]  @ y
 +                mov             r12, #48
-+                vmov.u16        q15, #0x3ff
 +                sub             r3,  #1
 +                lsl             r3,  #7
 +                sub             r1,  r1,  r6,  lsl #1
@@ -63020,37 +65598,33 @@ index 0000000000..80890fe985
 +                vldm            r2!, {q10-q13}
 +                add             lr,  #64
 +
-+                vshr.u32        q14, q10, #20    @ Cannot vshrn.u32 #20!
++                vshrn.u32       d4 , q10, #14    @ Cannot vshrn.u32 #20!
 +                ands            lr,  #127
 +                vshrn.u32       d2,  q10, #10
 +                vmovn.u32       d0,  q10
-+                vmovn.u32       d4,  q14
 +
-+                vshr.u32        q14, q11, #20
++                vshrn.u32       d5,  q11, #14
 +                it              eq
 +                addeq           r2,  r3
 +                vshrn.u32       d3,  q11, #10
 +                vmovn.u32       d1,  q11
-+                vmovn.u32       d5,  q14
 +
 +                subs            r5,  #48
-+                vand            q0,  q15
-+                vand            q1,  q15
-+                vand            q2,  q15
++                vshr.u16        q2,  #6
++                vbic.u16        q0,  #0xfc00
++                vbic.u16        q1,  #0xfc00
 +
-+                vshr.u32        q14, q12, #20
++                vshrn.u32       d20, q12, #14
 +                vshrn.u32       d18, q12, #10
 +                vmovn.u32       d16, q12
-+                vmovn.u32       d20, q14
 +
-+                vshr.u32        q14, q13, #20
++                vshrn.u32       d21, q13, #14
 +                vshrn.u32       d19, q13, #10
 +                vmovn.u32       d17, q13
-+                vmovn.u32       d21, q14
 +
-+                vand            q8,  q15
-+                vand            q9,  q15
-+                vand            q10, q15
++                vshr.u16        q10, #6
++                vbic.u16        q8,  #0xfc00
++                vbic.u16        q9 , #0xfc00
 +                blt             2f
 +
 +                vst3.16         {d0,  d2,  d4},  [r0], r12
@@ -63143,7 +65717,6 @@ index 0000000000..80890fe985
 +                ldr             r7,  [sp, #48]
 +                ldr             r9,  [sp, #52]
 +                mov             r12, #48
-+                vmov.u16        q15, #0x3ff
 +                sub             r8,  #1
 +                lsl             r8,  #7
 +                add             r5,  r5,  r7,  lsl #7
@@ -63159,48 +65732,44 @@ index 0000000000..80890fe985
 +                add             lr,  #64
 +
 +                @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2
-+                vshr.u32        q14, q0,  #20
-+                vshrn.u32       d16, q0,  #10
++                vshrn.u32       d20, q0,  #14
 +                vmovn.u32       d18, q0
++                vshrn.u32       d0,  q0,  #10
 +                ands            lr,  #127
-+                vmovn.u32       d20, q14
 +
-+                vshr.u32        q14, q1,  #20
-+                vshrn.u32       d17, q1,  #10
++                vshrn.u32       d21, q1,  #14
 +                vmovn.u32       d19, q1
-+                vmovn.u32       d21, q14
++                vshrn.u32       d1,  q1,  #10
 +
-+                vshr.u32        q14, q2,  #20
 +                vshrn.u32       d22, q2,  #10
-+                vmovn.u32       d24, q2
-+                vmovn.u32       d26, q14
++                vmovn.u32       d2,  q2
++                vshrn.u32       d4,  q2,  #14
 +
-+                vshr.u32        q14, q3,  #20
-+                vshrn.u32       d23, q3,  #10
-+                vmovn.u32       d25, q3
 +                add             r10, r0,  #24
-+                vmovn.u32       d27, q14
++                vshrn.u32       d23, q3,  #10
++                vmovn.u32       d3,  q3
++                vshrn.u32       d5,  q3,  #14
 +
 +                it              eq
 +                addeq           r4,  r8
-+                vuzp.16         q8,  q11
-+                vuzp.16         q9,  q12
-+                vuzp.16         q10, q13
++                vuzp.16         q0,  q11
++                vuzp.16         q9,  q1
++                vuzp.16         q10, q2
 +
-+                @ q8   V0, V3,.. -> q0
++                @ q0   V0, V3,..
 +                @ q9   U0, U3...
 +                @ q10  U1, U4...
 +                @ q11  U2, U5,..
-+                @ q12  V1, V4,.. -> q1
-+                @ q13  V2, V5,.. -> q2
++                @ q1   V1, V4,
++                @ q2   V2, V5,..
 +
 +                subs            r6,  #24
-+                vand            q11, q15
-+                vand            q9,  q15
-+                vand            q10, q15
-+                vand            q0,  q8,  q15
-+                vand            q1,  q12, q15
-+                vand            q2,  q13, q15
++                vbic.u16        q11, #0xfc00
++                vbic.u16        q9,  #0xfc00
++                vshr.u16        q10, #6
++                vshr.u16        q2,  #6
++                vbic.u16        q0,  #0xfc00
++                vbic.u16        q1,  #0xfc00
 +
 +                blt             2f
 +
@@ -63409,13 +65978,180 @@ index 0000000000..80890fe985
 +endfunc
 +
 +
++@ void ff_rpi_sand30_lines_to_planar_y8(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing.
++
++function ff_rpi_sand30_lines_to_planar_y8, export=1
++                push            {r4-r8, lr}     @ +24
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ _w
++                ldr             r7,  [sp, #32]  @ y
++                mov             r12, #48        @ post-increment for the paired r0/r4 stores
++                lsl             r3,  #7         @ r3 = src_stride2 * 128
++                sub             r1,  r1,  r6    @ r1 = dst_stride - _w
++                add             r8,  r2,  r7,  lsl #7  @ r8 = src + y * 128: first source row
++                ldr             r7,  [sp, #40]  @ h (row counter)
++
++10:
++                mov             r2,  r8
++                add             r4,  r0,  #24   @ r4 writes the second 24 bytes of each 48
++                mov             r5,  r6         @ r5 = bytes still to write on this row
++1:
++                vldm            r2,  {q8-q15}   @ 32 source words -> 96 output bytes
++
++                subs            r5,  #96        @ flags are consumed by blt 2f / bne 1b below
++
++                vmovn.u32       d0,  q8
++                vshrn.u32       d2,  q8,  #12
++                vshrn.u32       d4,  q8,  #16    @ vshrn.u32 cannot shift by 22: >>16 here, >>6 below
++
++                add             r2,  r3
++
++                vmovn.u32       d1,  q9
++                vshrn.u32       d3,  q9,  #12
++                vshrn.u32       d5,  q9,  #16
++
++                pld             [r2, #0]
++
++                vshrn.u16       d0,  q0,  #2     @ d0-d2 = output bytes 0-23
++                vmovn.u16       d1,  q1
++                vshrn.u16       d2,  q2,  #6
++
++                vmovn.u32       d16, q10
++                vshrn.u32       d18, q10, #12
++                vshrn.u32       d20, q10, #16
++
++                vmovn.u32       d17, q11
++                vshrn.u32       d19, q11, #12
++                vshrn.u32       d21, q11, #16
++
++                pld             [r2, #64]
++
++                vshrn.u16       d4,  q8,  #2     @ d4-d6 = output bytes 24-47
++                vmovn.u16       d5,  q9
++                vshrn.u16       d6,  q10, #6
++
++                vmovn.u32       d16, q12
++                vshrn.u32       d18, q12, #12
++                vshrn.u32       d20, q12, #16
++
++                vmovn.u32       d17, q13
++                vshrn.u32       d19, q13, #12
++                vshrn.u32       d21, q13, #16
++
++                vshrn.u16       d16, q8,  #2     @ d16-d18 = output bytes 48-71
++                vmovn.u16       d17, q9
++                vshrn.u16       d18, q10, #6
++
++                vmovn.u32       d20, q14
++                vshrn.u32       d22, q14, #12
++                vshrn.u32       d24, q14, #16
++
++                vmovn.u32       d21, q15
++                vshrn.u32       d23, q15, #12
++                vshrn.u32       d25, q15, #16
++
++                vshrn.u16       d20, q10, #2     @ d20-d22 = output bytes 72-95
++                vmovn.u16       d21, q11
++                vshrn.u16       d22, q12, #6
++
++                blt             2f               @ fewer than 96 bytes left on this row
++
++                vst3.8          {d0,  d1,  d2},  [r0], r12
++                vst3.8          {d4,  d5,  d6},  [r4], r12
++                vst3.8          {d16, d17, d18}, [r0], r12
++                vst3.8          {d20, d21, d22}, [r4], r12
++
++                bne             1b
++
++11:
++                subs            r7,  #1
++                add             r0,  r1          @ dest to the start of the next row
++                add             r8,  #128        @ next source row within the stripe
++                bne             10b
++
++                pop             {r4-r8, pc}
++
++@ Partial final write: r5 = (bytes left) - 96, each cmp tests against a chunk size
++2:
++                cmp             r5,  #48-96
++                blt             1f
++                vst3.8          {d0,  d1,  d2},  [r0], r12
++                vst3.8          {d4,  d5,  d6},  [r4], r12
++                beq             11b
++                vmov            q0,  q8          @ move bytes 48-95 down into d0-d2 / d4-d6
++                vmov            q2,  q10
++                sub             r5,  #48
++                vmov            d2,  d18
++                vmov            d6,  d22
++1:
++                cmp             r5,  #24-96
++                blt             1f
++                vst3.8          {d0,  d1,  d2},  [r0]!
++                beq             11b
++                vmov            q0,  q2
++                sub             r5,  #24
++                vmov            d2,  d6
++1:
++                cmp             r5,  #12-96
++                blt             1f
++                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
++                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
++                vst3.8          {d0[2], d1[2], d2[2]}, [r0]!
++                vst3.8          {d0[3], d1[3], d2[3]}, [r0]!
++                beq             11b
++                vmov            s0,  s1          @ shift the unwritten high words of d0-d2 down
++                sub             r5,  #12
++                vmov            s2,  s3
++                vmov            s4,  s5
++1:
++                cmp             r5,  #6-96
++                blt             1f
++                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
++                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
++                @ r0 needs no further adjustment: the two [r0]! stores advanced it by 6
++                beq             11b
++                vshr.u32        d0,  #16
++                sub             r5,  #6
++                vshr.u32        d1,  #16
++                vshr.u32        d2,  #16
++1:
++                cmp             r5, #3-96
++                blt             1f
++                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
++                beq             11b
++                sub             r5, #3
++                vshr.u32        d0, #8           @ at most 2 bytes remain, so d2 is not needed
++                vshr.u32        d1, #8
++1:
++                cmp             r5, #2-96
++                blt             1f
++                vst2.8          {d0[0], d1[0]}, [r0]!
++                b               11b
++1:
++                vst1.8          {d0[0]}, [r0]!
++                b               11b
++
++endfunc
++
 +
 diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h
 new file mode 100644
-index 0000000000..447f367bea
+index 0000000000..d457c10870
 --- /dev/null
 +++ b/libavutil/arm/rpi_sand_neon.h
-@@ -0,0 +1,99 @@
+@@ -0,0 +1,110 @@
 +/*
 +Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
 +All rights reserved.
@@ -63513,6 +66249,17 @@ index 0000000000..447f367bea
 +  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
 +  unsigned int h);            // [sp, #16] -> r7
 +
++void ff_rpi_sand30_lines_to_planar_y8(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - treated as 0
++  unsigned int y,             // [sp, #8]  -> r7 in the prologue only (first source row)
++  unsigned int _w,            // [sp, #12] -> r6 (r5 = bytes left in the current row)
++  unsigned int h);            // [sp, #16] -> r7
++
 +#endif // AVUTIL_ARM_SAND_NEON_H
 +
 diff --git a/libavutil/frame.c b/libavutil/frame.c
@@ -63573,7 +66320,7 @@ index fc67db0f6c..b1a7eb4858 100644
   * @}
   */
 diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c
-index 32cbde82eb..c5d0b960af 100644
+index 32cbde82eb..b0868608c6 100644
 --- a/libavutil/hwcontext_drm.c
 +++ b/libavutil/hwcontext_drm.c
 @@ -19,8 +19,10 @@
@@ -63717,13 +66464,25 @@ index 32cbde82eb..c5d0b960af 100644
          if (map->address[i])
              munmap(map->address[i], map->length[i]);
      }
-@@ -178,7 +241,15 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx,
-     if (!pix_fmts)
+@@ -172,16 +235,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx,
+                                     enum AVHWFrameTransferDirection dir,
+                                     enum AVPixelFormat **formats)
+ {
+-    enum AVPixelFormat *pix_fmts;
++    enum AVPixelFormat *p;
+ 
+-    pix_fmts = av_malloc_array(2, sizeof(*pix_fmts));
+-    if (!pix_fmts)
++    p = *formats = av_malloc_array(3, sizeof(*p));
++    if (!p)
          return AVERROR(ENOMEM);
  
 -    pix_fmts[0] = ctx->sw_format;
+-    pix_fmts[1] = AV_PIX_FMT_NONE;
+-
+-    *formats = pix_fmts;
 +    // **** Offer native sand too ????
-+    pix_fmts[0] =
++    *p++ =
 +#if CONFIG_SAND
 +        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ?
 +            AV_PIX_FMT_YUV420P :
@@ -63731,10 +66490,18 @@ index 32cbde82eb..c5d0b960af 100644
 +            AV_PIX_FMT_YUV420P10LE :
 +#endif
 +            ctx->sw_format;
-     pix_fmts[1] = AV_PIX_FMT_NONE;
++
++#if CONFIG_SAND
++    if (ctx->sw_format == AV_PIX_FMT_RPI4_10 ||
++        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128)
++        *p++ = AV_PIX_FMT_NV12;
++#endif
++
++    *p = AV_PIX_FMT_NONE;
+     return 0;
+ }
  
-     *formats = pix_fmts;
-@@ -197,18 +268,80 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc,
+@@ -197,18 +273,63 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc,
      map = av_frame_alloc();
      if (!map)
          return AVERROR(ENOMEM);
@@ -63769,29 +66536,12 @@ index 32cbde82eb..c5d0b960af 100644
 +        const unsigned int w = FFMIN(dst->width, map->width);
 +        const unsigned int h = FFMIN(dst->height, map->height);
 +
-+        if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) {
-+            av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
-+                                     map->data[0],
-+                                     128, stride2,
-+                                     0, 0, w, h);
-+            av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
-+                                     dst->data[2], dst->linesize[2],
-+                                     map->data[1],
-+                                     128, stride2,
-+                                     0, 0, w / 2, h / 2);
-+        }
-+        else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) {
-+            av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
-+                                     map->data[0],
-+                                     128, stride2,
-+                                     0, 0, w, h);
-+            av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
-+                                     dst->data[2], dst->linesize[2],
-+                                     map->data[1],
-+                                     128, stride2,
-+                                     0, 0, w / 2, h / 2);
-+        }
-+        else
++        map->crop_top = 0;
++        map->crop_bottom = 0;
++        map->crop_left = 0;
++        map->crop_right = 0;
++
++        if (av_rpi_sand_to_planar_frame(dst, map) != 0)
 +        {
 +            av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__);
 +            err = AVERROR(EINVAL);
@@ -63819,7 +66569,7 @@ index 32cbde82eb..c5d0b960af 100644
  
      err = 0;
  fail:
-@@ -223,7 +356,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc,
+@@ -223,7 +344,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc,
      int err;
  
      if (src->width > hwfc->width || src->height > hwfc->height)
@@ -63831,10 +66581,10 @@ index 32cbde82eb..c5d0b960af 100644
      map = av_frame_alloc();
      if (!map)
 diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
-index 9d61c52567..701c0e356b 100644
+index 9d61c52567..b40b8dc6c9 100644
 --- a/libavutil/pixdesc.c
 +++ b/libavutil/pixdesc.c
-@@ -2371,6 +2371,38 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
+@@ -2371,6 +2371,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
          .name = "vulkan",
          .flags = AV_PIX_FMT_FLAG_HWACCEL,
      },
@@ -63862,12 +66612,24 @@ index 9d61c52567..701c0e356b 100644
 +        },
 +        .flags = 0,
 +    },
++    [AV_PIX_FMT_SAND64_16] = {
++        .name = "sand64_16",
++        .nb_components = 3,
++        .log2_chroma_w = 1,
++        .log2_chroma_h = 1,
++        .comp = {
++            { 0, 2, 0, 0, 16, 0, 15, 1 },        /* Y */
++            { 1, 4, 0, 0, 16, 3, 15, 1 },        /* U */
++            { 1, 4, 2, 0, 16, 3, 15, 3 },        /* V */
++        },
++        .flags = 0,
++    },
 +    [AV_PIX_FMT_RPI4_8] = {
-+        .name = "rpi",
++        .name = "rpi4_8",
 +        .flags = AV_PIX_FMT_FLAG_HWACCEL,
 +    },
 +    [AV_PIX_FMT_RPI4_10] = {
-+        .name = "rpi",
++        .name = "rpi4_10",
 +        .flags = AV_PIX_FMT_FLAG_HWACCEL,
 +    },
  };
@@ -64125,10 +66887,10 @@ index 0000000000..0d5d203dc3
 +
 diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c
 new file mode 100644
-index 0000000000..1f543e9357
+index 0000000000..b6071e2928
 --- /dev/null
 +++ b/libavutil/rpi_sand_fns.c
-@@ -0,0 +1,356 @@
+@@ -0,0 +1,445 @@
 +/*
 +Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
 +All rights reserved.
@@ -64360,6 +67122,75 @@ index 0000000000..1f543e9357
 +    }
 +}
 +
++// Fetches a single patch as 8-bit samples - offscreen fixup not done here
++// w <= stride1 (NOTE(review): the loops below do step across stripes - confirm this limit still applies)
++// Conversion is plain truncation: the bottom 2 bits of each 10-bit sample are lost
++// _x & _w in pixels, strides in bytes
++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
++    const unsigned int xskip0 = _x - (x0 >> 2) * 3; // Samples to skip in the first word (0..2)
++    const unsigned int x1 = ((_x + _w) / 3) * 4; // Byte offset of the word containing pixel _x + _w
++    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; // Samples wanted from that last word (0..2)
++    const unsigned int mask = stride1 - 1; // Assumes stride1 is a power of 2
++    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; // Stripes are stride1 * stride2 bytes apart
++    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++    if (_x == 0) { // The asm routine is only used for transfers starting at x == 0
++        ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if (x0 == x1) {
++        // NOTE(review): not implemented - when the start and end pixel share one
++        // source word (Partial single word xfer) nothing is written to dst
++        return;
++    }
++
++    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) // One output row per pass
++    {
++        unsigned int x = x0;
++        const uint32_t * p = (const uint32_t *)p0;
++        uint8_t * d = dst;
++
++        if (xskip0 != 0) { // Leading partial word: emit only the samples at or after _x
++            const uint32_t p3 = *p++;
++
++            if (xskip0 == 1)
++                *d++ = (p3 >> 12) & 0xff; // Middle sample
++            *d++ = (p3 >> 22) & 0xff; // Top sample
++
++            if (((x += 4) & mask) == 0) // Ran off the right of the stripe
++                p += slice_inc;
++        }
++
++        while (x != x1) { // Whole words: 3 samples each
++            const uint32_t p3 = *p++;
++            *d++ = (p3 >> 2) & 0xff;
++            *d++ = (p3 >> 12) & 0xff;
++            *d++ = (p3 >> 22) & 0xff;
++
++            if (((x += 4) & mask) == 0) // Ran off the right of the stripe
++                p += slice_inc;
++        }
++
++        if (xrem1 != 0) { // Trailing partial word: emit only the samples before _x + _w
++            const uint32_t p3 = *p;
++
++            *d++ = (p3 >> 2) & 0xff;
++            if (xrem1 == 2)
++                *d++ = (p3 >> 12) & 0xff;
++        }
++    }
++}
++
++
 +
 +// w/h in pixels
 +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
@@ -64441,6 +67272,16 @@ index 0000000000..1f543e9357
 +                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
 +                                             x/2, y/2,  w/2, h/2);
 +                    break;
++                case AV_PIX_FMT_NV12:
++                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x/2, y/2, w, h/2);
++                    break;
 +                default:
 +                    return -1;
 +            }
@@ -64475,6 +67316,16 @@ index 0000000000..1f543e9357
 +                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
 +                                             x/2, y/2, w/2, h/2);
 +                    break;
++                case AV_PIX_FMT_NV12:
++                    av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x/2, y/2, w, h/2);
++                    break;
 +                default:
 +                    return -1;
 +            }
@@ -64487,10 +67338,10 @@ index 0000000000..1f543e9357
 +}
 diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h
 new file mode 100644
-index 0000000000..634b55e800
+index 0000000000..462ccb8abd
 --- /dev/null
 +++ b/libavutil/rpi_sand_fns.h
-@@ -0,0 +1,183 @@
+@@ -0,0 +1,188 @@
 +/*
 +Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
 +All rights reserved.
@@ -64578,6 +67429,11 @@ index 0000000000..634b55e800
 +                             unsigned int _x, unsigned int y,
 +                             unsigned int _w, unsigned int h);
 +
++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,  // 8-bit output, bottom 2 bits of each sample dropped
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,      // strides in bytes
++                             unsigned int _x, unsigned int y,                 // _x in pixels
++                             unsigned int _w, unsigned int h);                // _w in pixels
 +
 +// w/h in pixels
 +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
@@ -66308,3 +69164,644 @@ index 0000000000..5935a11ca5
 +
 +    do_logparse(args.logfile)
 +
+diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
+index 9e9569777b..7bafda7ee5 100644
+--- a/tests/checkasm/Makefile
++++ b/tests/checkasm/Makefile
+@@ -9,8 +9,10 @@ AVCODECOBJS-$(CONFIG_G722DSP)           += g722dsp.o
+ AVCODECOBJS-$(CONFIG_H264DSP)           += h264dsp.o
+ AVCODECOBJS-$(CONFIG_H264PRED)          += h264pred.o
+ AVCODECOBJS-$(CONFIG_H264QPEL)          += h264qpel.o
++AVCODECOBJS-$(CONFIG_IDCTDSP)           += idctdsp.o
+ AVCODECOBJS-$(CONFIG_LLVIDDSP)          += llviddsp.o
+ AVCODECOBJS-$(CONFIG_LLVIDENCDSP)       += llviddspenc.o
++AVCODECOBJS-$(CONFIG_VC1DSP)            += vc1dsp.o
+ AVCODECOBJS-$(CONFIG_VP8DSP)            += vp8dsp.o
+ AVCODECOBJS-$(CONFIG_VIDEODSP)          += videodsp.o
+ 
+diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
+index 899f68bb32..4d75291057 100644
+--- a/tests/checkasm/checkasm.c
++++ b/tests/checkasm/checkasm.c
+@@ -121,6 +121,9 @@ static const struct {
+     #if CONFIG_HUFFYUV_DECODER
+         { "huffyuvdsp", checkasm_check_huffyuvdsp },
+     #endif
++    #if CONFIG_IDCTDSP
++        { "idctdsp", checkasm_check_idctdsp },
++    #endif
+     #if CONFIG_JPEG2000_DECODER
+         { "jpeg2000dsp", checkasm_check_jpeg2000dsp },
+     #endif
+@@ -145,6 +148,9 @@ static const struct {
+     #if CONFIG_V210_ENCODER
+         { "v210enc", checkasm_check_v210enc },
+     #endif
++    #if CONFIG_VC1DSP
++        { "vc1dsp", checkasm_check_vc1dsp },
++    #endif
+     #if CONFIG_VP8DSP
+         { "vp8dsp", checkasm_check_vp8dsp },
+     #endif
+diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
+index 0190bc912c..5178c49aed 100644
+--- a/tests/checkasm/checkasm.h
++++ b/tests/checkasm/checkasm.h
+@@ -60,6 +60,7 @@ void checkasm_check_hevc_add_res(void);
+ void checkasm_check_hevc_idct(void);
+ void checkasm_check_hevc_sao(void);
+ void checkasm_check_huffyuvdsp(void);
++void checkasm_check_idctdsp(void);
+ void checkasm_check_jpeg2000dsp(void);
+ void checkasm_check_llviddsp(void);
+ void checkasm_check_llviddspenc(void);
+@@ -73,6 +74,7 @@ void checkasm_check_sw_scale(void);
+ void checkasm_check_utvideodsp(void);
+ void checkasm_check_v210dec(void);
+ void checkasm_check_v210enc(void);
++void checkasm_check_vc1dsp(void);
+ void checkasm_check_vf_eq(void);
+ void checkasm_check_vf_gblur(void);
+ void checkasm_check_vf_hflip(void);
+diff --git a/tests/checkasm/idctdsp.c b/tests/checkasm/idctdsp.c
+new file mode 100644
+index 0000000000..02724536a7
+--- /dev/null
++++ b/tests/checkasm/idctdsp.c
+@@ -0,0 +1,98 @@
++/*
++ * Copyright (c) 2022 Ben Avison
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License along
++ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
++ */
++
++#include <string.h>
++
++#include "checkasm.h"
++
++#include "libavcodec/idctdsp.h"
++
++#include "libavutil/common.h"
++#include "libavutil/internal.h"
++#include "libavutil/intreadwrite.h"
++#include "libavutil/mem_internal.h"
++
++#define IDCTDSP_TEST(func) { #func, offsetof(IDCTDSPContext, func) },
++
++typedef struct {
++    const char *name;
++    size_t offset;
++} test;
++
++#define RANDOMIZE_BUFFER16(name, size)          \
++    do {                                        \
++        int i;                                  \
++        for (i = 0; i < size; ++i) {            \
++            uint16_t r = rnd() % 0x201 - 0x100; \
++            AV_WN16A(name##0 + i, r);           \
++            AV_WN16A(name##1 + i, r);           \
++        }                                       \
++    } while (0)
++
++#define RANDOMIZE_BUFFER8(name, size)         \
++    do {                                      \
++        int i;                                \
++        for (i = 0; i < size; ++i) {          \
++            uint8_t r = rnd();                \
++            name##0[i] = r;                   \
++            name##1[i] = r;                   \
++        }                                     \
++    } while (0)
++
++static void check_add_put_clamped(void)
++{
++    /* Source buffers are only as big as needed, since any over-read won't affect results */
++    LOCAL_ALIGNED_16(int16_t, src0, [64]);
++    LOCAL_ALIGNED_16(int16_t, src1, [64]);
++    /* Destination buffers have borders of one row above/below and 8 columns left/right to catch overflows */
++    LOCAL_ALIGNED_8(uint8_t, dst0, [10 * 24]);
++    LOCAL_ALIGNED_8(uint8_t, dst1, [10 * 24]);
++
++    AVCodecContext avctx = { 0 };
++    IDCTDSPContext h;
++
++    const test tests[] = { /* every entry is called as (block, pixels, stride) below */
++        IDCTDSP_TEST(add_pixels_clamped)
++        IDCTDSP_TEST(put_pixels_clamped)
++        IDCTDSP_TEST(put_signed_pixels_clamped)
++    };
++
++    ff_idctdsp_init(&h, &avctx);
++
++    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
++        void (*func)(const int16_t *, uint8_t *, ptrdiff_t) = *(void **)((intptr_t) &h + tests[t].offset);
++        if (check_func(func, "idctdsp.%s", tests[t].name)) {
++            declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *, uint8_t *, ptrdiff_t);
++            RANDOMIZE_BUFFER16(src, 64);
++            RANDOMIZE_BUFFER8(dst, 10 * 24);
++            call_ref(src0, dst0 + 24 + 8, 24); /* + 24 + 8 skips the top border row and the left border columns */
++            call_new(src1, dst1 + 24 + 8, 24);
++            if (memcmp(dst0, dst1, 10 * 24)) /* borders are compared too, so stray writes are detected */
++                fail();
++            bench_new(src1, dst1 + 24 + 8, 24);
++        }
++    }
++}
++
++void checkasm_check_idctdsp(void)
++{
++    check_add_put_clamped();
++    report("idctdsp");
++}
+diff --git a/tests/checkasm/vc1dsp.c b/tests/checkasm/vc1dsp.c
+new file mode 100644
+index 0000000000..52628d15e4
+--- /dev/null
++++ b/tests/checkasm/vc1dsp.c
+@@ -0,0 +1,452 @@
++/*
++ * Copyright (c) 2022 Ben Avison
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License along
++ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
++ */
++
++#include <string.h>
++
++#include "checkasm.h"
++
++#include "libavcodec/vc1dsp.h"
++
++#include "libavutil/common.h"
++#include "libavutil/internal.h"
++#include "libavutil/intreadwrite.h"
++#include "libavutil/mem_internal.h"
++
++#define VC1DSP_TEST(func) { #func, offsetof(VC1DSPContext, func) },
++#define VC1DSP_SIZED_TEST(func, width, height) { #func, offsetof(VC1DSPContext, func), width, height },
++
++typedef struct {
++    const char *name;
++    size_t offset;
++    int width;
++    int height;
++} test;
++
++typedef struct matrix {
++    size_t width;
++    size_t height;
++    float d[];
++} matrix;
++
++static const matrix T8 = { 8, 8, {
++        12,  12,  12,  12,  12,  12,  12,  12,
++        16,  15,   9,   4,  -4,  -9, -15, -16,
++        16,   6,  -6, -16, -16,  -6,   6,  16,
++        15,  -4, -16,  -9,   9,  16,   4, -15,
++        12, -12, -12,  12,  12, -12, -12,  12,
++         9, -16,   4,  15, -15,  -4,  16,  -9,
++         6, -16,  16,  -6,  -6,  16, -16,   6,
++         4,  -9,  15, -16,  16, -15,   9,  -4
++} };
++
++static const matrix T4 = { 4, 4, {
++        17,  17,  17,  17,
++        22,  10, -10, -22,
++        17, -17, -17,  17,
++        10, -22,  22, -10
++} };
++
++static const matrix T8t = { 8, 8, {
++        12,  16,  16,  15,  12,   9,   6,   4,
++        12,  15,   6,  -4, -12, -16, -16,  -9,
++        12,   9,  -6, -16, -12,   4,  16,  15,
++        12,   4, -16,  -9,  12,  15,  -6, -16,
++        12,  -4, -16,   9,  12, -15,  -6,  16,
++        12,  -9,  -6,  16, -12,  -4,  16, -15,
++        12, -15,   6,   4, -12,  16, -16,   9,
++        12, -16,  16, -15,  12,  -9,   6,  -4
++} };
++
++static const matrix T4t = { 4, 4, {
++        17,  22,  17,  10,
++        17,  10, -17, -22,
++        17, -10, -17,  22,
++        17, -22,  17, -10
++} };
++
++static matrix *new_matrix(size_t width, size_t height)
++{
++    matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float));
++    if (out == NULL) {
++        fprintf(stderr, "Memory allocation failure\n");
++        exit(EXIT_FAILURE);
++    }
++    out->width = width;
++    out->height = height;
++    return out;
++}
++
++static matrix *multiply(const matrix *a, const matrix *b)
++{
++    matrix *out;
++    if (a->width != b->height) {
++        fprintf(stderr, "Incompatible multiplication\n");
++        exit(EXIT_FAILURE);
++    }
++    out = new_matrix(b->width, a->height);
++    for (int j = 0; j < out->height; ++j)
++        for (int i = 0; i < out->width; ++i) {
++            float sum = 0;
++            for (int k = 0; k < a->width; ++k)
++                sum += a->d[j * a->width + k] * b->d[k * b->width + i];
++            out->d[j * out->width + i] = sum;
++        }
++    return out;
++}
++
++static void normalise(matrix *a)
++{
++    for (int j = 0; j < a->height; ++j)
++        for (int i = 0; i < a->width; ++i) {
++            float *p = a->d + j * a->width + i;
++            *p *= 64;
++            if (a->height == 4)
++                *p /= (const unsigned[]) { 289, 292, 289, 292 } [j];
++            else
++                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j];
++            if (a->width == 4)
++                *p /= (const unsigned[]) { 289, 292, 289, 292 } [i];
++            else
++                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i];
++        }
++}
++
++static void divide_and_round_nearest(matrix *a, float by)
++{
++    for (int j = 0; j < a->height; ++j)
++        for (int i = 0; i < a->width; ++i) {
++            float *p = a->d + j * a->width + i;
++            *p = rintf(*p / by);
++        }
++}
++
++static void tweak(matrix *a)
++{
++    for (int j = 4; j < a->height; ++j)
++        for (int i = 0; i < a->width; ++i) {
++            float *p = a->d + j * a->width + i;
++            *p += 1;
++        }
++}
++
++/* The VC-1 spec places restrictions on the values permitted at three
++ * different stages:
++ * - D: the input coefficients in frequency domain
++ * - E: the intermediate coefficients, inverse-transformed only horizontally
++ * - R: the fully inverse-transformed coefficients
++ *
++ * To fully cater for the ranges specified requires various intermediate
++ * values to be held to 17-bit precision; yet these conditions do not appear
++ * to be utilised in real-world streams. At least some assembly
++ * implementations have chosen to restrict these values to 16-bit precision,
++ * to accelerate the decoding of real-world streams at the cost of strict
++ * adherence to the spec. To avoid our test marking these as failures,
++ * reduce our random inputs.
++ */
++#define ATTENUATION 4
++
++static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height)
++{
++    matrix *raw, *tmp, *D, *E, *R;
++    raw = new_matrix(width, height);
++    for (int i = 0; i < width * height; ++i)
++        raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION; /* in [-512/ATTENUATION, 512/ATTENUATION) */
++    tmp = multiply(height == 8 ? &T8 : &T4, raw);
++    D = multiply(tmp, width == 8 ? &T8t : &T4t); /* D = T * raw * Tt, sized by height/width */
++    normalise(D);
++    divide_and_round_nearest(D, 1); /* divisor 1: only rounds to integers */
++    for (int i = 0; i < width * height; ++i) {
++        if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) {
++            /* Rare, so simply try again */
++            av_free(raw);
++            av_free(tmp);
++            av_free(D);
++            return generate_inverse_quantized_transform_coefficients(width, height);
++        }
++    }
++    E = multiply(D, width == 8 ? &T8 : &T4); /* E = round(D * T / 8) once divided below */
++    divide_and_round_nearest(E, 8);
++    for (int i = 0; i < width * height; ++i)
++        if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) {
++            /* Rare, so simply try again */
++            av_free(raw);
++            av_free(tmp);
++            av_free(D);
++            av_free(E);
++            return generate_inverse_quantized_transform_coefficients(width, height);
++        }
++    R = multiply(height == 8 ? &T8t : &T4t, E); /* R = Tt * E, then tweak() and divide by 128 */
++    tweak(R);
++    divide_and_round_nearest(R, 128);
++    for (int i = 0; i < width * height; ++i)
++        if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) {
++            /* Rare, so simply try again */
++            av_free(raw);
++            av_free(tmp);
++            av_free(D);
++            av_free(E);
++            av_free(R);
++            return generate_inverse_quantized_transform_coefficients(width, height);
++        }
++    av_free(raw);
++    av_free(tmp);
++    av_free(E); /* E and R were only needed for the range checks above */
++    av_free(R);
++    return D; /* allocated by new_matrix(); the callers release it with av_free() */
++}
++
++#define RANDOMIZE_BUFFER16(name, size)        \
++    do {                                      \
++        int i;                                \
++        for (i = 0; i < size; ++i) {          \
++            uint16_t r = rnd();               \
++            AV_WN16A(name##0 + i, r);         \
++            AV_WN16A(name##1 + i, r);         \
++        }                                     \
++    } while (0)
++
++#define RANDOMIZE_BUFFER8(name, size)         \
++    do {                                      \
++        int i;                                \
++        for (i = 0; i < size; ++i) {          \
++            uint8_t r = rnd();                \
++            name##0[i] = r;                   \
++            name##1[i] = r;                   \
++        }                                     \
++    } while (0)
++
++#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size)  \
++    do {                                            \
++        uint8_t *p##0 = name##0, *p##1 = name##1;   \
++        int i = (size);                             \
++        while (i-- > 0) {                           \
++            int x = 0x80 | (rnd() & 0x7F);          \
++            x >>= rnd() % 9;                        \
++            if (rnd() & 1)                          \
++                x = -x;                             \
++            *p##1++ = *p##0++ = 0x80 + x;           \
++        }                                           \
++    } while (0)
++
++static void check_inv_trans_inplace(void)
++{
++    /* Inverse transform input coefficients are stored in a 16-bit buffer
++     * with row stride of 8 coefficients irrespective of transform size.
++     * vc1_inv_trans_8x8 differs from the others in two ways: coefficients
++     * are stored in column-major order, and the outputs are written back
++     * to the input buffer, so we oversize it slightly to catch overruns. */
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]);
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]);
++
++    VC1DSPContext h;
++
++    ff_vc1dsp_init(&h);
++
++    if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) {
++        matrix *coeffs;
++        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *);
++        RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8);
++        coeffs = generate_inverse_quantized_transform_coefficients(8, 8);
++        for (int j = 0; j < 8; ++j)
++            for (int i = 0; i < 8; ++i) {
++                int idx = 8 + i * 8 + j;
++                inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i];
++            }
++        call_ref(inv_trans_in0 + 8);
++        call_new(inv_trans_in1 + 8);
++        if (memcmp(inv_trans_in0,  inv_trans_in1,  10 * 8 * sizeof (int16_t)))
++            fail();
++        bench_new(inv_trans_in1 + 8);
++        av_free(coeffs);
++    }
++}
++
++static void check_inv_trans_adding(void)
++{
++    /* Inverse transform input coefficients are stored in a 16-bit buffer
++     * with row stride of 8 coefficients irrespective of transform size. */
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]);
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]);
++
++    /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and
++     * added with saturation to an array of unsigned 8-bit values. Oversize
++     * this by 8 samples left and right and one row above and below. */
++    LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]);
++    LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]);
++
++    VC1DSPContext h;
++
++    const test tests[] = {
++        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4)
++    };
++
++    ff_vc1dsp_init(&h);
++
++    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
++        void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset);
++        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
++            matrix *coeffs;
++            declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *);
++            RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8);
++            RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24);
++            coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height);
++            for (int j = 0; j < tests[t].height; ++j)
++                for (int i = 0; i < tests[t].width; ++i) {
++                    int idx = j * 8 + i; /* input row stride is always 8 coefficients */
++                    inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i];
++                }
++            call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0);
++            call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1);
++            if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24)) /* includes the guard border */
++                fail();
++            bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1); /* 8 * 8 input has no guard row: use the call_new base */
++            av_free(coeffs);
++        }
++    }
++}
++
++static void check_loop_filter(void)
++{
++    /* Deblocking filter buffers are big enough to hold a 16x16 block,
++     * plus 16 columns left and 4 rows above to hold filter inputs
++     * (depending on whether v or h neighbouring block edge, oversized
++     * horizontally to maintain 16-byte alignment) plus 16 columns and
++     * 4 rows below to catch write overflows */
++    LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]);
++    LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]);
++
++    VC1DSPContext h;
++
++    const test tests[] = {
++        VC1DSP_TEST(vc1_v_loop_filter4)
++        VC1DSP_TEST(vc1_h_loop_filter4)
++        VC1DSP_TEST(vc1_v_loop_filter8)
++        VC1DSP_TEST(vc1_h_loop_filter8)
++        VC1DSP_TEST(vc1_v_loop_filter16)
++        VC1DSP_TEST(vc1_h_loop_filter16)
++    };
++
++    ff_vc1dsp_init(&h);
++
++    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
++        void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset);
++        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
++        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
++            for (int count = 1000; count > 0; --count) {
++                int pq = rnd() % 31 + 1;
++                RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48);
++                call_ref(filter_buf0 + 4 * 48 + 16, 48, pq);
++                call_new(filter_buf1 + 4 * 48 + 16, 48, pq);
++                if (memcmp(filter_buf0, filter_buf1, 24 * 48))
++                    fail();
++            }
++        }
++        for (int j = 0; j < 24; ++j)
++            for (int i = 0; i < 48; ++i)
++                filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4);
++        if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name))
++            bench_new(filter_buf1 + 4 * 48 + 16, 48, 1);
++        if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name))
++            bench_new(filter_buf1 + 4 * 48 + 16, 48, 31);
++    }
++}
++
++#define TEST_UNESCAPE /* 100 runs at random offsets/lengths; ref and new must agree */              \
++    do {                                                                                            \
++        for (int count = 100; count > 0; --count) {                                                 \
++            escaped_offset = rnd() & 7;                                                             \
++            unescaped_offset = rnd() & 7;                                                           \
++            escaped_len = (1u << ((rnd() % 8) + 3)) - (rnd() & 7); /* length in 1..1024 */          \
++            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE);                                        \
++            len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
++            len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
++            if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE))                  \
++                fail();                                                                             \
++        }                                                                                           \
++    } while (0)
++
++static void check_unescape(void)
++{
++    /* This appears to be a typical length of buffer in use */
++#define LOG2_UNESCAPE_BUF_SIZE 17
++#define UNESCAPE_BUF_SIZE (1u<<LOG2_UNESCAPE_BUF_SIZE)
++    LOCAL_ALIGNED_8(uint8_t, escaped0, [UNESCAPE_BUF_SIZE]);
++    LOCAL_ALIGNED_8(uint8_t, escaped1, [UNESCAPE_BUF_SIZE]);
++    LOCAL_ALIGNED_8(uint8_t, unescaped0, [UNESCAPE_BUF_SIZE]);
++    LOCAL_ALIGNED_8(uint8_t, unescaped1, [UNESCAPE_BUF_SIZE]);
++
++    VC1DSPContext h;
++
++    ff_vc1dsp_init(&h);
++
++    if (check_func(h.vc1_unescape_buffer, "vc1dsp.vc1_unescape_buffer")) {
++        int len0, len1, escaped_offset, unescaped_offset, escaped_len; /* scratch variables used by TEST_UNESCAPE */
++        declare_func_emms(AV_CPU_FLAG_MMX, int, const uint8_t *, int, uint8_t *);
++
++        /* Test data which consists of escape sequences packed as tightly as possible */
++        for (int x = 0; x < UNESCAPE_BUF_SIZE; ++x)
++            escaped1[x] = escaped0[x] = 3 * (x % 3 == 0); /* byte pattern 3, 0, 0 repeated */
++        TEST_UNESCAPE;
++
++        /* Test random data */
++        RANDOMIZE_BUFFER8(escaped, UNESCAPE_BUF_SIZE);
++        TEST_UNESCAPE;
++
++        /* Test data with escape sequences at random intervals */
++        for (int x = 0; x <= UNESCAPE_BUF_SIZE - 4;) {
++            int gap, gap_msb;
++            escaped1[x+0] = escaped0[x+0] = 0;
++            escaped1[x+1] = escaped0[x+1] = 0;
++            escaped1[x+2] = escaped0[x+2] = 3;
++            escaped1[x+3] = escaped0[x+3] = rnd() & 3; /* byte after 00 00 03, in 0..3 */
++            gap_msb = 2u << (rnd() % 8); /* power of two in 2..256 */
++            gap = (rnd() &~ -gap_msb) | gap_msb; /* gap_msb plus random lower bits */
++            x += gap;
++        }
++        TEST_UNESCAPE;
++
++        /* Test data which is known to contain no escape sequences */
++        memset(escaped0, 0xFF, UNESCAPE_BUF_SIZE);
++        memset(escaped1, 0xFF, UNESCAPE_BUF_SIZE);
++        TEST_UNESCAPE;
++
++        /* Benchmark the no-escape-sequences case */
++        bench_new(escaped1, UNESCAPE_BUF_SIZE, unescaped1);
++    }
++}
++
++void checkasm_check_vc1dsp(void)
++{
++    check_inv_trans_inplace();
++    check_inv_trans_adding();
++    report("inv_trans");
++
++    check_loop_filter();
++    report("loop_filter");
++
++    check_unescape();
++    report("unescape_buffer");
++}
+diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
+index 07f1d8238e..aa5f45ec8f 100644
+--- a/tests/fate/checkasm.mak
++++ b/tests/fate/checkasm.mak
+@@ -16,6 +16,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
+                 fate-checkasm-hevc_add_res                              \
+                 fate-checkasm-hevc_idct                                 \
+                 fate-checkasm-hevc_sao                                  \
++                fate-checkasm-idctdsp                                   \
+                 fate-checkasm-jpeg2000dsp                               \
+                 fate-checkasm-llviddsp                                  \
+                 fate-checkasm-llviddspenc                               \
+@@ -27,6 +28,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
+                 fate-checkasm-sw_scale                                  \
+                 fate-checkasm-v210dec                                   \
+                 fate-checkasm-v210enc                                   \
++                fate-checkasm-vc1dsp                                    \
+                 fate-checkasm-vf_blend                                  \
+                 fate-checkasm-vf_colorspace                             \
+                 fate-checkasm-vf_eq                                     \