From ac7906e00b77f6734c196224242ea207365b583e Mon Sep 17 00:00:00 2001
From: Matthias Reichl <hias@horus.com>
Date: Fri, 13 Dec 2024 21:50:24 +0100
Subject: [PATCH] ffmpeg: update rpi patch

Patch created using revisions b08d796..5f39f6c
from branch test/7.1/main of https://github.com/jc-kynesim/rpi-ffmpeg
---
 .../ffmpeg/patches/rpi/ffmpeg-001-rpi.patch   | 58391 ++++++----------
 1 file changed, 20798 insertions(+), 37593 deletions(-)

diff --git a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch
index f0e0ffe03b..d9dc15a210 100644
--- a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch
+++ b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch
@@ -1,5266 +1,33 @@
-From bedd295922f7df955c45801720503eb632711525 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 26 Apr 2021 12:34:50 +0100
-Subject: [PATCH 001/186] Add pi configs and scripts
-
----
- pi-util/BUILD.txt                  |  59 ++++++++
- pi-util/NOTES.txt                  |  69 +++++++++
- pi-util/TESTMESA.txt               |  82 +++++++++++
- pi-util/clean_usr_libs.sh          |  26 ++++
- pi-util/conf_arm64_native.sh       |  45 ++++++
- pi-util/conf_h265.2016.csv         | 195 ++++++++++++++++++++++++++
- pi-util/conf_h265.2016_HEVC_v1.csv | 147 ++++++++++++++++++++
- pi-util/conf_h265.csv              | 144 +++++++++++++++++++
- pi-util/conf_native.sh             | 108 +++++++++++++++
- pi-util/ffconf.py                  | 215 +++++++++++++++++++++++++++++
- pi-util/ffperf.py                  | 128 +++++++++++++++++
- pi-util/genpatch.sh                |  35 +++++
- pi-util/make_array.py              |  23 +++
- pi-util/mkinst.sh                  |   5 +
- pi-util/patkodi.sh                 |   9 ++
- pi-util/perfcmp.py                 | 101 ++++++++++++++
- pi-util/qem.sh                     |   9 ++
- pi-util/v3dusage.py                | 128 +++++++++++++++++
- 18 files changed, 1528 insertions(+)
- create mode 100644 pi-util/BUILD.txt
- create mode 100644 pi-util/NOTES.txt
- create mode 100644 pi-util/TESTMESA.txt
- create mode 100755 pi-util/clean_usr_libs.sh
- create mode 100644 pi-util/conf_arm64_native.sh
- create mode 100644 pi-util/conf_h265.2016.csv
- create mode 100644 pi-util/conf_h265.2016_HEVC_v1.csv
- create mode 100644 pi-util/conf_h265.csv
- create mode 100755 pi-util/conf_native.sh
- create mode 100755 pi-util/ffconf.py
- create mode 100755 pi-util/ffperf.py
- create mode 100755 pi-util/genpatch.sh
- create mode 100755 pi-util/make_array.py
- create mode 100755 pi-util/mkinst.sh
- create mode 100644 pi-util/patkodi.sh
- create mode 100755 pi-util/perfcmp.py
- create mode 100755 pi-util/qem.sh
- create mode 100755 pi-util/v3dusage.py
-
-diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt
-new file mode 100644
-index 000000000000..b050971f63c5
---- /dev/null
-+++ b/pi-util/BUILD.txt
-@@ -0,0 +1,59 @@
-+Building Pi FFmpeg
-+==================
-+
-+Current only building on a Pi is supported.
-+This builds ffmpeg the way I've tested it
-+
-+Get all dependencies - the current package dependencies are good enough
-+
-+$ sudo apt-get build-dep ffmpeg
-+
-+Configure using the pi-util/conf_native.sh script
-+-------------------------------------------------
-+
-+This sets the normal release options and creates an ouutput dir to build into
-+The directory name will depend on system and options but will be under out/
-+
-+There are a few choices here
-+ --mmal  build including the legacy mmal-based decoders and zero-copy code
-+         this requires appropriate libraries which currently will exist for
-+         armv7 but not arm64
-+ --noshared
-+         Build a static image rather than a shared library one.  Static is
-+         easier for testing as there is no need to worry about library
-+         paths being confused and therefore running the wrong code,  Shared
-+         is what is needed, in most cases, when building for use by other
-+         programs.
-+
-+So for a static build
-+---------------------
-+
-+$ pi-util/conf_native.sh --noshared
-+
-+$ make -j8 -C out/<wherever the script said it was building to>
-+
-+You can now run ffmpeg directly from where it was built
-+
-+For a shared build
-+------------------
-+
-+$ pi-util/conf_native.sh
-+
-+You will normally want an install target if shared. Note that the script has
-+set this up to be generated in out/<builddir>/install, you don't have to worry
-+about overwriting your system libs.
-+
-+$ make -j8 -C out/<builddir> install
-+
-+You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was
-+built or install the image on the system - you have to be careful to get rid
-+of all other ffmpeg libs or confusion may result.  There is a little script
-+that wipes all other versions - obviously use with care!
-+
-+$ sudo pi-util/clean_usr_libs.sh
-+
-+Then simply copying from the install to /usr works
-+
-+$ sudo cp -r out/<builddir>/install/* /usr
-+
-+
-diff --git a/pi-util/NOTES.txt b/pi-util/NOTES.txt
-new file mode 100644
-index 000000000000..fcce72226a32
---- /dev/null
-+++ b/pi-util/NOTES.txt
-@@ -0,0 +1,69 @@
-+Notes on the hevc_rpi decoder & associated support code
-+-------------------------------------------------------
-+
-+There are 3 main parts to the existing code:
-+
-+1) The decoder - this is all in libavcodec as rpi_hevc*.
-+
-+2) A few filters to deal with Sand frames and a small patch to
-+automatically select the sand->i420 converter when required.
-+
-+3) A kludge in ffmpeg.c to display the decoded video. This could & should
-+be converted into a proper ffmpeg display module.
-+
-+
-+Decoder
-+-------
-+
-+The decoder is a modified version of the existing ffmpeg hevc decoder.
-+Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder.
-+More complex bitstreams can be up to ~200% faster but particularly easy
-+streams can cut its advantage down to ~50%.  This means that a Pi3+ can
-+display nearly all 8-bit 1080p30 streams and with some overclocking it can
-+display most lower bitrate 10-bit 1080p30 streams - this latter case is
-+not helped by the requirement to downsample to 8-bit before display on a
-+Pi.
-+
-+It has had co-processor offload added for inter-pred and large block
-+residual transform.  Various parts have had optimized ARM NEON assembler
-+added and the existing ARM asm sections have been profiled and
-+re-optimized for A53. The main C code has been substantially reworked at
-+its lower levels in an attempt to optimize it and minimize memory
-+bandwidth. To some extent code paths that deal with frame types that it
-+doesn't support have been pruned.
-+
-+It outputs frames in Broadcom Sand format. This is a somewhat annoying
-+layout that doesn't fit into ffmpegs standard frame descriptions. It has
-+vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for
-+the stripe followed by interleaved U & V, that is then followed by the Y
-+for the next stripe, etc. The final stripe is always padded to
-+stripe-width. This is used in an attempt to help with cache locality and
-+cut down on the number of dram bank switches. It is annoying to use for
-+inter-pred with conventional processing but the way the Pi QPU (which is
-+used for inter-pred) works means that it has negligible downsides here and
-+the improved memory performance exceeds the overhead of the increased
-+complexity in the rest of the code.
-+
-+Frames must be allocated out of GPU memory (as otherwise they can't be
-+accessed by the co-processors). Utility functions (in rpi_zc.c) have been
-+written to make this easier. As the frames are already in GPU memory they
-+can be displayed by the Pi h/w without any further copying.
-+
-+
-+Known non-features
-+------------------
-+
-+Frame allocation should probably be done in some other way in order to fit
-+into the standard framework better.
-+
-+Sand frames are currently declared as software frames, there is an
-+argument that they should be hardware frames but they aren't really.
-+
-+There must be a better way of auto-selecting the hevc_rpi decoder over the
-+normal s/w hevc decoder, but I became confused by the existing h/w
-+acceleration framework and what I wanted to do didn't seem to fit in
-+neatly.
-+
-+Display should be a proper device rather than a kludge in ffmpeg.c
-+
-+
-diff --git a/pi-util/TESTMESA.txt b/pi-util/TESTMESA.txt
-new file mode 100644
-index 000000000000..92bc13a3dfa1
---- /dev/null
-+++ b/pi-util/TESTMESA.txt
-@@ -0,0 +1,82 @@
-+# Setup & Build instructions for testing Argon30 mesa support (on Pi4)
-+
-+# These assume that the drm_mmal test for Sand8 has been built on this Pi
-+# as build relies on many of the same files
-+
-+# 1st get everything required to build ffmpeg
-+# If sources aren't already enabled on your Pi then enable them
-+sudo su
-+sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list
-+sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list
-+mv /tmp/sources.list /etc/apt/
-+mv /tmp/raspi.list /etc/apt/sources.list.d/
-+apt update
-+
-+# Get dependancies
-+sudo apt build-dep ffmpeg
-+
-+sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev
-+
-+# Enable H265 V4L2 request decoder
-+sudo su
-+echo dtoverlay=rpivid-v4l2 >> /boot/config.txt
-+# You may also want to add more CMA if you are going to try 4k videos
-+# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read
-+# dtoverlay=vc4-fkms-v3d,cma-512
-+reboot
-+# Check it has turned up
-+ls -la /dev/video*
-+# This should include video19
-+# crw-rw----+ 1 root video 81, 7 Aug  4 17:25 /dev/video19
-+
-+# Currently on the Pi the linux headers from the debian distro don't match
-+# the kernel that we ship and we need to update them - hopefully this step
-+# will be unneeded in the future
-+sudo apt install git bc bison flex libssl-dev make
-+git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y
-+cd linux
-+KERNEL=kernel7l
-+make bcm2711_defconfig
-+make headers_install
-+sudo cp -r usr/include/linux /usr/include
-+cd ..
-+
-+# Config - this builds a staticly linked ffmpeg which is easier for testing
-+pi-util/conf_native.sh --noshared
-+
-+# Build (this is a bit dull)
-+# If you want to poke the source the libavdevice/egl_vout.c contains the
-+# output code -
-+cd out/armv7-static-rel
-+
-+# Check that you have actually configured V4L2 request
-+grep HEVC_V4L2REQUEST config.h
-+# You are hoping for
-+# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1
-+# if you get 0 then the config has failed
-+
-+make -j6
-+
-+# Grab test streams
-+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv
-+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv
-+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv
-+
-+# Test i420 output (works currently)
-+./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl -
-+
-+# Test Sand8 output - doesn't currently work but should once you have
-+# Sand8 working in drm_mmal. I can't guarantee that this will work as
-+# I can't test this path with a known working format, but the debug looks
-+# good.  If this doesn't work & drm_mmal does with sand8 then come back to me
-+# The "show_all 1" forces vout to display every frame otherwise it drops any
-+# frame that would cause it to block
-+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl -
-+
-+# Test Sand30 - doesn't currently work
-+# (Beware that when FFmpeg errors out it often leaves your teminal window
-+# in a state where you need to reset it)
-+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl -
-+
-+
-+
-diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh
-new file mode 100755
-index 000000000000..b3b2d5509de0
---- /dev/null
-+++ b/pi-util/clean_usr_libs.sh
-@@ -0,0 +1,26 @@
-+set -e
-+U=/usr/lib/arm-linux-gnueabihf
-+rm -f $U/libavcodec.*
-+rm -f $U/libavdevice.*
-+rm -f $U/libavfilter.*
-+rm -f $U/libavformat.*
-+rm -f $U/libavutil.*
-+rm -f $U/libswresample.*
-+rm -f $U/libswscale.*
-+U=/usr/lib/arm-linux-gnueabihf/neon/vfp
-+rm -f $U/libavcodec.*
-+rm -f $U/libavdevice.*
-+rm -f $U/libavfilter.*
-+rm -f $U/libavformat.*
-+rm -f $U/libavutil.*
-+rm -f $U/libswresample.*
-+rm -f $U/libswscale.*
-+U=/usr/lib/aarch64-linux-gnu
-+rm -f $U/libavcodec.*
-+rm -f $U/libavdevice.*
-+rm -f $U/libavfilter.*
-+rm -f $U/libavformat.*
-+rm -f $U/libavutil.*
-+rm -f $U/libswresample.*
-+rm -f $U/libswscale.*
-+
-diff --git a/pi-util/conf_arm64_native.sh b/pi-util/conf_arm64_native.sh
-new file mode 100644
-index 000000000000..9e3bbfa1908a
---- /dev/null
-+++ b/pi-util/conf_arm64_native.sh
-@@ -0,0 +1,45 @@
-+echo "Configure for ARM64 native build"
-+
-+#RPI_KEEPS="-save-temps=obj"
-+
-+SHARED_LIBS="--enable-shared"
-+if [ "$1" == "--noshared" ]; then
-+  SHARED_LIBS="--disable-shared"
-+  echo Static libs
-+  OUT=out/arm64-static-rel
-+else
-+  echo Shared libs
-+  OUT=out/arm64-shared-rel
-+fi
-+
-+mkdir -p $OUT
-+cd $OUT
-+
-+A=aarch64-linux-gnu
-+USR_PREFIX=`pwd`/install
-+LIB_PREFIX=$USR_PREFIX/lib/$A
-+INC_PREFIX=$USR_PREFIX/include/$A
-+
-+../../configure \
-+ --prefix=$USR_PREFIX\
-+ --libdir=$LIB_PREFIX\
-+ --incdir=$INC_PREFIX\
-+ --disable-stripping\
-+ --disable-thumb\
-+ --disable-mmal\
-+ --enable-sand\
-+ --enable-v4l2-request\
-+ --enable-libdrm\
-+ --enable-epoxy\
-+ --enable-libudev\
-+ --enable-vout-drm\
-+ --enable-vout-egl\
-+ $SHARED_LIBS\
-+ --extra-cflags="-ggdb"
-+
-+# --enable-decoder=hevc_rpi\
-+# --enable-extra-warnings\
-+# --arch=armv71\
-+
-+# gcc option for getting asm listing
-+# -Wa,-ahls
-diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv
-new file mode 100644
-index 000000000000..4efd5d1c676d
---- /dev/null
-+++ b/pi-util/conf_h265.2016.csv
-@@ -0,0 +1,195 @@
-+1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8
-+1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8
-+1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8
-+1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8
-+1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8
-+1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8
-+1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8
-+1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8
-+1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8
-+1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8
-+1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8
-+1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8
-+1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8
-+1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8
-+1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8
-+1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8
-+1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8
-+1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8
-+1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8
-+1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8
-+1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8
-+1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10
-+1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8
-+1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8
-+1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8
-+1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8
-+1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8
-+1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8
-+1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8
-+1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8
-+1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8
-+1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8
-+1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8
-+1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8
-+1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8
-+1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8
-+1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8
-+1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8
-+1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8
-+1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8
-+1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8
-+1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8
-+1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10
-+1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8
-+1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8
-+1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8
-+1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8
-+1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8
-+1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8
-+1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8
-+1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8
-+1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8
-+1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8
-+1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8
-+1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8
-+1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8
-+1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8
-+1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8
-+1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8
-+1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8
-+1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8
-+1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8
-+1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8
-+1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8
-+1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8
-+1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8
-+1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8
-+1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8
-+1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8
-+1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8
-+1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8
-+1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8
-+1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8
-+1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8
-+1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8
-+1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8
-+1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8
-+1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8
-+1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8
-+1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8
-+1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8
-+1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8
-+1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8
-+1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8
-+1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8
-+1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8
-+1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8
-+1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8
-+1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8
-+1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8
-+1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8
-+1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8
-+1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8
-+1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8
-+1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8
-+1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8
-+1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8
-+1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8
-+1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8
-+1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8
-+1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8
-+1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8
-+1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8
-+1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8
-+1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8
-+1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8
-+1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8
-+1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8
-+1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8
-+1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8
-+1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8
-+1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8
-+1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8
-+1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8
-+1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8
-+1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8
-+1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8
-+1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8
-+1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8
-+1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8
-+1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8
-+1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8
-+1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8
-+1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8
-+1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8
-+1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8
-+1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8
-+3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10
-+1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8
-+1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8
-+3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
-+1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10
-+1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8
-+1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8
-+1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10
-+1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10
-+1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8
-+1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10
-+1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8
-+1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10
-+1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8
-+1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10
-+1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8
-+1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10
-+1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8
-+1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10
-+1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8
-+1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0
-+0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8
-+0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8
-+0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10
-+0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8
-+0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8
-+1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0
-+0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8
-+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
-+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
-+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
-+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
-+0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
-+0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
-+0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
-+0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
-+1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10
-+1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0
-+1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0
-+1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0
-+1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0
-+1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0
-+1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0
-+0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0
-+0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8
-+0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8
-+1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0
-+1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8
-+1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0
-+1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0
-+1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0
-+1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0
-+1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0
-+1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0
-+1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0
-+0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8
-+0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10
-+0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10
-+0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8
-+0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8
-+0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8
-+0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8
-+0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8
-+1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8
-+1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8
-+1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8
-+1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8
-+1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8
-diff --git a/pi-util/conf_h265.2016_HEVC_v1.csv b/pi-util/conf_h265.2016_HEVC_v1.csv
-new file mode 100644
-index 000000000000..60826412715c
---- /dev/null
-+++ b/pi-util/conf_h265.2016_HEVC_v1.csv
-@@ -0,0 +1,147 @@
-+1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5
-+1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5
-+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
-+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
-+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
-+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
-+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
-+1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5
-+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
-+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
-+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
-+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
-+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
-+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
-+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
-+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
-+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
-+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
-+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
-+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
-+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
-+1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5
-+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
-+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
-+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
-+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
-+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
-+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
-+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
-+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
-+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
-+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
-+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
-+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
-+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
-+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
-+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
-+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
-+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
-+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
-+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
-+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
-+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
-+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
-+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
-+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
-+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
-+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
-+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
-+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
-+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
-+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
-+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
-+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
-+1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5
-+1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5
-+1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5
-+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
-+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
-+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
-+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
-+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
-+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
-+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
-+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
-+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
-+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
-+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
-+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
-+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
-+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
-+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
-+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
-+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
-+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
-+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
-+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
-+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
-+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
-+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
-+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
-+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
-+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
-+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
-+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
-+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
-+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
-+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
-+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
-+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
-+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
-+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
-+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
-+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
-+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
-+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
-+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
-+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
-+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
-+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
-+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
-+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
-+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
-+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
-+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
-+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
-+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
-+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
-+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
-+1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5
-+2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt
-+2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
-+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
-+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
-+1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5
-+1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5
-+1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5
-+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
-+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
-+1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5
-+1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5
-+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
-+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
-+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
-+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
-+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
-+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
-+3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth
-+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
-+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
-+3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???
-+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
-+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
-+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
-+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
-+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
-+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
-+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
-+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
-+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
-+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
-+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
-+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
-+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
-+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
-+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
-+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
-diff --git a/pi-util/conf_h265.csv b/pi-util/conf_h265.csv
-new file mode 100644
-index 000000000000..fc14f2a3c2bb
---- /dev/null
-+++ b/pi-util/conf_h265.csv
-@@ -0,0 +1,144 @@
-+1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5
-+1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5
-+1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5
-+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
-+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
-+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
-+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
-+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
-+1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5
-+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
-+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
-+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
-+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
-+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
-+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
-+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
-+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
-+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
-+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
-+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
-+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
-+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
-+1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5
-+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
-+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
-+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
-+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
-+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
-+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
-+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
-+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
-+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
-+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
-+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
-+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
-+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
-+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
-+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
-+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
-+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
-+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
-+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
-+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
-+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
-+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
-+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
-+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
-+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
-+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
-+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
-+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
-+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
-+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
-+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
-+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
-+1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5
-+1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5
-+1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5
-+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
-+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
-+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
-+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
-+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
-+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
-+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
-+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
-+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
-+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
-+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
-+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
-+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
-+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
-+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
-+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
-+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
-+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
-+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
-+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
-+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
-+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
-+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
-+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
-+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
-+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
-+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
-+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
-+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
-+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
-+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
-+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
-+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
-+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
-+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
-+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
-+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
-+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
-+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
-+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
-+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
-+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
-+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
-+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
-+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
-+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
-+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
-+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
-+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
-+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
-+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
-+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
-+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
-+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
-+1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5
-+1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5
-+1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5
-+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
-+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
-+1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5
-+1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5
-+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
-+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
-+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
-+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
-+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
-+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
-+0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched
-+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
-+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
-+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
-+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
-+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
-+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
-+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
-+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
-+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
-+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
-+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
-+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
-+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
-+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
-+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
-+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
-+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
-+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
-diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
-new file mode 100755
-index 000000000000..65576846e81f
---- /dev/null
-+++ b/pi-util/conf_native.sh
-@@ -0,0 +1,108 @@
-+echo "Configure for native build"
-+
-+FFSRC=`pwd`
-+MC=`dpkg --print-architecture`
-+BUILDBASE=$FFSRC/out
-+
-+#RPI_KEEPS="-save-temps=obj"
-+RPI_KEEPS=""
-+
-+NOSHARED=
-+MMAL=
-+
-+while [ "$1" != "" ] ; do
-+    case $1 in
-+	--noshared)
-+	    NOSHARED=1
-+	    ;;
-+	--mmal)
-+	    MMAL=1
-+	    ;;
-+	*)
-+	    echo "Usage $0: [--noshared] [--mmal]"
-+	    exit 1
-+	    ;;
-+    esac
-+    shift
-+done
-+
-+
-+MCOPTS=
-+RPI_INCLUDES=
-+RPI_LIBDIRS=
-+RPI_DEFINES=
-+RPI_EXTRALIBS=
-+
-+if [ "$MC" == "arm64" ]; then
-+  echo "M/C aarch64"
-+  A=aarch64-linux-gnu
-+  B=arm64
-+elif [ "$MC" == "armhf" ]; then
-+  echo "M/C armv7"
-+  A=arm-linux-gnueabihf
-+  B=armv7
-+  MCOPTS="--arch=armv6t2 --cpu=cortex-a7"
-+  RPI_DEFINES=-mfpu=neon-vfpv4
-+else
-+  echo Unexpected architecture $MC
-+  exit 1
-+fi
-+
-+if [ $MMAL ]; then
-+  RPI_OPT_VC=/opt/vc
-+  RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
-+  RPI_LIBDIRS="-L$RPI_OPT_VC/lib"
-+  RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000"
-+  RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group"
-+  RPIOPTS="--enable-mmal --enable-rpi"
-+else
-+  RPIOPTS="--disable-mmal --enable-sand"
-+fi
-+
-+C=`lsb_release -sc`
-+V=`cat RELEASE`
-+
-+SHARED_LIBS="--enable-shared"
-+if [ $NOSHARED ]; then
-+  SHARED_LIBS="--disable-shared"
-+  OUT=$BUILDBASE/$B-$C-$V-static-rel
-+  echo Static libs
-+else
-+  echo Shared libs
-+  OUT=$BUILDBASE/$B-$C-$V-shared-rel
-+fi
-+
-+USR_PREFIX=$OUT/install
-+LIB_PREFIX=$USR_PREFIX/lib/$A
-+INC_PREFIX=$USR_PREFIX/include/$A
-+
-+echo Destination directory: $OUT
-+mkdir -p $OUT
-+# Nothing under here need worry git - including this .gitignore!
-+echo "**" > $BUILDBASE/.gitignore
-+cd $OUT
-+
-+$FFSRC/configure \
-+ --prefix=$USR_PREFIX\
-+ --libdir=$LIB_PREFIX\
-+ --incdir=$INC_PREFIX\
-+ $MCOPTS\
-+ --disable-stripping\
-+ --disable-thumb\
-+ --enable-v4l2-request\
-+ --enable-libdrm\
-+ --enable-epoxy\
-+ --enable-libudev\
-+ --enable-vout-egl\
-+ --enable-vout-drm\
-+ $SHARED_LIBS\
-+ $RPIOPTS\
-+ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\
-+ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\
-+ --extra-ldflags="$RPI_LIBDIRS"\
-+ --extra-libs="$RPI_EXTRALIBS"\
-+ --extra-version="rpi"
-+
-+
-+# gcc option for getting asm listing
-+# -Wa,-ahls
-diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py
-new file mode 100755
-index 000000000000..657568014e57
---- /dev/null
-+++ b/pi-util/ffconf.py
-@@ -0,0 +1,215 @@
-+#!/usr/bin/env python3
-+
-+import string
-+import os
-+import subprocess
-+import re
-+import argparse
-+import sys
-+import csv
-+from stat import *
-+
-+CODEC_HEVC_RPI  = 1
-+HWACCEL_RPI     = 2
-+HWACCEL_DRM     = 3
-+HWACCEL_VAAPI   = 4
-+
-+def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec):
-+    hwaccel = ""
-+    if dectype == HWACCEL_RPI:
-+        hwaccel = "rpi"
-+    elif dectype == HWACCEL_DRM:
-+        hwaccel = "drm"
-+    elif dectype == HWACCEL_VAAPI:
-+        hwaccel = "vaapi"
-+
-+    pix_fmt = []
-+    if pix == "8":
-+        pix_fmt = ["-pix_fmt", "yuv420p"]
-+    elif pix == "10":
-+        pix_fmt = ["-pix_fmt", "yuv420p10le"]
-+    elif pix == "12":
-+        pix_fmt = ["-pix_fmt", "yuv420p12le"]
-+
-+    tmp_root = "/tmp"
-+
-+    names = srcname.split('/')
-+    while len(names) > 1:
-+        tmp_root = os.path.join(tmp_root, names[0])
-+        del names[0]
-+    name = names[0]
-+
-+    if not os.path.exists(tmp_root):
-+        os.makedirs(tmp_root)
-+
-+    dec_file = os.path.join(tmp_root, name + ".dec.md5")
-+    try:
-+        os.remove(dec_file)
-+    except:
-+        pass
-+
-+    flog = open(os.path.join(tmp_root, name + ".log"), "wt")
-+
-+    ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file]
-+
-+    # Unaligned needed for cropping conformance
-+    if hwaccel:
-+        rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT)
-+    else:
-+        rstr = subprocess.call(
-+            [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file],
-+            stdout=flog, stderr=subprocess.STDOUT)
-+
-+    try:
-+        m1 = None
-+        m2 = None
-+        with open(os.path.join(fileroot, md5_file)) as f:
-+            for line in f:
-+                m1 = re.search("[0-9a-f]{32}", line.lower())
-+                if m1:
-+                    break
-+
-+        with open(dec_file) as f:
-+            m2 = re.search("[0-9a-f]{32}", f.readline())
-+    except:
-+        pass
-+
-+    if  m1 and m2 and m1.group() == m2.group():
-+        print("Match: " + m1.group(), file=flog)
-+        rv = 0
-+    elif not m1:
-+        print("****** Cannot find m1", file=flog)
-+        rv = 3
-+    elif not m2:
-+        print("****** Cannot find m2", file=flog)
-+        rv = 2
-+    else:
-+        print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog)
-+        rv = 1
-+    flog.close()
-+    return rv
-+
-+def scandir(root):
-+    aconf = []
-+    ents = os.listdir(root)
-+    ents.sort(key=str.lower)
-+    for name in ents:
-+        test_path = os.path.join(root, name)
-+        if S_ISDIR(os.stat(test_path).st_mode):
-+            files = os.listdir(test_path)
-+            es_file = "?"
-+            md5_file = "?"
-+            for f in files:
-+                (base, ext) = os.path.splitext(f)
-+                if base[0] == '.':
-+                    pass
-+                elif ext == ".bit" or ext == ".bin":
-+                    es_file = f
-+                elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")):
-+                    if md5_file == "?":
-+                        md5_file = f
-+                    elif base[-3:] == "yuv":
-+                        md5_file = f
-+            aconf.append((1, name, es_file, md5_file))
-+    return aconf
-+
-+def runtest(name, tests):
-+    if not tests:
-+        return True
-+    for t in tests:
-+        if name[0:len(t)] == t or name.find("/" + t) != -1:
-+            return True
-+    return False
-+
-+def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec):
-+    unx_failures = []
-+    unx_success = []
-+    failures = 0
-+    successes = 0
-+    for a in csva:
-+        exp_test = int(a[0])
-+        if (exp_test and runtest(a[1], tests)):
-+            name = a[1]
-+            print ("==== ", name, end="")
-+            sys.stdout.flush()
-+
-+            rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec)
-+            if (rv == 0):
-+                successes += 1
-+            else:
-+                failures += 1
-+
-+            if (rv == 0):
-+                if exp_test == 2:
-+                    print(": * OK *")
-+                    unx_success.append(name)
-+                else:
-+                    print(": ok")
-+            elif exp_test == 2 and rv == 1:
-+                print(": fail")
-+            elif exp_test == 3 and rv == 2:
-+                # Call an expected "crash" an abort
-+                print(": abort")
-+            else:
-+                unx_failures.append(name)
-+                if rv == 1:
-+                    print(": * FAIL *")
-+                elif (rv == 2) :
-+                    print(": * CRASH *")
-+                elif (rv == 3) :
-+                    print(": * MD5 MISSING *")
-+                else :
-+                    print(": * BANG *")
-+
-+    if unx_failures or unx_success:
-+        print("Unexpected Failures:", unx_failures)
-+        print("Unexpected Success: ", unx_success)
-+    else:
-+        print("All tests normal:", successes, "ok,", failures, "failed")
-+
-+
-+class ConfCSVDialect(csv.Dialect):
-+    delimiter = ','
-+    doublequote = True
-+    lineterminator = '\n'
-+    quotechar='"'
-+    quoting = csv.QUOTE_MINIMAL
-+    skipinitialspace = True
-+    strict = True
-+
-+if __name__ == '__main__':
-+
-+    argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester")
-+    argp.add_argument("tests", nargs='*')
-+    argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line")
-+    argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line")
-+    argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line")
-+    argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test")
-+    argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir")
-+    argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename")
-+    argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use")
-+    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
-+    args = argp.parse_args()
-+
-+    if args.csvgen:
-+        csv.writer(sys.stdout).writerows(scandir(args.test_root))
-+        exit(0)
-+
-+    with open(args.csv, 'rt') as csvfile:
-+        csva = [a for a in csv.reader(csvfile, ConfCSVDialect())]
-+
-+    dectype = CODEC_HEVC_RPI
-+    if os.path.exists("/dev/rpivid-hevcmem"):
-+        dectype = HWACCEL_RPI
-+    if args.drm or os.path.exists("/sys/module/rpivid_hevc"):
-+        dectype = HWACCEL_DRM
-+
-+    if args.pi4:
-+        dectype = HWACCEL_RPI
-+    elif args.drm:
-+        dectype = HWACCEL_DRM
-+    elif args.vaapi:
-+        dectype = HWACCEL_VAAPI
-+
-+    doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg)
-+
-diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py
-new file mode 100755
-index 000000000000..65c5224cd8fb
---- /dev/null
-+++ b/pi-util/ffperf.py
-@@ -0,0 +1,128 @@
-+#!/usr/bin/env python3
-+
-+import time
-+import string
-+import os
-+import tempfile
-+import subprocess
-+import re
-+import argparse
-+import sys
-+import csv
-+from stat import *
-+
-+class tstats:
-+    close_threshold = 0.01
-+
-+    def __init__(self, stats_dict=None):
-+        if stats_dict != None:
-+            self.name = stats_dict["name"]
-+            self.elapsed = float(stats_dict["elapsed"])
-+            self.user = float(stats_dict["user"])
-+            self.sys = float(stats_dict["sys"])
-+
-+    def times_str(self):
-+        ctime = self.sys + self.user
-+        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
-+
-+    def dict(self):
-+        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
-+
-+    def is_close(self, other):
-+        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
-+
-+    def __lt__(self, other):
-+        return self.elapsed < other.elapsed
-+    def __gt__(self, other):
-+        return self.elapsed > other.elapsed
-+
-+    def time_file(name, prefix, ffmpeg="./ffmpeg"):
-+        stats = tstats()
-+        stats.name = name
-+        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
-+        cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw",
-+                                  "-vcodec", "hevc_rpi",
-+                                  "-t", "30", "-i", prefix + name,
-+                                  "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
-+        pinfo = os.wait4(cproc.pid, 0)
-+        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
-+        stats.elapsed = end_time - start_time
-+        stats.user = pinfo[2].ru_utime
-+        stats.sys = pinfo[2].ru_stime
-+        return stats
-+
-+
-+def common_prefix(s1, s2):
-+    for i in range(min(len(s1),len(s2))):
-+        if s1[i] != s2[i]:
-+            return s1[:i]
-+    return s1[:i+1]
-+
-+def main():
-+    global flog
-+
-+    argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog="""
-+To blank the screen before starting use "xdg-screensaver activate"
-+(For some reason this doesn't seem to work from within python).
-+""")
-+
-+    argp.add_argument("streams", nargs='*')
-+    argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename")
-+    argp.add_argument("--csv_in", help="CSV input filename")
-+    argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).")
-+    argp.add_argument("--repeat", default=3, type=int, help="Run repeat count")
-+    argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable")
-+
-+    args = argp.parse_args()
-+
-+    csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"])
-+    csv_out.writeheader()
-+
-+    stats_in = {}
-+    if args.csv_in != None:
-+        with open(args.csv_in, 'r', newline='') as f_in:
-+            stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
-+
-+    flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt")
-+
-+    streams = args.streams
-+    if not streams:
-+        if not stats_in:
-+            print ("No source streams specified")
-+            return 1
-+        prefix = "" if args.prefix == None else args.prefix
-+        streams = [k for k in stats_in]
-+    elif args.prefix != None:
-+        prefix = args.prefix
-+    else:
-+        prefix = streams[0]
-+        for f in streams[1:]:
-+            prefix = common_prefix(prefix, f)
-+        pp = prefix.rpartition(os.sep)
-+        prefix = pp[0] + pp[1]
-+        streams = [s[len(prefix):] for s in streams]
-+
-+    for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()):
-+        print ("====", f)
-+
-+        t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999})
-+        for i in range(args.repeat):
-+            t = tstats.time_file(f, prefix, args.ffmpeg)
-+            print ("...", t.times_str())
-+            if t0 > t:
-+                t0 = t
-+
-+        if t0.name in stats_in:
-+            pstat = stats_in[t0.name]
-+            print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str())
-+
-+        csv_out.writerow(t0.dict())
-+
-+        print ()
-+
-+    return 0
-+
-+
-+if __name__ == '__main__':
-+    exit(main())
-+
-diff --git a/pi-util/genpatch.sh b/pi-util/genpatch.sh
-new file mode 100755
-index 000000000000..0948a68a7ad7
---- /dev/null
-+++ b/pi-util/genpatch.sh
-@@ -0,0 +1,35 @@
-+set -e
-+
-+NOPATCH=
-+if [ "$1" == "--notag" ]; then
-+  shift
-+  NOPATCH=1
-+fi
-+
-+if [ "$1" == "" ]; then
-+  echo Usage: $0 [--notag] \<patch_tag\>
-+  echo e.g.: $0 mmal_4
-+  exit 1
-+fi
-+
-+VERSION=`cat RELEASE`
-+if [ "$VERSION" == "" ]; then
-+  echo Can\'t find version RELEASE
-+  exit 1
-+fi
-+
-+PATCHFILE=../ffmpeg-$VERSION-$1.patch
-+
-+if [ $NOPATCH ]; then
-+  echo Not tagged
-+else
-+  # Only continue if we are all comitted
-+  git diff --name-status --exit-code
-+
-+  PATCHTAG=pi/$VERSION/$1
-+  echo Tagging: $PATCHTAG
-+
-+  git tag $PATCHTAG
-+fi
-+echo Generating patch: $PATCHFILE
-+git diff n$VERSION -- > $PATCHFILE
-diff --git a/pi-util/make_array.py b/pi-util/make_array.py
-new file mode 100755
-index 000000000000..67b22d2d517f
---- /dev/null
-+++ b/pi-util/make_array.py
-@@ -0,0 +1,23 @@
-+#!/usr/bin/env python
-+
-+# Usage
-+#   make_array file.bin
-+#   Produces file.h with array of bytes.
-+#
-+import sys
-+for file in sys.argv[1:]:
-+  prefix,suffix = file.split('.')
-+  assert suffix=='bin'
-+  name=prefix.split('/')[-1]
-+  print 'Converting',file
-+  with open(prefix+'.h','wb') as out:
-+    print >>out, 'static const unsigned char',name,'[] = {'
-+    with open(file,'rb') as fd:
-+      i = 0
-+      for byte in fd.read():
-+        print >>out, '0x%02x, ' % ord(byte),
-+        i = i + 1
-+        if i % 8 == 0:
-+          print >>out, ' // %04x' % (i - 8)
-+    print >>out,'};'
-+
-diff --git a/pi-util/mkinst.sh b/pi-util/mkinst.sh
-new file mode 100755
-index 000000000000..271a39e8460a
---- /dev/null
-+++ b/pi-util/mkinst.sh
-@@ -0,0 +1,5 @@
-+set -e
-+
-+make install
-+
-+cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr
-diff --git a/pi-util/patkodi.sh b/pi-util/patkodi.sh
-new file mode 100644
-index 000000000000..dcd05a606e85
---- /dev/null
-+++ b/pi-util/patkodi.sh
-@@ -0,0 +1,9 @@
-+set -e
-+KODIBASE=/home/jc/rpi/kodi/xbmc
-+JOBS=-j20
-+make $JOBS
-+git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch
-+make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS
-+make -C $KODIBASE/build install
-+
-+
-diff --git a/pi-util/perfcmp.py b/pi-util/perfcmp.py
-new file mode 100755
-index 000000000000..e44cfa0c3c4d
---- /dev/null
-+++ b/pi-util/perfcmp.py
-@@ -0,0 +1,101 @@
-+#!/usr/bin/env python3
-+
-+import time
-+import string
-+import os
-+import tempfile
-+import subprocess
-+import re
-+import argparse
-+import sys
-+import csv
-+from stat import *
-+
-+class tstats:
-+    close_threshold = 0.01
-+
-+    def __init__(self, stats_dict=None):
-+        if stats_dict != None:
-+            self.name = stats_dict["name"]
-+            self.elapsed = float(stats_dict["elapsed"])
-+            self.user = float(stats_dict["user"])
-+            self.sys = float(stats_dict["sys"])
-+
-+    def times_str(self):
-+        ctime = self.sys + self.user
-+        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
-+
-+    def dict(self):
-+        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
-+
-+    def is_close(self, other):
-+        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
-+
-+    def __lt__(self, other):
-+        return self.elapsed < other.elapsed
-+    def __gt__(self, other):
-+        return self.elapsed > other.elapsed
-+
-+    def time_file(name, prefix):
-+        stats = tstats()
-+        stats.name = name
-+        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
-+        cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name,
-+                                  "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
-+        pinfo = os.wait4(cproc.pid, 0)
-+        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
-+        stats.elapsed = end_time - start_time
-+        stats.user = pinfo[2].ru_utime
-+        stats.sys = pinfo[2].ru_stime
-+        return stats
-+
-+
-+def common_prefix(s1, s2):
-+    for i in range(min(len(s1),len(s2))):
-+        if s1[i] != s2[i]:
-+            return s1[:i]
-+    return s1[:i+1]
-+
-+def main():
-+    argp = argparse.ArgumentParser(description="FFmpeg performance compare")
-+
-+    argp.add_argument("stream0", help="CSV to compare")
-+    argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare")
-+
-+    args = argp.parse_args()
-+
-+    with open(args.stream0, 'r', newline='') as f_in:
-+        stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
-+    with open(args.stream1, 'r', newline='') as f_in:
-+        stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
-+
-+    print (args.stream0, "<<-->>", args.stream1)
-+    print ()
-+
-+    for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()):
-+       if not (f in stats0) :
-+           print ("           XX               :", f)
-+           continue
-+       if not (f in stats1) :
-+           print ("       XX                   :", f)
-+           continue
-+
-+       s0 = stats0[f]
-+       s1 = stats1[f]
-+
-+       pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0
-+       thresh = 0.3
-+       tc = 6
-+
-+       nchar = min(tc - 1, int(abs(pcent) / thresh))
-+       cc = "  --  " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar
-+
-+       print ("%6.2f %s%6.2f (%+5.2f) : %s" %
-+           (s0.elapsed, cc, s1.elapsed, pcent, f))
-+
-+    return 0
-+
-+
-+if __name__ == '__main__':
-+    exit(main())
-+
-diff --git a/pi-util/qem.sh b/pi-util/qem.sh
-new file mode 100755
-index 000000000000..a4dbb6eacd18
---- /dev/null
-+++ b/pi-util/qem.sh
-@@ -0,0 +1,9 @@
-+TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex
-+QASM=python\ ../local/bin/qasm.py
-+SRC_FILE=libavcodec/rpi_hevc_shader.qasm
-+DST_BASE=shader
-+
-+cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR
-+$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c
-+$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h
-+
-diff --git a/pi-util/v3dusage.py b/pi-util/v3dusage.py
-new file mode 100755
-index 000000000000..5935a11ca553
---- /dev/null
-+++ b/pi-util/v3dusage.py
-@@ -0,0 +1,128 @@
-+#!/usr/bin/env python
-+
-+import sys
-+import argparse
-+import re
-+
-+def do_logparse(logname):
-+
-+    rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ')
-+    rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$')
-+    rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$')
-+    rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$')
-+
-+    ttotal = {'idle':0.0}
-+    tstart = {}
-+    qctotal = {}
-+    qtstotal = {}
-+    l2hits = {}
-+    l2total = {}
-+    time0 = None
-+    idle_start = None
-+    qpu_op_no = 0
-+    op_count = 0
-+
-+    with open(logname, "rt") as infile:
-+        for line in infile:
-+            match = rmatch.match(line)
-+            if match:
-+#                print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":"
-+                time = float(match.group(1))
-+                unit = match.group(3)
-+                opstart = not match.group(2)
-+                optype = match.group(7)
-+                hascb = match.group(8) != "0"
-+
-+                if unit == 'qpu1':
-+                    unit = unit + "." + str(qpu_op_no)
-+                    if not opstart:
-+                        if hascb or optype == 'EXECUTE_SYNC':
-+                            qpu_op_no = 0
-+                        else:
-+                            qpu_op_no += 1
-+
-+                # Ignore sync type
-+                if optype == 'EXECUTE_SYNC':
-+                    continue
-+
-+                if not time0:
-+                    time0 = time
-+
-+                if opstart:
-+                    tstart[unit] = time;
-+                elif unit in tstart:
-+                    op_count += 1
-+                    if not unit in ttotal:
-+                        ttotal[unit] = 0.0
-+                    ttotal[unit] += time - tstart[unit]
-+                    del tstart[unit]
-+
-+                if not idle_start and not tstart:
-+                    idle_start = time
-+                elif idle_start and tstart:
-+                    ttotal['idle'] += time - idle_start
-+                    idle_start = None
-+
-+            match = rqcycle.match(line)
-+            if match:
-+                unit = "qpu1." + str(qpu_op_no)
-+                if not unit in qctotal:
-+                    qctotal[unit] = 0
-+                qctotal[unit] += int(match.group(2))
-+
-+            match = rqtscycle.match(line)
-+            if match:
-+                unit = "qpu1." + str(qpu_op_no)
-+                if not unit in qtstotal:
-+                    qtstotal[unit] = 0
-+                qtstotal[unit] += int(match.group(2))
-+
-+            match = rl2hits.match(line)
-+            if match:
-+                unit = "qpu1." + str(qpu_op_no)
-+                if not unit in l2total:
-+                    l2total[unit] = 0
-+                    l2hits[unit] = 0
-+                l2total[unit] += int(match.group(3))
-+                if match.group(2) == "hits":
-+                    l2hits[unit] += int(match.group(3))
-+
-+
-+    if not time0:
-+        print "No v3d profile records found"
-+    else:
-+        tlogged = time - time0
-+
-+        print "Logged time:", tlogged, "  Op count:", op_count
-+        for unit in sorted(ttotal):
-+            print b'%6s: %10.3f    %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged)
-+        print
-+        for unit in sorted(qctotal):
-+            if not unit in qtstotal:
-+                qtstotal[unit] = 0;
-+            print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit])
-+            if unit in l2total:
-+                print b'        L2Total: %10d, hits:      %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit])
-+
-+
-+
-+if __name__ == '__main__':
-+    argp = argparse.ArgumentParser(
-+        formatter_class=argparse.RawDescriptionHelpFormatter,
-+        description="QPU/VPU perf summary from VC logging",
-+        epilog = """
-+Will also summarise TMU stalls if logging requests set in qpu noflush param
-+in the profiled code.
-+
-+Example use:
-+  vcgencmd set_logging level=0xc0
-+  <command to profile>
-+  sudo vcdbg log msg >& t.log
-+  v3dusage.py t.log
-+""")
-+
-+    argp.add_argument("logfile")
-+    args = argp.parse_args()
-+
-+    do_logparse(args.logfile)
-+
-
-From b6b137b1d039b42b15325f87f55cb7c38e2270b0 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 11:27:39 +0100
-Subject: [PATCH 002/186] Add sand pix fmts & conversion fns
-
----
- configure                     |   3 +
- libavutil/Makefile            |   3 +
- libavutil/arm/Makefile        |   1 +
- libavutil/arm/rpi_sand_neon.S | 768 ++++++++++++++++++++++++++++++++++
- libavutil/arm/rpi_sand_neon.h |  99 +++++
- libavutil/pixdesc.c           |  44 ++
- libavutil/pixfmt.h            |   6 +
- libavutil/rpi_sand_fn_pw.h    | 227 ++++++++++
- libavutil/rpi_sand_fns.c      | 353 ++++++++++++++++
- libavutil/rpi_sand_fns.h      | 183 ++++++++
- 10 files changed, 1687 insertions(+)
- create mode 100644 libavutil/arm/rpi_sand_neon.S
- create mode 100644 libavutil/arm/rpi_sand_neon.h
- create mode 100644 libavutil/rpi_sand_fn_pw.h
- create mode 100644 libavutil/rpi_sand_fns.c
- create mode 100644 libavutil/rpi_sand_fns.h
-
 diff --git a/configure b/configure
-index 3cd3bdfb4496..5a5ada20711f 100755
+index d77a55b653c1..5c0854f6270b 100755
 --- a/configure
 +++ b/configure
-@@ -344,6 +344,7 @@ External library support:
+@@ -202,6 +202,7 @@ External library support:
+   --disable-bzlib          disable bzlib [autodetect]
+   --disable-coreimage      disable Apple CoreImage framework [autodetect]
+   --enable-chromaprint     enable audio fingerprinting with chromaprint [no]
++  --disable-epoxy          disable epoxy [autodetect]
+   --enable-frei0r          enable frei0r video filtering [no]
+   --enable-gcrypt          enable gcrypt, needed for rtmp(t)e support
+                            if openssl, librtmp or gmp is not used [no]
+@@ -287,6 +288,7 @@ External library support:
+   --enable-libtorch        enable Torch as one DNN backend [no]
+   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
+   --enable-libuavs3d       enable AVS3 decoding via libuavs3d [no]
++  --disable-libudev        disable libudev [autodetect]
+   --enable-libv4l2         enable libv4l2/v4l-utils [no]
+   --enable-libvidstab      enable video stabilization using vid.stab [no]
+   --enable-libvmaf         enable vmaf filter via libvmaf [no]
+@@ -353,12 +355,16 @@ External library support:
    --enable-libvpl          enable Intel oneVPL code via libvpl if libmfx is not used [no]
    --enable-libnpp          enable Nvidia Performance Primitives-based code [no]
    --enable-mmal            enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
 +  --enable-sand            enable sand video formats [rpi]
++  --enable-vout-drm        enable the vout_drm module - for internal testing only [no]
++  --enable-vout-egl        enable the vout_egl module - for internal testing only [no]
    --disable-nvdec          disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
    --disable-nvenc          disable Nvidia video encoding code [autodetect]
    --enable-omx             enable OpenMAX IL code [no]
-@@ -1930,6 +1931,7 @@ FEATURE_LIST="
-     omx_rpi
-     runtime_cpudetect
-     safe_bitstream_reader
-+    sand
-     shared
-     small
-     static
-@@ -2495,6 +2497,7 @@ CONFIG_EXTRA="
-     rtpdec
-     rtpenc_chain
-     rv34dsp
-+    sand
-     scene_sad
-     sinewin
-     snappy
-diff --git a/libavutil/Makefile b/libavutil/Makefile
-index dc9012f9a83a..e33f5db0996a 100644
---- a/libavutil/Makefile
-+++ b/libavutil/Makefile
-@@ -73,6 +73,7 @@ HEADERS = adler32.h                                                     \
-           rational.h                                                    \
-           replaygain.h                                                  \
-           ripemd.h                                                      \
-+	  rpi_sand_fns.h                                                \
-           samplefmt.h                                                   \
-           sha.h                                                         \
-           sha512.h                                                      \
-@@ -192,6 +193,7 @@ OBJS-$(CONFIG_MACOS_KPERF)              += macos_kperf.o
- OBJS-$(CONFIG_MEDIACODEC)               += hwcontext_mediacodec.o
- OBJS-$(CONFIG_OPENCL)                   += hwcontext_opencl.o
- OBJS-$(CONFIG_QSV)                      += hwcontext_qsv.o
-+OBJS-$(CONFIG_SAND)                     += rpi_sand_fns.o
- OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
- OBJS-$(CONFIG_VIDEOTOOLBOX)             += hwcontext_videotoolbox.o
- OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
-@@ -212,6 +214,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA)          += hwcontext_d3d11va.h
- SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
- SKIPHEADERS-$(CONFIG_QSV)              += hwcontext_qsv.h
- SKIPHEADERS-$(CONFIG_OPENCL)           += hwcontext_opencl.h
-+SKIPHEADERS-$(CONFIG-RPI)              += rpi_sand_fn_pw.h
- SKIPHEADERS-$(CONFIG_VAAPI)            += hwcontext_vaapi.h
- SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += hwcontext_videotoolbox.h
- SKIPHEADERS-$(CONFIG_VDPAU)            += hwcontext_vdpau.h
-diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile
-index 5da44b05427a..b74b7c4e2f25 100644
---- a/libavutil/arm/Makefile
-+++ b/libavutil/arm/Makefile
-@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o                                    \
- 
- NEON-OBJS += arm/float_dsp_init_neon.o                                  \
-              arm/float_dsp_neon.o                                       \
-+             arm/rpi_sand_neon.o                                        \
-diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S
-new file mode 100644
-index 000000000000..80890fe9854b
---- /dev/null
-+++ b/libavutil/arm/rpi_sand_neon.S
-@@ -0,0 +1,768 @@
-+/*
-+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in the
-+      documentation and/or other materials provided with the distribution.
-+    * Neither the name of the copyright holder nor the
-+      names of its contributors may be used to endorse or promote products
-+      derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+Authors: John Cox
-+*/
-+
-+#include "libavutil/arm/asm.S"
-+
-+
-+@ General notes:
-+@ Having done some timing on this in sand8->y8 (Pi4)
-+@  vst1 (680fps) is a bit faster than vstm (660fps)
-+@  vldm (680fps) is noticably faster than vld1 (480fps)
-+@  (or it might be that a mix is what is required)
-+@
-+@ At least on a Pi4 it is no more expensive to have a single auto-inc register
-+@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted
-+@ the latter was better)
-+@
-+@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless
-+@ the memory is uncached.
-+@ As these are Sand -> planar we can assume that src is going to be aligned but
-+@ it is possible that dest isn't (converting to .yuv or other packed format).
-+@ Luckily vst1 is faster than vstm :-) so all is well
-+@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4
-+@ .8 stores would let us do non-word aligned stores into uncached but it
-+@ probably isn't worth it.
-+
-+
-+
-+
-+@ void ff_rpi_sand128b_stripe_to_8_10(
-+@   uint8_t * dest,             // [r0]
-+@   const uint8_t * src1,       // [r1]
-+@   const uint8_t * src2,       // [r2]
-+@   unsigned int lines);        // [r3]
-+
-+.macro  stripe2_to_8, bit_depth
-+        vpush    {q4-q7}
-+1:
-+        vldm     r1!, {q0-q7}
-+        subs     r3, #1
-+        vldm     r2!, {q8-q15}
-+        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
-+        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
-+        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
-+        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
-+        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
-+        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
-+        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
-+        vqrshrn.u16 d7,  q7,  #\bit_depth - 8
-+        vqrshrn.u16 d8,  q8,  #\bit_depth - 8
-+        vqrshrn.u16 d9,  q9,  #\bit_depth - 8
-+        vqrshrn.u16 d10, q10, #\bit_depth - 8
-+        vqrshrn.u16 d11, q11, #\bit_depth - 8
-+        vqrshrn.u16 d12, q12, #\bit_depth - 8
-+        vqrshrn.u16 d13, q13, #\bit_depth - 8
-+        vqrshrn.u16 d14, q14, #\bit_depth - 8
-+        vqrshrn.u16 d15, q15, #\bit_depth - 8
-+        vstm     r0!, {q0-q7}
-+        bne      1b
-+        vpop     {q4-q7}
-+        bx       lr
-+.endm
-+
-+function ff_rpi_sand128b_stripe_to_8_10, export=1
-+        stripe2_to_8     10
-+endfunc
-+
-+@ void ff_rpi_sand8_lines_to_planar_y8(
-+@   uint8_t * dest,             // [r0]
-+@   unsigned int dst_stride,    // [r1]
-+@   const uint8_t * src,        // [r2]
-+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+@   unsigned int src_stride2,   // [sp, #0]  -> r3
-+@   unsigned int _x,            // [sp, #4]  Ignored - 0
-+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
-+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+@   unsigned int h);            // [sp, #16] -> r7
-+@
-+@ Assumes that we are starting on a stripe boundary and that overreading
-+@ within the stripe is OK. However it does respect the dest size for writing
-+
-+function ff_rpi_sand8_lines_to_planar_y8, export=1
-+                push            {r4-r8, lr}     @ +24            L
-+                ldr             r3,  [sp, #24]
-+                ldr             r6,  [sp, #36]
-+                ldr             r7,  [sp, #32]  @ y
-+                lsl             r3,  #7
-+                sub             r1,  r6
-+                add             r8,  r2,  r7,  lsl #7
-+                ldr             r7,  [sp, #40]
-+
-+10:
-+                mov             r2,  r8
-+                add             r4,  r0,  #24
-+                mov             r5,  r6
-+                mov             lr,  #0
-+1:
-+                vldm            r2,  {q8-q15}
-+                add             r2,  r3
-+                subs            r5,  #128
-+                blt             2f
-+                vst1.8          {d16, d17, d18, d19}, [r0]!
-+                vst1.8          {d20, d21, d22, d23}, [r0]!
-+                vst1.8          {d24, d25, d26, d27}, [r0]!
-+                vst1.8          {d28, d29, d30, d31}, [r0]!
-+                bne             1b
-+11:
-+                subs            r7,  #1
-+                add             r0,  r1
-+                add             r8,  #128
-+                bne             10b
-+
-+                pop             {r4-r8, pc}
-+
-+@ Partial final write
-+2:
-+                cmp             r5,  #64-128
-+                blt             1f
-+                vst1.8          {d16, d17, d18, d19}, [r0]!
-+                vst1.8          {d20, d21, d22, d23}, [r0]!
-+                beq             11b
-+                vmov            q8,  q12
-+                vmov            q9,  q13
-+                sub             r5,  #64
-+                vmov            q10, q14
-+                vmov            q11, q15
-+1:
-+                cmp             r5,  #32-128
-+                blt             1f
-+                vst1.8          {d16, d17, d18, d19}, [r0]!
-+                beq             11b
-+                vmov            q8,  q10
-+                sub             r5,  #32
-+                vmov            q9,  q11
-+1:
-+                cmp             r5,  #16-128
-+                blt             1f
-+                vst1.8          {d16, d17}, [r0]!
-+                beq             11b
-+                sub             r5,  #16
-+                vmov            q8,  q9
-+1:
-+                cmp             r5,  #8-128
-+                blt             1f
-+                vst1.8          {d16}, [r0]!
-+                beq             11b
-+                sub             r5,  #8
-+                vmov            d16, d17
-+1:
-+                cmp             r5,  #4-128
-+                blt             1f
-+                vst1.32         {d16[0]}, [r0]!
-+                beq             11b
-+                sub             r5,  #4
-+                vshr.u64        d16, #32
-+1:
-+                cmp             r5,  #2-128
-+                blt             1f
-+                vst1.16         {d16[0]}, [r0]!
-+                beq             11b
-+                vst1.8          {d16[2]}, [r0]!
-+                b               11b
-+1:
-+                vst1.8          {d16[0]}, [r0]!
-+                b               11b
-+endfunc
-+
-+@ void ff_rpi_sand8_lines_to_planar_c8(
-+@   uint8_t * dst_u,            // [r0]
-+@   unsigned int dst_stride_u,  // [r1]
-+@   uint8_t * dst_v,            // [r2]
-+@   unsigned int dst_stride_v,  // [r3]
-+@   const uint8_t * src,        // [sp, #0]  -> r4, r5
-+@   unsigned int stride1,       // [sp, #4]  128
-+@   unsigned int stride2,       // [sp, #8]  -> r8
-+@   unsigned int _x,            // [sp, #12] 0
-+@   unsigned int y,             // [sp, #16] (r7 in prefix)
-+@   unsigned int _w,            // [sp, #20] -> r12, r6
-+@   unsigned int h);            // [sp, #24] -> r7
-+@
-+@ Assumes that we are starting on a stripe boundary and that overreading
-+@ within the stripe is OK. However it does respect the dest size for writing
-+
-+function ff_rpi_sand8_lines_to_planar_c8, export=1
-+                push            {r4-r8, lr}     @ +24
-+
-+                ldr             r5,  [sp, #24]
-+                ldr             r8,  [sp, #32]
-+                ldr             r7,  [sp, #40]
-+                ldr             r6,  [sp, #44]
-+                lsl             r8,  #7
-+                add             r5,  r5,  r7,  lsl #7
-+                sub             r1,  r1,  r6
-+                sub             r3,  r3,  r6
-+                ldr             r7,  [sp, #48]
-+                vpush           {q4-q7}
-+
-+10:
-+                mov             r4,  r5
-+                mov             r12, r6
-+1:
-+                subs            r12, #64
-+                vldm            r4,  {q0-q7}
-+                add             r4,  r8
-+                it              gt
-+                vldmgt          r4,  {q8-q15}
-+                add             r4,  r8
-+
-+                vuzp.8          q0,  q1
-+                vuzp.8          q2,  q3
-+                vuzp.8          q4,  q5
-+                vuzp.8          q6,  q7
-+
-+                vuzp.8          q8,  q9
-+                vuzp.8          q10, q11
-+                vuzp.8          q12, q13
-+                vuzp.8          q14, q15
-+                subs            r12, #64
-+
-+                @ Rearrange regs so we can use vst1 with 4 regs
-+                vswp            q1,  q2
-+                vswp            q5,  q6
-+                vswp            q9,  q10
-+                vswp            q13, q14
-+                blt             2f
-+
-+                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
-+                vst1.8          {d8,  d9,  d10, d11}, [r0]!
-+                vst1.8          {d16, d17, d18, d19}, [r0]!
-+                vst1.8          {d24, d25, d26, d27}, [r0]!
-+
-+                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
-+                vst1.8          {d12, d13, d14, d15}, [r2]!
-+                vst1.8          {d20, d21, d22, d23}, [r2]!
-+                vst1.8          {d28, d29, d30, d31}, [r2]!
-+                bne             1b
-+11:
-+                subs            r7,  #1
-+                add             r5,  #128
-+                add             r0,  r1
-+                add             r2,  r3
-+                bne             10b
-+                vpop            {q4-q7}
-+                pop             {r4-r8,pc}
-+
-+2:
-+                cmp             r12, #64-128
-+                blt             1f
-+                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
-+                vst1.8          {d8,  d9,  d10, d11}, [r0]!
-+                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
-+                vst1.8          {d12, d13, d14, d15}, [r2]!
-+                beq             11b
-+                sub             r12, #64
-+                vmov            q0,  q8
-+                vmov            q1,  q9
-+                vmov            q2,  q10
-+                vmov            q3,  q11
-+                vmov            q4,  q12
-+                vmov            q5,  q13
-+                vmov            q6,  q14
-+                vmov            q7,  q15
-+1:
-+                cmp             r12, #32-128
-+                blt             1f
-+                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
-+                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
-+                beq             11b
-+                sub             r12, #32
-+                vmov            q0,  q4
-+                vmov            q1,  q5
-+                vmov            q2,  q6
-+                vmov            q3,  q7
-+1:
-+                cmp             r12, #16-128
-+                blt             1f
-+                vst1.8          {d0,  d1 }, [r0]!
-+                vst1.8          {d4,  d5 }, [r2]!
-+                beq             11b
-+                sub             r12, #16
-+                vmov            q0,  q1
-+                vmov            q2,  q3
-+1:
-+                cmp             r12, #8-128
-+                blt             1f
-+                vst1.8          {d0}, [r0]!
-+                vst1.8          {d4}, [r2]!
-+                beq             11b
-+                sub             r12, #8
-+                vmov            d0,  d1
-+                vmov            d4,  d5
-+1:
-+                cmp             r12, #4-128
-+                blt             1f
-+                vst1.32         {d0[0]}, [r0]!
-+                vst1.32         {d4[0]}, [r2]!
-+                beq             11b
-+                sub             r12, #4
-+                vmov            s0,  s1
-+                vmov            s8,  s9
-+1:
-+                cmp             r12, #2-128
-+                blt             1f
-+                vst1.16         {d0[0]}, [r0]!
-+                vst1.16         {d4[0]}, [r2]!
-+                beq             11b
-+                vst1.8          {d0[2]}, [r0]!
-+                vst1.8          {d4[2]}, [r2]!
-+                b               11b
-+1:
-+                vst1.8          {d0[0]}, [r0]!
-+                vst1.8          {d4[0]}, [r2]!
-+                b               11b
-+endfunc
-+
-+
-+
-+@ void ff_rpi_sand30_lines_to_planar_y16(
-+@   uint8_t * dest,             // [r0]
-+@   unsigned int dst_stride,    // [r1]
-+@   const uint8_t * src,        // [r2]
-+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+@   unsigned int src_stride2,   // [sp, #0]  -> r3
-+@   unsigned int _x,            // [sp, #4]  Ignored - 0
-+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
-+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+@   unsigned int h);            // [sp, #16] -> r7
-+@
-+@ Assumes that we are starting on a stripe boundary and that overreading
-+@ within the stripe is OK. However it does respect the dest size for writing
-+
-+function ff_rpi_sand30_lines_to_planar_y16, export=1
-+                push            {r4-r8, lr}     @ +24
-+                ldr             r3,  [sp, #24]
-+                ldr             r6,  [sp, #36]
-+                ldr             r7,  [sp, #32]  @ y
-+                mov             r12, #48
-+                vmov.u16        q15, #0x3ff
-+                sub             r3,  #1
-+                lsl             r3,  #7
-+                sub             r1,  r1,  r6,  lsl #1
-+                add             r8,  r2,  r7,  lsl #7
-+                ldr             r7,  [sp, #40]
-+
-+10:
-+                mov             r2,  r8
-+                add             r4,  r0,  #24
-+                mov             r5,  r6
-+                mov             lr,  #0
-+1:
-+                vldm            r2!, {q10-q13}
-+                add             lr,  #64
-+
-+                vshr.u32        q14, q10, #20    @ Cannot vshrn.u32 #20!
-+                ands            lr,  #127
-+                vshrn.u32       d2,  q10, #10
-+                vmovn.u32       d0,  q10
-+                vmovn.u32       d4,  q14
-+
-+                vshr.u32        q14, q11, #20
-+                it              eq
-+                addeq           r2,  r3
-+                vshrn.u32       d3,  q11, #10
-+                vmovn.u32       d1,  q11
-+                vmovn.u32       d5,  q14
-+
-+                subs            r5,  #48
-+                vand            q0,  q15
-+                vand            q1,  q15
-+                vand            q2,  q15
-+
-+                vshr.u32        q14, q12, #20
-+                vshrn.u32       d18, q12, #10
-+                vmovn.u32       d16, q12
-+                vmovn.u32       d20, q14
-+
-+                vshr.u32        q14, q13, #20
-+                vshrn.u32       d19, q13, #10
-+                vmovn.u32       d17, q13
-+                vmovn.u32       d21, q14
-+
-+                vand            q8,  q15
-+                vand            q9,  q15
-+                vand            q10, q15
-+                blt             2f
-+
-+                vst3.16         {d0,  d2,  d4},  [r0], r12
-+                vst3.16         {d1,  d3,  d5},  [r4], r12
-+                vst3.16         {d16, d18, d20}, [r0], r12
-+                vst3.16         {d17, d19, d21}, [r4], r12
-+
-+                bne             1b
-+
-+11:
-+                subs            r7,  #1
-+                add             r0,  r1
-+                add             r8,  #128
-+                bne             10b
-+
-+                pop             {r4-r8, pc}
-+
-+@ Partial final write
-+2:
-+                cmp             r5,  #24-48
-+                blt             1f
-+                vst3.16         {d0,  d2,  d4},  [r0], r12
-+                vst3.16         {d1,  d3,  d5},  [r4]
-+                beq             11b
-+                vmov            q0,  q8
-+                sub             r5,  #24
-+                vmov            q1,  q9
-+                vmov            q2,  q10
-+1:
-+                cmp             r5,  #12-48
-+                blt             1f
-+                vst3.16         {d0,  d2,  d4},  [r0]!
-+                beq             11b
-+                vmov            d0, d1
-+                sub             r5, #12
-+                vmov            d2, d3
-+                vmov            d4, d5
-+1:
-+                cmp             r5,  #6-48
-+                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
-+                blt             1f
-+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]
-+                vst3.16         {d0[1], d2[1], d4[1]}, [r4]
-+                add             r0,  #12
-+                beq             11b
-+                vmov            s0,  s1
-+                sub             r5,  #6
-+                vmov            s4,  s5
-+                vmov            s8,  s9
-+1:
-+                cmp             r5, #3-48
-+                blt             1f
-+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
-+                beq             11b
-+                sub             r5, #3
-+                vshr.u32        d0, #16
-+                vshr.u32        d2, #16
-+1:
-+                cmp             r5, #2-48
-+                blt             1f
-+                vst2.16         {d0[0], d2[0]}, [r0]!
-+                b               11b
-+1:
-+                vst1.16         {d0[0]}, [r0]!
-+                b               11b
-+
-+endfunc
-+
-+
-+@ void ff_rpi_sand30_lines_to_planar_c16(
-+@   uint8_t * dst_u,            // [r0]
-+@   unsigned int dst_stride_u,  // [r1]
-+@   uint8_t * dst_v,            // [r2]
-+@   unsigned int dst_stride_v,  // [r3]
-+@   const uint8_t * src,        // [sp, #0]  -> r4, r5
-+@   unsigned int stride1,       // [sp, #4]  128
-+@   unsigned int stride2,       // [sp, #8]  -> r8
-+@   unsigned int _x,            // [sp, #12] 0
-+@   unsigned int y,             // [sp, #16] (r7 in prefix)
-+@   unsigned int _w,            // [sp, #20] -> r6, r9
-+@   unsigned int h);            // [sp, #24] -> r7
-+@
-+@ Assumes that we are starting on a stripe boundary and that overreading
-+@ within the stripe is OK. However it does respect the dest size for writing
-+
-+function ff_rpi_sand30_lines_to_planar_c16, export=1
-+                push            {r4-r10, lr}    @ +32
-+                ldr             r5,  [sp, #32]
-+                ldr             r8,  [sp, #40]
-+                ldr             r7,  [sp, #48]
-+                ldr             r9,  [sp, #52]
-+                mov             r12, #48
-+                vmov.u16        q15, #0x3ff
-+                sub             r8,  #1
-+                lsl             r8,  #7
-+                add             r5,  r5,  r7,  lsl #7
-+                sub             r1,  r1,  r9,  lsl #1
-+                sub             r3,  r3,  r9,  lsl #1
-+                ldr             r7,  [sp, #56]
-+10:
-+                mov             lr,  #0
-+                mov             r4,  r5
-+                mov             r6,  r9
-+1:
-+                vldm            r4!, {q0-q3}
-+                add             lr,  #64
-+
-+                @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2
-+                vshr.u32        q14, q0,  #20
-+                vshrn.u32       d16, q0,  #10
-+                vmovn.u32       d18, q0
-+                ands            lr,  #127
-+                vmovn.u32       d20, q14
-+
-+                vshr.u32        q14, q1,  #20
-+                vshrn.u32       d17, q1,  #10
-+                vmovn.u32       d19, q1
-+                vmovn.u32       d21, q14
-+
-+                vshr.u32        q14, q2,  #20
-+                vshrn.u32       d22, q2,  #10
-+                vmovn.u32       d24, q2
-+                vmovn.u32       d26, q14
-+
-+                vshr.u32        q14, q3,  #20
-+                vshrn.u32       d23, q3,  #10
-+                vmovn.u32       d25, q3
-+                add             r10, r0,  #24
-+                vmovn.u32       d27, q14
-+
-+                it              eq
-+                addeq           r4,  r8
-+                vuzp.16         q8,  q11
-+                vuzp.16         q9,  q12
-+                vuzp.16         q10, q13
-+
-+                @ q8   V0, V3,.. -> q0
-+                @ q9   U0, U3...
-+                @ q10  U1, U4...
-+                @ q11  U2, U5,..
-+                @ q12  V1, V4,.. -> q1
-+                @ q13  V2, V5,.. -> q2
-+
-+                subs            r6,  #24
-+                vand            q11, q15
-+                vand            q9,  q15
-+                vand            q10, q15
-+                vand            q0,  q8,  q15
-+                vand            q1,  q12, q15
-+                vand            q2,  q13, q15
-+
-+                blt             2f
-+
-+                vst3.16         {d18, d20, d22}, [r0],  r12
-+                vst3.16         {d19, d21, d23}, [r10]
-+                add             r10, r2,  #24
-+                vst3.16         {d0,  d2,  d4},  [r2],  r12
-+                vst3.16         {d1,  d3,  d5},  [r10]
-+
-+                bne             1b
-+
-+11:
-+                subs            r7,  #1
-+                add             r5,  #128
-+                add             r0,  r1
-+                add             r2,  r3
-+                bne             10b
-+
-+                pop             {r4-r10, pc}
-+
-+@ Partial final write
-+2:
-+                cmp             r6,  #-12
-+                blt             1f
-+                vst3.16         {d18, d20, d22}, [r0]!
-+                vst3.16         {d0,  d2,  d4},  [r2]!
-+                beq             11b
-+                vmov            d18, d19
-+                vmov            d20, d21
-+                vmov            d22, d23
-+                sub             r6,  #12
-+                vmov            d0,  d1
-+                vmov            d2,  d3
-+                vmov            d4,  d5
-+1:
-+                cmp             r6,  #-18
-+                @ Rezip here as it makes the remaining tail handling easier
-+                vzip.16         d0,  d18
-+                vzip.16         d2,  d20
-+                vzip.16         d4,  d22
-+                blt             1f
-+                vst3.16         {d0[1],  d2[1],  d4[1]},  [r0]!
-+                vst3.16         {d0[0],  d2[0],  d4[0]},  [r2]!
-+                vst3.16         {d0[3],  d2[3],  d4[3]},  [r0]!
-+                vst3.16         {d0[2],  d2[2],  d4[2]},  [r2]!
-+                beq             11b
-+                vmov            d0,  d18
-+                vmov            d2,  d20
-+                sub             r6,  #6
-+                vmov            d4,  d22
-+1:
-+                cmp             r6,  #-21
-+                blt             1f
-+                vst3.16         {d0[1], d2[1], d4[1]}, [r0]!
-+                vst3.16         {d0[0], d2[0], d4[0]}, [r2]!
-+                beq             11b
-+                vmov            s4,  s5
-+                sub             r6,  #3
-+                vmov            s0,  s1
-+1:
-+                cmp             r6,  #-22
-+                blt             1f
-+                vst2.16         {d0[1], d2[1]}, [r0]!
-+                vst2.16         {d0[0], d2[0]}, [r2]!
-+                b               11b
-+1:
-+                vst1.16         {d0[1]}, [r0]!
-+                vst1.16         {d0[0]}, [r2]!
-+                b               11b
-+
-+endfunc
-+
-+@ void ff_rpi_sand30_lines_to_planar_p010(
-+@   uint8_t * dest,             // [r0]
-+@   unsigned int dst_stride,    // [r1]
-+@   const uint8_t * src,        // [r2]
-+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+@   unsigned int src_stride2,   // [sp, #0]  -> r3
-+@   unsigned int _x,            // [sp, #4]  Ignored - 0
-+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
-+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+@   unsigned int h);            // [sp, #16] -> r7
-+@
-+@ Assumes that we are starting on a stripe boundary and that overreading
-+@ within the stripe is OK. However it does respect the dest size for writing
-+
-+function ff_rpi_sand30_lines_to_planar_p010, export=1
-+                push            {r4-r8, lr}     @ +24
-+                ldr             r3,  [sp, #24]
-+                ldr             r6,  [sp, #36]
-+                ldr             r7,  [sp, #32]  @ y
-+                mov             r12, #48
-+                vmov.u16        q15, #0xffc0
-+                sub             r3,  #1
-+                lsl             r3,  #7
-+                sub             r1,  r1,  r6,  lsl #1
-+                add             r8,  r2,  r7,  lsl #7
-+                ldr             r7,  [sp, #40]
-+
-+10:
-+                mov             r2,  r8
-+                add             r4,  r0,  #24
-+                mov             r5,  r6
-+                mov             lr,  #0
-+1:
-+                vldm            r2!, {q10-q13}
-+                add             lr,  #64
-+
-+                vshl.u32        q14, q10, #6
-+                ands            lr,  #127
-+                vshrn.u32       d4,  q10, #14
-+                vshrn.u32       d2,  q10, #4
-+                vmovn.u32       d0,  q14
-+
-+                vshl.u32        q14, q11, #6
-+                it              eq
-+                addeq           r2,  r3
-+                vshrn.u32       d5,  q11, #14
-+                vshrn.u32       d3,  q11, #4
-+                vmovn.u32       d1,  q14
-+
-+                subs            r5,  #48
-+                vand            q2,  q15
-+                vand            q1,  q15
-+                vand            q0,  q15
-+
-+                vshl.u32        q14, q12, #6
-+                vshrn.u32       d20, q12, #14
-+                vshrn.u32       d18, q12, #4
-+                vmovn.u32       d16, q14
-+
-+                vshl.u32        q14, q13, #6
-+                vshrn.u32       d21, q13, #14
-+                vshrn.u32       d19, q13, #4
-+                vmovn.u32       d17, q14
-+
-+                vand            q10, q15
-+                vand            q9,  q15
-+                vand            q8,  q15
-+                blt             2f
-+
-+                vst3.16         {d0,  d2,  d4},  [r0], r12
-+                vst3.16         {d1,  d3,  d5},  [r4], r12
-+                vst3.16         {d16, d18, d20}, [r0], r12
-+                vst3.16         {d17, d19, d21}, [r4], r12
-+
-+                bne             1b
-+
-+11:
-+                subs            r7,  #1
-+                add             r0,  r1
-+                add             r8,  #128
-+                bne             10b
-+
-+                pop             {r4-r8, pc}
-+
-+@ Partial final write
-+2:
-+                cmp             r5,  #24-48
-+                blt             1f
-+                vst3.16         {d0,  d2,  d4},  [r0], r12
-+                vst3.16         {d1,  d3,  d5},  [r4]
-+                beq             11b
-+                vmov            q0,  q8
-+                sub             r5,  #24
-+                vmov            q1,  q9
-+                vmov            q2,  q10
-+1:
-+                cmp             r5,  #12-48
-+                blt             1f
-+                vst3.16         {d0,  d2,  d4},  [r0]!
-+                beq             11b
-+                vmov            d0, d1
-+                sub             r5, #12
-+                vmov            d2, d3
-+                vmov            d4, d5
-+1:
-+                cmp             r5,  #6-48
-+                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
-+                blt             1f
-+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]
-+                vst3.16         {d0[1], d2[1], d4[1]}, [r4]
-+                add             r0,  #12
-+                beq             11b
-+                vmov            s0,  s1
-+                sub             r5,  #6
-+                vmov            s4,  s5
-+                vmov            s8,  s9
-+1:
-+                cmp             r5, #3-48
-+                blt             1f
-+                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
-+                beq             11b
-+                sub             r5, #3
-+                vshr.u32        d0, #16
-+                vshr.u32        d2, #16
-+1:
-+                cmp             r5, #2-48
-+                blt             1f
-+                vst2.16         {d0[0], d2[0]}, [r0]!
-+                b               11b
-+1:
-+                vst1.16         {d0[0]}, [r0]!
-+                b               11b
-+
-+endfunc
-+
-+
-+
-diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h
-new file mode 100644
-index 000000000000..447f367bea8f
---- /dev/null
-+++ b/libavutil/arm/rpi_sand_neon.h
-@@ -0,0 +1,99 @@
-+/*
-+Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in the
-+      documentation and/or other materials provided with the distribution.
-+    * Neither the name of the copyright holder nor the
-+      names of its contributors may be used to endorse or promote products
-+      derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+Authors: John Cox
-+*/
-+
-+#ifndef AVUTIL_ARM_SAND_NEON_H
-+#define AVUTIL_ARM_SAND_NEON_H
-+
-+void ff_rpi_sand128b_stripe_to_8_10(
-+  uint8_t * dest,             // [r0]
-+  const uint8_t * src1,       // [r1]
-+  const uint8_t * src2,       // [r2]
-+  unsigned int lines);        // [r3]
-+
-+void ff_rpi_sand8_lines_to_planar_y8(
-+  uint8_t * dest,             // [r0]
-+  unsigned int dst_stride,    // [r1]
-+  const uint8_t * src,        // [r2]
-+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+  unsigned int src_stride2,   // [sp, #0]  -> r3
-+  unsigned int _x,            // [sp, #4]  Ignored - 0
-+  unsigned int y,             // [sp, #8]  (r7 in prefix)
-+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+  unsigned int h);            // [sp, #16] -> r7
-+
-+void ff_rpi_sand8_lines_to_planar_c8(
-+  uint8_t * dst_u,            // [r0]
-+  unsigned int dst_stride_u,  // [r1]
-+  uint8_t * dst_v,            // [r2]
-+  unsigned int dst_stride_v,  // [r3]
-+  const uint8_t * src,        // [sp, #0]  -> r4, r5
-+  unsigned int stride1,       // [sp, #4]  128
-+  unsigned int stride2,       // [sp, #8]  -> r8
-+  unsigned int _x,            // [sp, #12] 0
-+  unsigned int y,             // [sp, #16] (r7 in prefix)
-+  unsigned int _w,            // [sp, #20] -> r12, r6
-+  unsigned int h);            // [sp, #24] -> r7
-+
-+void ff_rpi_sand30_lines_to_planar_y16(
-+  uint8_t * dest,             // [r0]
-+  unsigned int dst_stride,    // [r1]
-+  const uint8_t * src,        // [r2]
-+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+  unsigned int src_stride2,   // [sp, #0]  -> r3
-+  unsigned int _x,            // [sp, #4]  Ignored - 0
-+  unsigned int y,             // [sp, #8]  (r7 in prefix)
-+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+  unsigned int h);            // [sp, #16] -> r7
-+
-+void ff_rpi_sand30_lines_to_planar_c16(
-+  uint8_t * dst_u,            // [r0]
-+  unsigned int dst_stride_u,  // [r1]
-+  uint8_t * dst_v,            // [r2]
-+  unsigned int dst_stride_v,  // [r3]
-+  const uint8_t * src,        // [sp, #0]  -> r4, r5
-+  unsigned int stride1,       // [sp, #4]  128
-+  unsigned int stride2,       // [sp, #8]  -> r8
-+  unsigned int _x,            // [sp, #12] 0
-+  unsigned int y,             // [sp, #16] (r7 in prefix)
-+  unsigned int _w,            // [sp, #20] -> r6, r9
-+  unsigned int h);            // [sp, #24] -> r7
-+
-+void ff_rpi_sand30_lines_to_planar_p010(
-+  uint8_t * dest,             // [r0]
-+  unsigned int dst_stride,    // [r1]
-+  const uint8_t * src,        // [r2]
-+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+  unsigned int src_stride2,   // [sp, #0]  -> r3
-+  unsigned int _x,            // [sp, #4]  Ignored - 0
-+  unsigned int y,             // [sp, #8]  (r7 in prefix)
-+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+  unsigned int h);            // [sp, #16] -> r7
-+
-+#endif // AVUTIL_ARM_SAND_NEON_H
-+
-diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
-index 62a2ae08d907..cb73521ea75c 100644
---- a/libavutil/pixdesc.c
-+++ b/libavutil/pixdesc.c
-@@ -2717,6 +2717,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
-         .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT |
-                  AV_PIX_FMT_FLAG_ALPHA,
-     },
-+    [AV_PIX_FMT_SAND128] = {
-+        .name = "sand128",
-+        .nb_components = 3,
-+        .log2_chroma_w = 1,
-+        .log2_chroma_h = 1,
-+        .comp = {
-+            { 0, 1, 0, 0, 8 },        /* Y */
-+            { 1, 2, 0, 0, 8 },        /* U */
-+            { 1, 2, 1, 0, 8 },        /* V */
-+        },
-+        .flags = 0,
-+    },
-+    [AV_PIX_FMT_SAND64_10] = {
-+        .name = "sand64_10",
-+        .nb_components = 3,
-+        .log2_chroma_w = 1,
-+        .log2_chroma_h = 1,
-+        .comp = {
-+            { 0, 2, 0, 0, 10 },        /* Y */
-+            { 1, 4, 0, 0, 10 },        /* U */
-+            { 1, 4, 2, 0, 10 },        /* V */
-+        },
-+        .flags = 0,
-+    },
-+    [AV_PIX_FMT_SAND64_16] = {
-+        .name = "sand64_16",
-+        .nb_components = 3,
-+        .log2_chroma_w = 1,
-+        .log2_chroma_h = 1,
-+        .comp = {
-+            { 0, 2, 0, 0, 16 },        /* Y */
-+            { 1, 4, 0, 0, 16 },        /* U */
-+            { 1, 4, 2, 0, 16 },        /* V */
-+        },
-+        .flags = 0,
-+    },
-+    [AV_PIX_FMT_RPI4_8] = {
-+        .name = "rpi4_8",
-+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-+    },
-+    [AV_PIX_FMT_RPI4_10] = {
-+        .name = "rpi4_10",
-+        .flags = AV_PIX_FMT_FLAG_HWACCEL,
-+    },
- };
- 
- static const char * const color_range_names[] = {
-diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
-index 37c2c79e0140..22f70007c3df 100644
---- a/libavutil/pixfmt.h
-+++ b/libavutil/pixfmt.h
-@@ -377,6 +377,12 @@ enum AVPixelFormat {
- 
-     AV_PIX_FMT_Y210BE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian
-     AV_PIX_FMT_Y210LE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
-+// RPI - not on ifdef so can be got at by calling progs
-+    AV_PIX_FMT_SAND128,    ///< 4:2:0  8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding
-+    AV_PIX_FMT_SAND64_10,  ///< 4:2:0 10-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
-+    AV_PIX_FMT_SAND64_16,  ///< 4:2:0 16-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
-+    AV_PIX_FMT_RPI4_8,
-+    AV_PIX_FMT_RPI4_10,
- 
-     AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined
-     AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined
-diff --git a/libavutil/rpi_sand_fn_pw.h b/libavutil/rpi_sand_fn_pw.h
-new file mode 100644
-index 000000000000..0324f6826dde
---- /dev/null
-+++ b/libavutil/rpi_sand_fn_pw.h
-@@ -0,0 +1,227 @@
-+/*
-+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in the
-+      documentation and/or other materials provided with the distribution.
-+    * Neither the name of the copyright holder nor the
-+      names of its contributors may be used to endorse or promote products
-+      derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+Authors: John Cox
-+*/
-+
-+// * Included twice from rpi_sand_fn with different PW
-+
-+#define STRCAT(x,y) x##y
-+
-+#if PW == 1
-+#define pixel uint8_t
-+#define FUNC(f) STRCAT(f, 8)
-+#elif PW == 2
-+#define pixel uint16_t
-+#define FUNC(f) STRCAT(f, 16)
-+#else
-+#error Unexpected PW
-+#endif
-+
-+// Fetches a single patch - offscreen fixup not done here
-+// w <= stride1
-+// unclipped
-+void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h)
-+{
-+    const unsigned int x = _x;
-+    const unsigned int w = _w;
-+    const unsigned int mask = stride1 - 1;
-+
-+#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64)
-+    if (_x == 0) {
-+        ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride,
-+                                     src, stride1, stride2, _x, y, _w, h);
-+        return;
-+    }
-+#endif
-+
-+    if ((x & ~mask) == ((x + w) & ~mask)) {
-+        // All in one sand stripe
-+        const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
-+        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) {
-+            memcpy(dst, p, w);
-+        }
-+    }
-+    else
-+    {
-+        // Two+ stripe
-+        const unsigned int sstride = stride1 * stride2;
-+        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
-+        const uint8_t * p2 = p1 + sstride - (x & mask);
-+        const unsigned int w1 = stride1 - (x & mask);
-+        const unsigned int w3 = (x + w) & mask;
-+        const unsigned int w2 = w - (w1 + w3);
-+
-+        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) {
-+            unsigned int j;
-+            const uint8_t * p = p2;
-+            uint8_t * d = dst;
-+            memcpy(d, p1, w1);
-+            d += w1;
-+            for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) {
-+                memcpy(d, p, stride1);
-+            }
-+            memcpy(d, p, w3);
-+        }
-+    }
-+}
-+
-+// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V)
-+
-+void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u,
-+                             uint8_t * dst_v, const unsigned int dst_stride_v,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h)
-+{
-+    const unsigned int x = _x * 2;
-+    const unsigned int w = _w * 2;
-+    const unsigned int mask = stride1 - 1;
-+
-+#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64)
-+    if (_x == 0) {
-+        ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v,
-+                                     src, stride1, stride2, _x, y, _w, h);
-+        return;
-+    }
-+#endif
-+
-+    if ((x & ~mask) == ((x + w) & ~mask)) {
-+        // All in one sand stripe
-+        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
-+        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) {
-+            pixel * du = (pixel *)dst_u;
-+            pixel * dv = (pixel *)dst_v;
-+            const pixel * p = (const pixel *)p1;
-+            for (unsigned int k = 0; k < w; k += 2 * PW) {
-+                *du++ = *p++;
-+                *dv++ = *p++;
-+            }
-+        }
-+    }
-+    else
-+    {
-+        // Two+ stripe
-+        const unsigned int sstride = stride1 * stride2;
-+        const unsigned int sstride_p = (sstride - stride1) / PW;
-+
-+        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
-+        const uint8_t * p2 = p1 + sstride - (x & mask);
-+        const unsigned int w1 = stride1 - (x & mask);
-+        const unsigned int w3 = (x + w) & mask;
-+        const unsigned int w2 = w - (w1 + w3);
-+
-+        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) {
-+            unsigned int j;
-+            const pixel * p = (const pixel *)p1;
-+            pixel * du = (pixel *)dst_u;
-+            pixel * dv = (pixel *)dst_v;
-+            for (unsigned int k = 0; k < w1; k += 2 * PW) {
-+                *du++ = *p++;
-+                *dv++ = *p++;
-+            }
-+            for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) {
-+                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
-+                    *du++ = *p++;
-+                    *dv++ = *p++;
-+                }
-+            }
-+            for (unsigned int k = 0; k < w3; k += 2 * PW) {
-+                *du++ = *p++;
-+                *dv++ = *p++;
-+            }
-+        }
-+    }
-+}
-+
-+void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c,
-+                             unsigned int stride1, unsigned int stride2,
-+                             const uint8_t * src_u, const unsigned int src_stride_u,
-+                             const uint8_t * src_v, const unsigned int src_stride_v,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h)
-+{
-+    const unsigned int x = _x * 2;
-+    const unsigned int w = _w * 2;
-+    const unsigned int mask = stride1 - 1;
-+    if ((x & ~mask) == ((x + w) & ~mask)) {
-+        // All in one sand stripe
-+        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
-+        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) {
-+            const pixel * su = (const pixel *)src_u;
-+            const pixel * sv = (const pixel *)src_v;
-+            pixel * p = (pixel *)p1;
-+            for (unsigned int k = 0; k < w; k += 2 * PW) {
-+                *p++ = *su++;
-+                *p++ = *sv++;
-+            }
-+        }
-+    }
-+    else
-+    {
-+        // Two+ stripe
-+        const unsigned int sstride = stride1 * stride2;
-+        const unsigned int sstride_p = (sstride - stride1) / PW;
-+
-+        const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
-+        const uint8_t * p2 = p1 + sstride - (x & mask);
-+        const unsigned int w1 = stride1 - (x & mask);
-+        const unsigned int w3 = (x + w) & mask;
-+        const unsigned int w2 = w - (w1 + w3);
-+
-+        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) {
-+            unsigned int j;
-+            const pixel * su = (const pixel *)src_u;
-+            const pixel * sv = (const pixel *)src_v;
-+            pixel * p = (pixel *)p1;
-+            for (unsigned int k = 0; k < w1; k += 2 * PW) {
-+                *p++ = *su++;
-+                *p++ = *sv++;
-+            }
-+            for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) {
-+                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
-+                    *p++ = *su++;
-+                    *p++ = *sv++;
-+                }
-+            }
-+            for (unsigned int k = 0; k < w3; k += 2 * PW) {
-+                *p++ = *su++;
-+                *p++ = *sv++;
-+            }
-+        }
-+    }
-+}
-+
-+
-+#undef pixel
-+#undef STRCAT
-+#undef FUNC
-+
-diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c
-new file mode 100644
-index 000000000000..ed0261b02f07
---- /dev/null
-+++ b/libavutil/rpi_sand_fns.c
-@@ -0,0 +1,353 @@
-+/*
-+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in the
-+      documentation and/or other materials provided with the distribution.
-+    * Neither the name of the copyright holder nor the
-+      names of its contributors may be used to endorse or promote products
-+      derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+Authors: John Cox
-+*/
-+
-+#include "config.h"
-+#include <stdint.h>
-+#include <string.h>
-+#include "rpi_sand_fns.h"
-+#include "avassert.h"
-+#include "frame.h"
-+
-+#if ARCH_ARM && HAVE_NEON
-+#include "arm/rpi_sand_neon.h"
-+#define HAVE_SAND_ASM 1
-+#else
-+#define HAVE_SAND_ASM 0
-+#endif
-+
-+#define PW 1
-+#include "rpi_sand_fn_pw.h"
-+#undef PW
-+
-+#define PW 2
-+#include "rpi_sand_fn_pw.h"
-+#undef PW
-+
-+#if 1
-+// Simple round
-+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
-+{
-+    const unsigned int rnd = (1 << shr) >> 1;
-+    const uint16_t * src = (const uint16_t *)_src;
-+
-+    for (; n != 0; --n) {
-+        *dst++ = (*src++ + rnd) >> shr;
-+    }
-+}
-+#else
-+// Dithered variation
-+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
-+{
-+    unsigned int rnd = (1 << shr) >> 1;
-+    const unsigned int mask = ((1 << shr) - 1);
-+    const uint16_t * src = (const uint16_t *)_src;
-+
-+    for (; n != 0; --n) {
-+        rnd = *src++ + (rnd & mask);
-+        *dst++ = rnd >> shr;
-+    }
-+}
-+#endif
-+
-+// Fetches a single patch - offscreen fixup not done here
-+// w <= stride1
-+// unclipped
-+// _x & _w in pixels, strides in bytes
-+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h)
-+{
-+    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
-+    const unsigned int xskip0 = _x - (x0 >> 2) * 3;
-+    const unsigned int x1 = ((_x + _w) / 3) * 4;
-+    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3;
-+    const unsigned int mask = stride1 - 1;
-+    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
-+    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
-+
-+#if HAVE_SAND_ASM
-+    if (_x == 0) {
-+        ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
-+        return;
-+    }
-+#endif
-+
-+    if (x0 == x1) {
-+        // *******************
-+        // Partial single word xfer
-+        return;
-+    }
-+
-+    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
-+    {
-+        unsigned int x = x0;
-+        const uint32_t * p = (const uint32_t *)p0;
-+        uint16_t * d = (uint16_t *)dst;
-+
-+        if (xskip0 != 0) {
-+            const uint32_t p3 = *p++;
-+
-+            if (xskip0 == 1)
-+                *d++ = (p3 >> 10) & 0x3ff;
-+            *d++ = (p3 >> 20) & 0x3ff;
-+
-+            if (((x += 4) & mask) == 0)
-+                p += slice_inc;
-+        }
-+
-+        while (x != x1) {
-+            const uint32_t p3 = *p++;
-+            *d++ = p3 & 0x3ff;
-+            *d++ = (p3 >> 10) & 0x3ff;
-+            *d++ = (p3 >> 20) & 0x3ff;
-+
-+            if (((x += 4) & mask) == 0)
-+                p += slice_inc;
-+        }
-+
-+        if (xrem1 != 0) {
-+            const uint32_t p3 = *p;
-+
-+            *d++ = p3 & 0x3ff;
-+            if (xrem1 == 2)
-+                *d++ = (p3 >> 10) & 0x3ff;
-+        }
-+    }
-+}
-+
-+
-+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
-+                             uint8_t * dst_v, const unsigned int dst_stride_v,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h)
-+{
-+    const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word
-+    const unsigned int xskip0 = _x - (x0 >> 3) * 3;
-+    const unsigned int x1 = ((_x + _w) / 3) * 8;
-+    const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3;
-+    const unsigned int mask = stride1 - 1;
-+    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
-+    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
-+
-+#if HAVE_SAND_ASM
-+    if (_x == 0) {
-+        ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v,
-+                                       src, stride1, stride2, _x, y, _w, h);
-+        return;
-+    }
-+#endif
-+
-+    if (x0 == x1) {
-+        // *******************
-+        // Partial single word xfer
-+        return;
-+    }
-+
-+    for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1)
-+    {
-+        unsigned int x = x0;
-+        const uint32_t * p = (const uint32_t *)p0;
-+        uint16_t * du = (uint16_t *)dst_u;
-+        uint16_t * dv = (uint16_t *)dst_v;
-+
-+        if (xskip0 != 0) {
-+            const uint32_t p3a = *p++;
-+            const uint32_t p3b = *p++;
-+
-+            if (xskip0 == 1)
-+            {
-+                *du++ = (p3a >> 20) & 0x3ff;
-+                *dv++ = (p3b >>  0) & 0x3ff;
-+            }
-+            *du++ = (p3b >> 10) & 0x3ff;
-+            *dv++ = (p3b >> 20) & 0x3ff;
-+
-+            if (((x += 8) & mask) == 0)
-+                p += slice_inc;
-+        }
-+
-+        while (x != x1) {
-+            const uint32_t p3a = *p++;
-+            const uint32_t p3b = *p++;
-+
-+            *du++ = p3a & 0x3ff;
-+            *dv++ = (p3a >> 10) & 0x3ff;
-+            *du++ = (p3a >> 20) & 0x3ff;
-+            *dv++ = p3b & 0x3ff;
-+            *du++ = (p3b >> 10) & 0x3ff;
-+            *dv++ = (p3b >> 20) & 0x3ff;
-+
-+            if (((x += 8) & mask) == 0)
-+                p += slice_inc;
-+        }
-+
-+        if (xrem1 != 0) {
-+            const uint32_t p3a = *p++;
-+            const uint32_t p3b = *p++;
-+
-+            *du++ = p3a & 0x3ff;
-+            *dv++ = (p3a >> 10) & 0x3ff;
-+            if (xrem1 == 2)
-+            {
-+                *du++ = (p3a >> 20) & 0x3ff;
-+                *dv++ = p3b & 0x3ff;
-+            }
-+        }
-+    }
-+}
-+
-+
-+// w/h in pixels
-+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
-+                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
-+                         unsigned int w, unsigned int h, const unsigned int shr)
-+{
-+    const unsigned int n = dst_stride1 / 2;
-+    unsigned int j;
-+
-+    // This is true for our current layouts
-+    av_assert0(dst_stride1 == src_stride1);
-+
-+    // As we have the same stride1 for src & dest and src is wider than dest
-+    // then if we loop on src we can always write contiguously to dest
-+    // We make no effort to copy an exact width - round up to nearest src stripe
-+    // as we will always have storage in dest for that
-+
-+#if ARCH_ARM && HAVE_NEON
-+    if (shr == 3 && src_stride1 == 128) {
-+        for (j = 0; j + n < w; j += dst_stride1) {
-+            uint8_t * d = dst + j * dst_stride2;
-+            const uint8_t * s1 = src + j * 2 * src_stride2;
-+            const uint8_t * s2 = s1 + src_stride1 * src_stride2;
-+
-+            ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h);
-+        }
-+    }
-+    else
-+#endif
-+    {
-+        for (j = 0; j + n < w; j += dst_stride1) {
-+            uint8_t * d = dst + j * dst_stride2;
-+            const uint8_t * s1 = src + j * 2 * src_stride2;
-+            const uint8_t * s2 = s1 + src_stride1 * src_stride2;
-+
-+            for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) {
-+                cpy16_to_8(d, s1, n, shr);
-+                cpy16_to_8(d + n, s2, n, shr);
-+            }
-+        }
-+    }
-+
-+    // Fix up a trailing dest half stripe
-+    if (j < w) {
-+        uint8_t * d = dst + j * dst_stride2;
-+        const uint8_t * s1 = src + j * 2 * src_stride2;
-+
-+        for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) {
-+            cpy16_to_8(d, s1, n, shr);
-+        }
-+    }
-+}
-+
-+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src)
-+{
-+    const int w = av_frame_cropped_width(src);
-+    const int h = av_frame_cropped_height(src);
-+    const int x = src->crop_left;
-+    const int y = src->crop_top;
-+
-+    // We will crop as part of the conversion
-+    dst->crop_top = 0;
-+    dst->crop_left = 0;
-+    dst->crop_bottom = 0;
-+    dst->crop_right = 0;
-+
-+    switch (src->format){
-+        case AV_PIX_FMT_SAND128:
-+        case AV_PIX_FMT_RPI4_8:
-+            switch (dst->format){
-+                case AV_PIX_FMT_YUV420P:
-+                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
-+                                             src->data[0],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x, y, w, h);
-+                    av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
-+                                             dst->data[2], dst->linesize[2],
-+                                             src->data[1],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x/2, y/2,  w/2, h/2);
-+                    break;
-+                default:
-+                    return -1;
-+            }
-+            break;
-+        case AV_PIX_FMT_SAND64_10:
-+            switch (dst->format){
-+                case AV_PIX_FMT_YUV420P10:
-+                    av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0],
-+                                             src->data[0],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x*2, y, w*2, h);
-+                    av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1],
-+                                             dst->data[2], dst->linesize[2],
-+                                             src->data[1],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x, y/2,  w, h/2);
-+                    break;
-+                default:
-+                    return -1;
-+            }
-+            break;
-+        case AV_PIX_FMT_RPI4_10:
-+            switch (dst->format){
-+                case AV_PIX_FMT_YUV420P10:
-+                    av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
-+                                             src->data[0],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x, y, w, h);
-+                    av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
-+                                             dst->data[2], dst->linesize[2],
-+                                             src->data[1],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x/2, y/2, w/2, h/2);
-+                    break;
-+                default:
-+                    return -1;
-+            }
-+            break;
-+        default:
-+            return -1;
-+    }
-+
-+    return av_frame_copy_props(dst, src);
-+}
-diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h
-new file mode 100644
-index 000000000000..634b55e800dc
---- /dev/null
-+++ b/libavutil/rpi_sand_fns.h
-@@ -0,0 +1,183 @@
-+/*
-+Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
-+All rights reserved.
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in the
-+      documentation and/or other materials provided with the distribution.
-+    * Neither the name of the copyright holder nor the
-+      names of its contributors may be used to endorse or promote products
-+      derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+Authors: John Cox
-+*/
-+
-+#ifndef AVUTIL_RPI_SAND_FNS
-+#define AVUTIL_RPI_SAND_FNS
-+
-+#include "libavutil/frame.h"
-+
-+// For all these fns _x & _w are measured as coord * PW
-+// For the C fns coords are in chroma pels (so luma / 2)
-+// Strides are in bytes
-+
-+void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+
-+void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u,
-+                             uint8_t * dst_v, const unsigned int dst_stride_v,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
-+                             uint8_t * dst_v, const unsigned int dst_stride_v,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+
-+void av_rpi_planar_to_sand_c8(uint8_t * dst_c,
-+                             unsigned int stride1, unsigned int stride2,
-+                             const uint8_t * src_u, const unsigned int src_stride_u,
-+                             const uint8_t * src_v, const unsigned int src_stride_v,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+void av_rpi_planar_to_sand_c16(uint8_t * dst_c,
-+                             unsigned int stride1, unsigned int stride2,
-+                             const uint8_t * src_u, const unsigned int src_stride_u,
-+                             const uint8_t * src_v, const unsigned int src_stride_v,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+
-+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
-+                             uint8_t * dst_v, const unsigned int dst_stride_v,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
-+
-+
-+// w/h in pixels
-+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
-+                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
-+                         unsigned int w, unsigned int h, const unsigned int shr);
-+
-+
-+// dst must contain required pixel format & allocated data buffers
-+// Cropping on the src buffer will be honoured and dst crop will be set to zero
-+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src);
-+
-+
-+static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame)
-+{
-+#ifdef RPI_ZC_SAND128_ONLY
-+    // If we are sure we only only support 128 byte sand formats replace the
-+    // var with a constant which should allow for better optimisation
-+    return 128;
-+#else
-+    return frame->linesize[0];
-+#endif
-+}
-+
-+static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame)
-+{
-+    return frame->linesize[3];
-+}
-+
-+
-+static inline int av_rpi_is_sand_format(const int format)
-+{
-+    return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10);
-+}
-+
-+static inline int av_rpi_is_sand_frame(const AVFrame * const frame)
-+{
-+    return av_rpi_is_sand_format(frame->format);
-+}
-+
-+static inline int av_rpi_is_sand8_frame(const AVFrame * const frame)
-+{
-+    return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8);
-+}
-+
-+static inline int av_rpi_is_sand16_frame(const AVFrame * const frame)
-+{
-+    return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16);
-+}
-+
-+static inline int av_rpi_is_sand30_frame(const AVFrame * const frame)
-+{
-+    return (frame->format == AV_PIX_FMT_RPI4_10);
-+}
-+
-+static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame)
-+{
-+    return av_rpi_is_sand8_frame(frame) ? 0 : 1;
-+}
-+
-+// If x is measured in bytes (not pixels) then this works for sand64_16 as
-+// well as sand128 - but in the general case we work that out
-+
-+static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y)
-+{
-+    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
-+    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
-+    const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame);
-+    const unsigned int x1 = x & (stride1 - 1);
-+    const unsigned int x2 = x ^ x1;
-+
-+    return x1 + stride1 * y + stride2 * x2;
-+}
-+
-+static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c)
-+{
-+    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
-+    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
-+    const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1);
-+    const unsigned int x1 = x & (stride1 - 1);
-+    const unsigned int x2 = x ^ x1;
-+
-+    return x1 + stride1 * y_c + stride2 * x2;
-+}
-+
-+static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y)
-+{
-+    return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y);
-+}
-+
-+static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y)
-+{
-+    return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y);
-+}
-+
-+#endif
-+
-
-From c1b879de52690fb978f344b05cb213c34c35021f Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 11:36:47 +0100
-Subject: [PATCH 003/186] Add aarch64 asm sand conv functions
-
-Many thanks to eiler.mike@gmail.com (Michael Eiler) for these
-optimizations
----
- libavutil/aarch64/Makefile        |   2 +
- libavutil/aarch64/rpi_sand_neon.S | 676 ++++++++++++++++++++++++++++++
- libavutil/aarch64/rpi_sand_neon.h |  55 +++
- libavutil/rpi_sand_fn_pw.h        |   4 +-
- libavutil/rpi_sand_fns.c          |   3 +
- 5 files changed, 738 insertions(+), 2 deletions(-)
- create mode 100644 libavutil/aarch64/rpi_sand_neon.S
- create mode 100644 libavutil/aarch64/rpi_sand_neon.h
-
-diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile
-index eba01513379a..1b44beab3942 100644
---- a/libavutil/aarch64/Makefile
-+++ b/libavutil/aarch64/Makefile
-@@ -4,3 +4,5 @@ OBJS += aarch64/cpu.o                                                 \
- 
- NEON-OBJS += aarch64/float_dsp_neon.o                                 \
-              aarch64/tx_float_neon.o                                  \
-+             aarch64/rpi_sand_neon.o                                  \
-+
-diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
-new file mode 100644
-index 000000000000..cdcf71ee6740
---- /dev/null
-+++ b/libavutil/aarch64/rpi_sand_neon.S
-@@ -0,0 +1,676 @@
-+/*
-+Copyright (c) 2021 Michael Eiler
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in the
-+      documentation and/or other materials provided with the distribution.
-+    * Neither the name of the copyright holder nor the
-+      names of its contributors may be used to endorse or promote products
-+      derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+Authors: Michael Eiler <eiler.mike@gmail.com>
-+*/
-+
-+#include "asm.S"
-+
-+// void ff_rpi_sand8_lines_to_planar_y8(
-+//   uint8_t * dest,            : x0
-+//   unsigned int dst_stride,   : w1
-+//   const uint8_t * src,       : x2
-+//   unsigned int src_stride1,  : w3, always 128
-+//   unsigned int src_stride2,  : w4
-+//   unsigned int _x,           : w5
-+//   unsigned int y,            : w6
-+//   unsigned int _w,           : w7
-+//   unsigned int h);           : [sp, #0]
-+
-+function ff_rpi_sand8_lines_to_planar_y8, export=1
-+    // w15 contains the number of rows we need to process
-+    ldr w15, [sp, #0]
-+
-+    // w8 will contain the number of blocks per row
-+    // w8 = floor(_w/stride1)
-+    // stride1 is assumed to always be 128
-+    mov w8, w1
-+    lsr w8, w8, #7
-+
-+    // in case the width of the image is not a multiple of 128, there will
-+    // be an incomplete block at the end of every row
-+    // w9 contains the number of pixels stored within this block
-+    // w9 = _w - w8 * 128
-+    lsl w9, w8, #7
-+    sub w9, w7, w9
-+
-+    // this is the value we have to add to the src pointer after reading a complete block
-+    // it will move the address to the start of the next block
-+    // w10 = stride2 * stride1 - stride1 
-+    mov w10, w4
-+    lsl w10, w10, #7
-+    sub w10, w10, #128
-+
-+    // w11 is the row offset, meaning the start offset of the first block of every collumn
-+    // this will be increased with stride1 within every iteration of the row_loop
-+    eor w11, w11, w11
-+
-+    // w12 = 0, processed row count
-+    eor w12, w12, w12
-+row_loop:
-+    // start of the first block within the current row
-+    // x13 = row offset + src
-+    mov x13, x2
-+    add x13, x13, x11
-+
-+    // w14 = 0, processed block count
-+    eor w14, w14, w14
-+
-+    cmp w8, #0
-+    beq no_main_y8
-+
-+block_loop:
-+    // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128
-+    // fortunately these aren't callee saved ones, meaning we don't need to backup them
-+    ld1 { v0.16b,  v1.16b,  v2.16b,  v3.16b}, [x13], #64
-+    ld1 { v4.16b,  v5.16b,  v6.16b,  v7.16b}, [x13], #64 
-+
-+    // write these registers back to the destination vector and increase the dst address by 128
-+    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
-+    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x0], #64
-+
-+    // move the source register to the beginning of the next block (x13 = src + block offset)
-+    add x13, x13, x10
-+    // increase the block counter
-+    add w14, w14, #1
-+
-+    // continue with the block_loop if we haven't copied all full blocks yet
-+    cmp w8, w14
-+    bgt block_loop
-+
-+    // handle the last block at the end of each row
-+    // at most 127 byte values copied from src to dst
-+no_main_y8:
-+    eor w5, w5, w5 // i = 0
-+incomplete_block_loop_y8:
-+    cmp w5, w9
-+    bge incomplete_block_loop_end_y8
-+
-+    ldrb w6, [x13]
-+    strb w6, [x0]
-+    add x13, x13, #1
-+    add x0, x0, #1
-+
-+    add w5, w5, #1
-+    b incomplete_block_loop_y8
-+incomplete_block_loop_end_y8:
-+    
-+   
-+    // increase the row offset by 128 (stride1) 
-+    add w11, w11, #128
-+    // increment the row counter
-+    add w12, w12, #1
-+    
-+    // process the next row if we haven't finished yet
-+    cmp w15, w12
-+    bgt row_loop
-+
-+    ret
-+endfunc
-+
-+
-+
-+// void ff_rpi_sand8_lines_to_planar_c8(
-+//   uint8_t * dst_u,           : x0
-+//   unsigned int dst_stride_u, : w1 == width
-+//   uint8_t * dst_v,           : x2
-+//   unsigned int dst_stride_v, : w3 == width
-+//   const uint8_t * src,       : x4
-+//   unsigned int stride1,      : w5 == 128
-+//   unsigned int stride2,      : w6
-+//   unsigned int _x,           : w7
-+//   unsigned int y,            : [sp, #0]
-+//   unsigned int _w,           : [sp, #8]
-+//   unsigned int h);           : [sp, #16]
-+
-+function ff_rpi_sand8_lines_to_planar_c8, export=1
-+    // w7 = width
-+    ldr w7, [sp, #8]
-+
-+    // w15 contains the number of rows we need to process
-+    // counts down
-+    ldr w15, [sp, #16]
-+
-+    // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6
-+    mov w8, w7
-+    lsr w8, w8, #6
-+
-+    // number of pixels in block at the end of every row
-+    // w9 = _w - (w8 * 64)
-+    lsl w9, w8, #6
-+    sub w9, w7, w9
-+
-+    // Skip at the end of the line to account for stride
-+    sub w12, w1, w7
-+
-+    // address delta to the beginning of the next block
-+    // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128
-+    lsl w10, w6, #7
-+    sub w10, w10, #128
-+
-+    // w11 = row address start offset = 0
-+    eor w11, w11, w11
-+
-+row_loop_c8:
-+    // start of the first block within the current row
-+    // x13 = row offset + src
-+    mov x13, x4
-+    add x13, x13, x11
-+
-+    // w14 = 0, processed block count
-+    eor w14, w14, w14
-+
-+    cmp w8, #0
-+    beq no_main_c8
-+
-+block_loop_c8:
-+    // load the full block -> 128 bytes, the block contains 64 interleaved U and V values 
-+    ld2 { v0.16b,  v1.16b }, [x13], #32
-+    ld2 { v2.16b,  v3.16b }, [x13], #32
-+    ld2 { v4.16b,  v5.16b }, [x13], #32
-+    ld2 { v6.16b,  v7.16b }, [x13], #32
-+
-+    // swap register so that we can write them out with a single instruction
-+    mov v16.16b, v1.16b
-+    mov v17.16b, v3.16b
-+    mov v18.16b, v5.16b
-+    mov v1.16b, v2.16b
-+    mov v2.16b, v4.16b
-+    mov v3.16b, v6.16b
-+    mov v4.16b, v16.16b
-+    mov v5.16b, v17.16b
-+    mov v6.16b, v18.16b
-+
-+    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
-+    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x2], #64
-+
-+    // increment row counter and move src to the beginning of the next block
-+    add w14, w14, #1
-+    add x13, x13, x10
-+    
-+    // jump to block_loop_c8 iff the block count is smaller than the number of full blocks
-+    cmp w8, w14
-+    bgt block_loop_c8
-+
-+no_main_c8:
-+    // handle incomplete block at the end of every row
-+    eor w5, w5, w5 // point counter, this might be 
-+incomplete_block_loop_c8:
-+    cmp w5, w9
-+    bge incomplete_block_loop_end_c8
-+
-+    ldrb w1, [x13]
-+    strb w1, [x0]
-+    add x13, x13, #1
-+
-+    ldrb w1, [x13]
-+    strb w1, [x2]
-+    add x13, x13, #1
-+
-+    add x0, x0, #1
-+    add x2, x2, #1
-+
-+    add w5, w5, #1
-+    b incomplete_block_loop_c8
-+incomplete_block_loop_end_c8:
-+
-+    // increase row_offset by stride1
-+    add w11, w11, #128
-+    add x0, x0, w12, sxtw
-+    add x2, x2, w12, sxtw
-+
-+    // jump to row_Loop_c8 iff the row count is small than the height
-+    subs w15, w15, #1
-+    bgt row_loop_c8
-+
-+    ret
-+endfunc
-+
-+//void ff_rpi_sand30_lines_to_planar_y16(
-+//  uint8_t * dest,             // [x0]
-+//  unsigned int dst_stride,    // [w1] -> assumed to be equal to _w
-+//  const uint8_t * src,        // [x2]
-+//  unsigned int src_stride1,   // [w3] -> 128
-+//  unsigned int src_stride2,   // [w4]
-+//  unsigned int _x,            // [w5]
-+//  unsigned int y,             // [w6]
-+//  unsigned int _w,            // [w7]
-+//  unsigned int h);            // [sp, #0]
-+
-+function ff_rpi_sand30_lines_to_planar_y16, export=1
-+    stp x19, x20, [sp, #-48]!
-+    stp x21, x22, [sp, #16]
-+    stp x23, x24, [sp, #32]
-+
-+    // w6 = argument h
-+    ldr w6, [sp, #48]
-+
-+    // slice_inc = ((stride2 - 1) * stride1)
-+    mov w5, w4
-+    sub w5, w5, #1
-+    lsl w5, w5, #7
-+
-+    // total number of bytes per row = (width / 3) * 4
-+    mov w8, w7
-+    mov w9, #3
-+    udiv w8, w8, w9
-+    lsl w8, w8, #2
-+
-+    // number of full 128 byte blocks to be processed
-+    mov w9, #96
-+    udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96
-+
-+    // w10 = number of full integers to process (4 bytes)
-+    // w11 = remaning zero to two 10bit values still to copy over
-+    mov w12, #96
-+    mul w12, w9, w12
-+    sub w12, w7, w12  // width - blocks*96 = remaining points per row
-+    mov w11, #3
-+    udiv w10, w12, w11 // full integers to process = w12 / 3 
-+    mul w11, w10, w11  // #integers *3
-+    sub w11, w12, w11  // remaining 0-2 points = remaining points - integers*3
-+
-+    // increase w9 by one if w10+w11 is not zero, and decrease the row count by one
-+    // this is to efficiently copy incomplete blocks at the end of the rows
-+    // the last row is handled explicitly to avoid writing out of bounds
-+    add w22, w10, w11
-+    cmp w22, #0
-+    cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise
-+    add w9, w9, w22
-+    sub w6, w6, #1
-+
-+    // store the number of bytes in w20 which we copy too much for every row
-+    // when the width of the frame is not a multiple of 96 (128bytes storing 96 10bit values)
-+    mov w20, #96*2
-+    mul w20, w20, w9
-+    sub w20, w1, w20
-+
-+    mov w23, #0 // flag to check whether the last line had already been processed
-+    
-+    // bitmask to clear the uppper 6bits of the result values
-+    mov x19, #0x03ff03ff03ff03ff
-+    dup v22.2d, x19
-+
-+    // row counter = 0
-+    eor w12, w12, w12
-+row_loop_y16:
-+    cmp w12, w6               // jump to row_loop_y16_fin if we processed all rows
-+    bge row_loop_y16_fin
-+
-+    mov x13, x2               // row src
-+    eor w14, w14, w14         // full block counter
-+block_loop_y16:
-+    cmp w14, w9
-+    bge block_loop_y16_fin
-+
-+    // load 64 bytes
-+    ld1 { v0.4s,  v1.4s, v2.4s, v3.4s }, [x13], #64
-+   
-+    // process v0 and v1
-+    xtn v16.4h, v0.4s
-+    ushr v0.4s, v0.4s, #10
-+    xtn v17.4h, v0.4s
-+    ushr v0.4s, v0.4s, #10
-+    xtn v18.4h, v0.4s
-+   
-+    xtn2 v16.8h, v1.4s
-+    and v16.16b, v16.16b, v22.16b
-+    ushr v1.4s, v1.4s, #10
-+    xtn2 v17.8h, v1.4s
-+    and v17.16b, v17.16b, v22.16b
-+    ushr v1.4s, v1.4s, #10
-+    xtn2 v18.8h, v1.4s
-+    and v18.16b, v18.16b, v22.16b
-+
-+    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
-+
-+    // process v2 and v3
-+    xtn v23.4h, v2.4s
-+    ushr v2.4s, v2.4s, #10
-+    xtn v24.4h, v2.4s
-+    ushr v2.4s, v2.4s, #10
-+    xtn v25.4h, v2.4s
-+    
-+    xtn2 v23.8h, v3.4s
-+    and v23.16b, v23.16b, v22.16b
-+    ushr v3.4s, v3.4s, #10
-+    xtn2 v24.8h, v3.4s
-+    and v24.16b, v24.16b, v22.16b
-+    ushr v3.4s, v3.4s, #10
-+    xtn2 v25.8h, v3.4s
-+    and v25.16b, v25.16b, v22.16b
-+
-+    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
-+
-+    // load the second half of the block -> 64 bytes into registers v4-v7
-+    ld1 { v4.4s,  v5.4s,  v6.4s,  v7.4s }, [x13], #64
-+    
-+    // process v4 and v5
-+    xtn v16.4h, v4.4s
-+    ushr v4.4s, v4.4s, #10
-+    xtn v17.4h, v4.4s
-+    ushr v4.4s, v4.4s, #10
-+    xtn v18.4h, v4.4s
-+   
-+    xtn2 v16.8h, v5.4s 
-+    and v16.16b, v16.16b, v22.16b
-+    ushr v5.4s, v5.4s, #10
-+    xtn2 v17.8h, v5.4s
-+    and v17.16b, v17.16b, v22.16b
-+    ushr v5.4s, v5.4s, #10
-+    xtn2 v18.8h, v5.4s
-+    and v18.16b, v18.16b, v22.16b
-+
-+    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
-+
-+    // v6 and v7
-+    xtn v23.4h, v6.4s
-+    ushr v6.4s, v6.4s, #10
-+    xtn v24.4h, v6.4s
-+    ushr v6.4s, v6.4s, #10
-+    xtn v25.4h, v6.4s
-+   
-+    xtn2 v23.8h, v7.4s 
-+    and v23.16b, v23.16b, v22.16b
-+    ushr v7.4s, v7.4s, #10
-+    xtn2 v24.8h, v7.4s
-+    and v24.16b, v24.16b, v22.16b
-+    ushr v7.4s, v7.4s, #10
-+    xtn2 v25.8h, v7.4s
-+    and v25.16b, v25.16b, v22.16b
-+
-+    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
-+ 
-+    add x13, x13, x5          // row src += slice_inc
-+    add w14, w14, #1
-+    b block_loop_y16
-+block_loop_y16_fin:
-+
-+    
-+
-+
-+    add x2, x2, #128          // src += stride1 (start of the next row)
-+    add x0, x0, w20, sxtw     // subtract the bytes we copied too much from dst
-+    add w12, w12, #1
-+    b row_loop_y16
-+row_loop_y16_fin:
-+
-+    // check whether we have incomplete blocks at the end of every row
-+    // in that case decrease row block count by one
-+    // change height back to it's original value (meaning increase it by 1)
-+    // and jump back to another iteration of row_loop_y16
-+
-+    cmp w23, #1
-+    beq row_loop_y16_fin2 // don't continue here if we already processed the last row
-+    add w6, w6, #1    // increase height to the original value
-+    sub w9, w9, w22   // block count - 1 or 0, depending on the remaining bytes count
-+    mov w23, #1
-+    b row_loop_y16
-+row_loop_y16_fin2:
-+
-+    sub x0, x0, w20, sxtw // with the last row we didn't actually move the dst ptr to far ahead, therefore readd the diference
-+
-+    // now we've got to handle the last block in the last row
-+    eor w12, w12, w12 // w12 = 0 = counter
-+integer_loop_y16:
-+    cmp w12, w10
-+    bge integer_loop_y16_fin
-+    ldr w14, [x13], #4
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    lsr w14, w14, #10
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    lsr w14, w14, #10
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    add w12, w12, #1
-+    b integer_loop_y16
-+integer_loop_y16_fin:
-+
-+final_values_y16:
-+    // remaining point count = w11
-+    ldr w14, [x13], #4
-+    cmp w11, #0
-+    beq final_values_y16_fin
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+    cmp w11, #1
-+    beq final_values_y16_fin
-+    lsr w14, w14, #10
-+    and w15, w14, #0x3ff
-+    strh w15, [x0], #2
-+final_values_y16_fin:
-+
-+    ldp x23, x24, [sp, #32]
-+    ldp x21, x22, [sp, #16]
-+    ldp x19, x20, [sp], #48
-+    ret
-+endfunc
-+
-+//void ff_rpi_sand30_lines_to_planar_c16(
-+//  uint8_t * dst_u,            // [x0]
-+//  unsigned int dst_stride_u,  // [w1] == _w*2
-+//  uint8_t * dst_v,            // [x2]
-+//  unsigned int dst_stride_v,  // [w3] == _w*2
-+//  const uint8_t * src,        // [x4]
-+//  unsigned int stride1,       // [w5] == 128
-+//  unsigned int stride2,       // [w6] 
-+//  unsigned int _x,            // [w7] == 0
-+//  unsigned int y,             // [sp, #0] == 0
-+//  unsigned int _w,            // [sp, #8] -> w3
-+//  unsigned int h);            // [sp, #16] -> w7
-+
-+.macro rpi_sand30_lines_to_planar_c16_block_half
-+    ld1 { v0.4s,  v1.4s, v2.4s, v3.4s }, [x13], #64
-+
-+    xtn v4.4h, v0.4s
-+    ushr v0.4s, v0.4s, #10
-+    xtn v5.4h, v0.4s
-+    ushr v0.4s, v0.4s, #10
-+    xtn v6.4h, v0.4s
-+    xtn2 v4.8h, v1.4s
-+    ushr v1.4s, v1.4s, #10
-+    xtn2 v5.8h, v1.4s
-+    ushr v1.4s, v1.4s, #10
-+    xtn2 v6.8h, v1.4s
-+    and v4.16b, v4.16b, v16.16b
-+    and v5.16b, v5.16b, v16.16b
-+    and v6.16b, v6.16b, v16.16b
-+    st3 { v4.8h, v5.8h, v6.8h }, [sp], #48
-+    
-+    xtn v4.4h, v2.4s
-+    ushr v2.4s, v2.4s, #10
-+    xtn v5.4h, v2.4s
-+    ushr v2.4s, v2.4s, #10
-+    xtn v6.4h, v2.4s
-+    xtn2 v4.8h, v3.4s
-+    ushr v3.4s, v3.4s, #10
-+    xtn2 v5.8h, v3.4s
-+    ushr v3.4s, v3.4s, #10
-+    xtn2 v6.8h, v3.4s
-+    and v4.16b, v4.16b, v16.16b
-+    and v5.16b, v5.16b, v16.16b
-+    and v6.16b, v6.16b, v16.16b
-+    st3 { v4.8h, v5.8h, v6.8h }, [sp]
-+    sub sp, sp, #48
-+.endm
-+
-+function ff_rpi_sand30_lines_to_planar_c16, export=1
-+    stp x19, x20, [sp, #-48]!
-+    stp x21, x22, [sp, #16]
-+    stp x23, x24, [sp, #32]
-+
-+    ldr w3, [sp, #48+8]    // w3 = width
-+    ldr w7, [sp, #48+16]   // w7 = height
-+
-+    // reserve space on the stack for intermediate results
-+    sub sp, sp, #256
-+
-+    // number of 128byte blocks per row, w8 = width / 48
-+    mov w9, #48
-+    udiv w8, w3, w9
-+
-+    // remaining pixels (rem_pix) per row, w9 = width - w8 * 48
-+    mul w9, w8, w9
-+    sub w9, w3, w9
-+
-+    // row offset, the beginning of the next row to process
-+    eor w10, w10, w10
-+
-+    // offset to the beginning of the next block, w11 = stride2 * 128 - 128
-+    lsl w11, w6, #7
-+    sub w11, w11, #128
-+
-+    // decrease the height by one and in case of remaining pixels increase the block count by one
-+    sub w7, w7, #1
-+    cmp w9, #0
-+    cset w19, ne    // w19 == 1 iff reamining pixels != 0
-+    add w8, w8, w19
-+
-+    // bytes we have to move dst back by at the end of every row
-+    mov w21, #48*2
-+    mul w21, w21, w8
-+    sub w21, w1, w21
-+
-+    mov w20, #0     // w20 = flag, last row processed
-+
-+    mov x12, #0x03ff03ff03ff03ff
-+    dup v16.2d, x12
-+
-+    // iterate through rows, row counter = w12 = 0
-+    eor w12, w12, w12
-+row_loop_c16:
-+    cmp w12, w7
-+    bge row_loop_c16_fin
-+
-+    // address of row data = src + row_offset
-+    mov x13, x4
-+    add x13, x13, x10
-+
-+    eor w14, w14, w14
-+block_loop_c16:
-+    cmp w14, w8
-+    bge block_loop_c16_fin
-+
-+    rpi_sand30_lines_to_planar_c16_block_half
-+
-+    ld2 { v0.8h, v1.8h }, [sp], #32
-+    ld2 { v2.8h, v3.8h }, [sp], #32
-+    ld2 { v4.8h, v5.8h }, [sp]
-+    sub sp, sp, #64
-+
-+    st1 { v0.8h }, [x0], #16
-+    st1 { v2.8h }, [x0], #16
-+    st1 { v4.8h }, [x0], #16
-+    st1 { v1.8h }, [x2], #16
-+    st1 { v3.8h }, [x2], #16
-+    st1 { v5.8h }, [x2], #16
-+
-+    rpi_sand30_lines_to_planar_c16_block_half
-+
-+    ld2 { v0.8h, v1.8h }, [sp], #32
-+    ld2 { v2.8h, v3.8h }, [sp], #32
-+    ld2 { v4.8h, v5.8h }, [sp]
-+    sub sp, sp, #64
-+
-+    st1 { v0.8h }, [x0], #16
-+    st1 { v2.8h }, [x0], #16
-+    st1 { v4.8h }, [x0], #16
-+    st1 { v1.8h }, [x2], #16
-+    st1 { v3.8h }, [x2], #16
-+    st1 { v5.8h }, [x2], #16
-+
-+    add x13, x13, x11 // offset to next block
-+    add w14, w14, #1
-+    b block_loop_c16
-+block_loop_c16_fin:
-+
-+    add w10, w10, #128
-+    add w12, w12, #1
-+    add x0, x0, w21, sxtw  // move dst pointers back by x21
-+    add x2, x2, w21, sxtw
-+    b row_loop_c16
-+row_loop_c16_fin:
-+
-+    cmp w20, #1
-+    beq row_loop_c16_fin2
-+    mov w20, #1
-+    sub w8, w8, w19 // decrease block count by w19
-+    add w7, w7, #1 // increase height
-+    b row_loop_c16
-+
-+row_loop_c16_fin2:
-+    sub x0, x0, w21, sxtw // readd x21 in case of the last row
-+    sub x2, x2, w21, sxtw // so that we can write out the few remaining pixels
-+
-+    // last incomplete block to be finished
-+    // read operations are fine, stride2 is more than large enough even if rem_pix is 0
-+    rpi_sand30_lines_to_planar_c16_block_half
-+    ld2 { v0.8h, v1.8h }, [sp], #32
-+    ld2 { v2.8h, v3.8h }, [sp], #32
-+    ld2 { v4.8h, v5.8h }, [sp], #32
-+    rpi_sand30_lines_to_planar_c16_block_half
-+    ld2 { v0.8h, v1.8h }, [sp], #32
-+    ld2 { v2.8h, v3.8h }, [sp], #32
-+    ld2 { v4.8h, v5.8h }, [sp]
-+    sub sp, sp, #160
-+
-+    mov x4, sp
-+    eor w20, w20, w20
-+rem_pix_c16_loop:
-+    cmp w20, w9
-+    bge rem_pix_c16_fin
-+
-+    ldr w22, [x4], #4
-+    str w22, [x0], #2
-+    lsr w22, w22, #16
-+    str w22, [x2], #2 
-+
-+    add w20, w20, #1
-+    b rem_pix_c16_loop
-+rem_pix_c16_fin:
-+
-+    add sp, sp, #256
-+
-+    ldp x23, x24, [sp, #32]
-+    ldp x21, x22, [sp, #16]
-+    ldp x19, x20, [sp], #48
-+    ret
-+endfunc
-+
-+
-+
-+//void ff_rpi_sand30_lines_to_planar_p010(
-+//  uint8_t * dest,
-+//  unsigned int dst_stride,
-+//  const uint8_t * src,
-+//  unsigned int src_stride1,
-+//  unsigned int src_stride2,
-+//  unsigned int _x,
-+//  unsigned int y,
-+//  unsigned int _w,
-+//  unsigned int h);
-+
-diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h
-new file mode 100644
-index 000000000000..b3aa481ea497
---- /dev/null
-+++ b/libavutil/aarch64/rpi_sand_neon.h
-@@ -0,0 +1,55 @@
-+/*
-+Copyright (c) 2021 Michael Eiler
-+
-+Redistribution and use in source and binary forms, with or without
-+modification, are permitted provided that the following conditions are met:
-+    * Redistributions of source code must retain the above copyright
-+      notice, this list of conditions and the following disclaimer.
-+    * Redistributions in binary form must reproduce the above copyright
-+      notice, this list of conditions and the following disclaimer in the
-+      documentation and/or other materials provided with the distribution.
-+    * Neither the name of the copyright holder nor the
-+      names of its contributors may be used to endorse or promote products
-+      derived from this software without specific prior written permission.
-+
-+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
-+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-+
-+Authors: Michael Eiler <eiler.mike@gmail.com>
-+*/
-+
-+#pragma once
-+
-+#ifdef __cplusplus
-+extern "C" {
-+#endif
-+
-+void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
-+  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
-+  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
-+
-+void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u,
-+  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src,
-+  unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y,
-+  unsigned int _w, unsigned int h);
-+
-+void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride,
-+  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
-+  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
-+
-+void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u,
-+  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1,
-+  unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
-+
-+#ifdef __cplusplus
-+}
-+#endif
-+
-diff --git a/libavutil/rpi_sand_fn_pw.h b/libavutil/rpi_sand_fn_pw.h
-index 0324f6826dde..0d5d203dc3cd 100644
---- a/libavutil/rpi_sand_fn_pw.h
-+++ b/libavutil/rpi_sand_fn_pw.h
-@@ -54,7 +54,7 @@ void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride,
-     const unsigned int w = _w;
-     const unsigned int mask = stride1 - 1;
- 
--#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64)
-+#if PW == 1 && HAVE_SAND_ASM
-     if (_x == 0) {
-         ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride,
-                                      src, stride1, stride2, _x, y, _w, h);
-@@ -106,7 +106,7 @@ void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_strid
-     const unsigned int w = _w * 2;
-     const unsigned int mask = stride1 - 1;
- 
--#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64)
-+#if PW == 1 && HAVE_SAND_ASM
-     if (_x == 0) {
-         ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v,
-                                      src, stride1, stride2, _x, y, _w, h);
-diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c
-index ed0261b02f07..1f543e935701 100644
---- a/libavutil/rpi_sand_fns.c
-+++ b/libavutil/rpi_sand_fns.c
-@@ -37,6 +37,9 @@ Authors: John Cox
- #if ARCH_ARM && HAVE_NEON
- #include "arm/rpi_sand_neon.h"
- #define HAVE_SAND_ASM 1
-+#elif ARCH_AARCH64 && HAVE_NEON
-+#include "aarch64/rpi_sand_neon.h"
-+#define HAVE_SAND_ASM 1
- #else
- #define HAVE_SAND_ASM 0
- #endif
-
-From c45ddc15e96adf8d90eb0c849d60499849213a12 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 11:56:02 +0100
-Subject: [PATCH 004/186] Add raw encoding for sand
-
----
- libavcodec/raw.c    |  6 +++
- libavcodec/rawenc.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-
- 2 files changed, 96 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/raw.c b/libavcodec/raw.c
-index 1e5b48d1e06c..1e689f9ee0b8 100644
---- a/libavcodec/raw.c
-+++ b/libavcodec/raw.c
-@@ -295,6 +295,12 @@ static const PixelFormatTag raw_pix_fmt_tags[] = {
-     { AV_PIX_FMT_RGB565LE,MKTAG( 3 ,  0 ,  0 ,  0 ) }, /* flipped RGB565LE */
-     { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
- 
-+    /* RPI (Might as well define for everything) */
-+    { AV_PIX_FMT_SAND128,     MKTAG('S', 'A', 'N', 'D') },
-+    { AV_PIX_FMT_RPI4_8,      MKTAG('S', 'A', 'N', 'D') },
-+    { AV_PIX_FMT_SAND64_10,   MKTAG('S', 'N', 'D', 'A') },
-+    { AV_PIX_FMT_RPI4_10,     MKTAG('S', 'N', 'D', 'B') },
-+
-     { AV_PIX_FMT_NONE, 0 },
- };
- 
-diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c
-index 8c577006d922..594a77c42a64 100644
---- a/libavcodec/rawenc.c
-+++ b/libavcodec/rawenc.c
-@@ -24,6 +24,7 @@
-  * Raw Video Encoder
-  */
- 
-+#include "config.h"
- #include "avcodec.h"
- #include "codec_internal.h"
- #include "encode.h"
-@@ -33,6 +34,10 @@
- #include "libavutil/intreadwrite.h"
- #include "libavutil/imgutils.h"
- #include "libavutil/internal.h"
-+#include "libavutil/avassert.h"
-+#if CONFIG_SAND
-+#include "libavutil/rpi_sand_fns.h"
-+#endif
- 
- static av_cold int raw_encode_init(AVCodecContext *avctx)
- {
-@@ -46,12 +51,95 @@ static av_cold int raw_encode_init(AVCodecContext *avctx)
-     return 0;
- }
- 
-+#if CONFIG_SAND
-+static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
-+                      const AVFrame *frame)
-+{
-+    const int width = av_frame_cropped_width(frame);
-+    const int height = av_frame_cropped_height(frame);
-+    const int x0 = frame->crop_left;
-+    const int y0 = frame->crop_top;
-+    const int size = width * height * 3 / 2;
-+    uint8_t * dst;
-+    int ret;
-+
-+    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
-+        return ret;
-+
-+    dst = pkt->data;
-+
-+    av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
-+    dst += width * height;
-+    av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2,
-+                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2);
-+    return 0;
-+}
-+
-+static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
-+                      const AVFrame *frame)
-+{
-+    const int width = av_frame_cropped_width(frame);
-+    const int height = av_frame_cropped_height(frame);
-+    const int x0 = frame->crop_left;
-+    const int y0 = frame->crop_top;
-+    const int size = width * height * 3;
-+    uint8_t * dst;
-+    int ret;
-+
-+    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
-+        return ret;
-+
-+    dst = pkt->data;
-+
-+    av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height);
-+    dst += width * height * 2;
-+    av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width,
-+                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2);
-+    return 0;
-+}
-+
-+static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
-+                      const AVFrame *frame)
-+{
-+    const int width = av_frame_cropped_width(frame);
-+    const int height = av_frame_cropped_height(frame);
-+    const int x0 = frame->crop_left;
-+    const int y0 = frame->crop_top;
-+    const int size = width * height * 3;
-+    uint8_t * dst;
-+    int ret;
-+
-+    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
-+        return ret;
-+
-+    dst = pkt->data;
-+
-+    av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
-+    dst += width * height * 2;
-+    av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width,
-+                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2);
-+    return 0;
-+}
-+#endif
-+
-+
- static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
-                       const AVFrame *frame, int *got_packet)
- {
--    int ret = av_image_get_buffer_size(frame->format,
--                                       frame->width, frame->height, 1);
-+    int ret;
- 
-+#if CONFIG_SAND
-+    if (av_rpi_is_sand_frame(frame)) {
-+        ret = av_rpi_is_sand8_frame(frame) ? raw_sand8_as_yuv420(avctx, pkt, frame) :
-+            av_rpi_is_sand16_frame(frame) ? raw_sand16_as_yuv420(avctx, pkt, frame) :
-+            av_rpi_is_sand30_frame(frame) ? raw_sand30_as_yuv420(avctx, pkt, frame) : -1;
-+        *got_packet = (ret == 0);
-+        return ret;
-+    }
-+#endif
-+
-+    ret = av_image_get_buffer_size(frame->format,
-+                                       frame->width, frame->height, 1);
-     if (ret < 0)
-         return ret;
- 
-
-From 3e02e6190c567a58c8153ba2627f61677b58d6fb Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 12:02:09 +0100
-Subject: [PATCH 005/186] Deal with the lack of trivial sand cropping
-
----
- fftools/ffmpeg.c        |  4 ++--
- fftools/ffmpeg_filter.c |  4 ++--
- libavutil/frame.c       | 11 +++++++++++
- libavutil/frame.h       | 10 ++++++++++
- 4 files changed, 25 insertions(+), 4 deletions(-)
-
-diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
-index c819d30ca523..ca5431aeb401 100644
---- a/fftools/ffmpeg.c
-+++ b/fftools/ffmpeg.c
-@@ -1996,8 +1996,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref
-                        av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout);
-         break;
-     case AVMEDIA_TYPE_VIDEO:
--        need_reinit |= ifilter->width  != frame->width ||
--                       ifilter->height != frame->height;
-+        need_reinit |= ifilter->width  != av_frame_cropped_width(frame) ||
-+                       ifilter->height != av_frame_cropped_height(frame);
-         break;
-     }
- 
-diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
-index 686a33c2bae7..cfe3351c522f 100644
---- a/fftools/ffmpeg_filter.c
-+++ b/fftools/ffmpeg_filter.c
-@@ -1283,8 +1283,8 @@ int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame)
- 
-     ifilter->format = frame->format;
- 
--    ifilter->width               = frame->width;
--    ifilter->height              = frame->height;
-+    ifilter->width               = av_frame_cropped_width(frame);
-+    ifilter->height              = av_frame_cropped_height(frame);
-     ifilter->sample_aspect_ratio = frame->sample_aspect_ratio;
- 
-     ifilter->sample_rate         = frame->sample_rate;
-diff --git a/libavutil/frame.c b/libavutil/frame.c
-index 9545477acc95..48621e40989f 100644
---- a/libavutil/frame.c
-+++ b/libavutil/frame.c
-@@ -16,6 +16,8 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
-+#include "config.h"
-+
- #include "channel_layout.h"
- #include "avassert.h"
- #include "buffer.h"
-@@ -27,6 +29,9 @@
- #include "mem.h"
- #include "samplefmt.h"
- #include "hwcontext.h"
-+#if CONFIG_SAND
-+#include "rpi_sand_fns.h"
-+#endif
- 
- #if FF_API_OLD_CHANNEL_LAYOUT
- #define CHECK_CHANNELS_CONSISTENCY(frame) \
-@@ -874,6 +879,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags)
-         (frame->crop_top + frame->crop_bottom) >= frame->height)
-         return AVERROR(ERANGE);
- 
-+#if CONFIG_SAND
-+    // Sand cannot be cropped - do not try
-+    if (av_rpi_is_sand_format(frame->format))
-+        return 0;
-+#endif
-+
-     desc = av_pix_fmt_desc_get(frame->format);
-     if (!desc)
-         return AVERROR_BUG;
-diff --git a/libavutil/frame.h b/libavutil/frame.h
-index 25802695493a..3a9d323325a9 100644
---- a/libavutil/frame.h
-+++ b/libavutil/frame.h
-@@ -957,6 +957,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags);
-  */
- const char *av_frame_side_data_name(enum AVFrameSideDataType type);
- 
-+
-+static inline int av_frame_cropped_width(const AVFrame * const frame)
-+{
-+    return frame->width - (frame->crop_left + frame->crop_right);
-+}
-+static inline int av_frame_cropped_height(const AVFrame * const frame)
-+{
-+    return frame->height - (frame->crop_top + frame->crop_bottom);
-+}
-+
- /**
-  * @}
-  */
-
-From c7d8474ffa5689abf99367c43ec2d39a1957f564 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 12:31:16 +0100
-Subject: [PATCH 006/186] Add an unsand filter
-
----
- configure                |   1 +
- libavfilter/Makefile     |   1 +
- libavfilter/allfilters.c |   1 +
- libavfilter/buffersrc.c  |   2 +-
- libavfilter/vf_unsand.c  | 228 +++++++++++++++++++++++++++++++++++++++
- 5 files changed, 232 insertions(+), 1 deletion(-)
- create mode 100644 libavfilter/vf_unsand.c
-
-diff --git a/configure b/configure
-index 5a5ada20711f..986f51b75b78 100755
---- a/configure
-+++ b/configure
-@@ -3754,6 +3754,7 @@ tonemap_opencl_filter_deps="opencl const_nan"
- transpose_opencl_filter_deps="opencl"
- transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
- transpose_vulkan_filter_deps="vulkan spirv_compiler"
-+unsand_filter_select="sand"
- unsharp_opencl_filter_deps="opencl"
- uspp_filter_deps="gpl avcodec"
- vaguedenoiser_filter_deps="gpl"
-diff --git a/libavfilter/Makefile b/libavfilter/Makefile
-index b3d3d981dd46..c14fc995a0b5 100644
---- a/libavfilter/Makefile
-+++ b/libavfilter/Makefile
-@@ -518,6 +518,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER)        += vf_transpose_vaapi.o vaapi_vpp.o
- OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER)       += vf_transpose_vulkan.o vulkan.o vulkan_filter.o
- OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
- OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
-+OBJS-$(CONFIG_UNSAND_FILTER)                 += vf_unsand.o
- OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
- OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
-                                                 opencl/unsharp.o
-diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
-index d7db46c2af92..b990a001529b 100644
---- a/libavfilter/allfilters.c
-+++ b/libavfilter/allfilters.c
-@@ -490,6 +490,7 @@ extern const AVFilter ff_vf_trim;
- extern const AVFilter ff_vf_unpremultiply;
- extern const AVFilter ff_vf_unsharp;
- extern const AVFilter ff_vf_unsharp_opencl;
-+extern const AVFilter ff_vf_unsand;
- extern const AVFilter ff_vf_untile;
- extern const AVFilter ff_vf_uspp;
- extern const AVFilter ff_vf_v360;
-diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c
-index ba17450b9378..0dbe5d23355c 100644
---- a/libavfilter/buffersrc.c
-+++ b/libavfilter/buffersrc.c
-@@ -201,7 +201,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
- 
-         switch (ctx->outputs[0]->type) {
-         case AVMEDIA_TYPE_VIDEO:
--            CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height,
-+            CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame),
-                                      frame->format, frame->pts);
-             break;
-         case AVMEDIA_TYPE_AUDIO:
-diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c
-new file mode 100644
-index 000000000000..7100f2fc9b1f
---- /dev/null
-+++ b/libavfilter/vf_unsand.c
-@@ -0,0 +1,228 @@
-+/*
-+ * Copyright (c) 2007 Bobby Bingham
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+/**
-+ * @file
-+ * format and noformat video filters
-+ */
-+
-+#include <string.h>
-+
-+#include "libavutil/internal.h"
-+#include "libavutil/mem.h"
-+#include "libavutil/pixdesc.h"
-+#include "libavutil/opt.h"
-+#include "libavutil/rpi_sand_fns.h"
-+
-+#include "avfilter.h"
-+#include "formats.h"
-+#include "internal.h"
-+#include "video.h"
-+
-+typedef struct UnsandContext {
-+    const AVClass *class;
-+} UnsandContext;
-+
-+static av_cold void uninit(AVFilterContext *ctx)
-+{
-+//    UnsandContext *s = ctx->priv;
-+}
-+
-+static av_cold int init(AVFilterContext *ctx)
-+{
-+//    UnsandContext *s = ctx->priv;
-+
-+    return 0;
-+}
-+
-+
-+static int filter_frame(AVFilterLink *link, AVFrame *in)
-+{
-+    AVFilterLink * const outlink = link->dst->outputs[0];
-+    AVFrame *out = NULL;
-+    int rv = 0;
-+
-+    if (outlink->format == in->format) {
-+        // If nothing to do then do nothing
-+        out = in;
-+    }
-+    else
-+    {
-+        if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL)
-+        {
-+            rv = AVERROR(ENOMEM);
-+            goto fail;
-+        }
-+        if (av_rpi_sand_to_planar_frame(out, in) != 0)
-+        {
-+            rv = -1;
-+            goto fail;
-+        }
-+
-+        av_frame_free(&in);
-+    }
-+
-+    return ff_filter_frame(outlink, out);
-+
-+fail:
-+    av_frame_free(&out);
-+    av_frame_free(&in);
-+    return rv;
-+}
-+
-+#if 0
-+static void dump_fmts(const AVFilterFormats * fmts)
-+{
-+    int i;
-+    if (fmts== NULL) {
-+        printf("NULL\n");
-+        return;
-+    }
-+    for (i = 0; i < fmts->nb_formats; ++i) {
-+        printf(" %d", fmts->formats[i]);
-+    }
-+    printf("\n");
-+}
-+#endif
-+
-+static int query_formats(AVFilterContext *ctx)
-+{
-+//    UnsandContext *s = ctx->priv;
-+    int ret;
-+
-+    // If we aren't connected at both ends then just do nothing
-+    if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL)
-+        return 0;
-+
-+    // Our output formats depend on our input formats and we can't/don't
-+    // want to convert between bit depths so we need to wait for the source
-+    // to have an opinion before we do
-+    if (ctx->inputs[0]->incfg.formats == NULL)
-+        return AVERROR(EAGAIN);
-+
-+    // Accept anything
-+    if (ctx->inputs[0]->outcfg.formats == NULL &&
-+        (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0)
-+        return ret;
-+
-+    // Filter out sand formats
-+
-+    // Generate a container if we don't already have one
-+    if (ctx->outputs[0]->incfg.formats == NULL)
-+    {
-+        // Somewhat rubbish way of ensuring we have a good structure
-+        const static enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE};
-+        AVFilterFormats *formats = ff_make_format_list(out_fmts);
-+
-+        if (formats == NULL)
-+            return AVERROR(ENOMEM);
-+        if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0)
-+            return ret;
-+    }
-+
-+    // Replace old format list with new filtered list derived from what our
-+    // input says it can do
-+    {
-+        const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats;
-+        AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats;
-+        enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats);
-+        int i;
-+        int n = 0;
-+        int seen_420p = 0;
-+        int seen_420p10 = 0;
-+
-+        for (i = 0; i < src_ff->nb_formats; ++i) {
-+            const enum AVPixelFormat f = src_ff->formats[i];
-+
-+            switch (f){
-+                case AV_PIX_FMT_YUV420P:
-+                case AV_PIX_FMT_SAND128:
-+                case AV_PIX_FMT_RPI4_8:
-+                    if (!seen_420p) {
-+                        seen_420p = 1;
-+                        dst_fmts[n++] = AV_PIX_FMT_YUV420P;
-+                    }
-+                    break;
-+                case AV_PIX_FMT_SAND64_10:
-+                case AV_PIX_FMT_YUV420P10:
-+                case AV_PIX_FMT_RPI4_10:
-+                    if (!seen_420p10) {
-+                        seen_420p10 = 1;
-+                        dst_fmts[n++] = AV_PIX_FMT_YUV420P10;
-+                    }
-+                    break;
-+                default:
-+                    dst_fmts[n++] = f;
-+                    break;
-+            }
-+        }
-+
-+        av_freep(&dst_ff->formats);
-+        dst_ff->formats = dst_fmts;
-+        dst_ff->nb_formats = n;
-+    }
-+
-+//    printf("Unsand: %s calc: ", __func__);
-+//    dump_fmts(ctx->outputs[0]->incfg.formats);
-+
-+    return 0;
-+}
-+
-+
-+#define OFFSET(x) offsetof(UnsandContext, x)
-+static const AVOption unsand_options[] = {
-+    { NULL }
-+};
-+
-+
-+AVFILTER_DEFINE_CLASS(unsand);
-+
-+static const AVFilterPad avfilter_vf_unsand_inputs[] = {
-+    {
-+        .name             = "default",
-+        .type             = AVMEDIA_TYPE_VIDEO,
-+        .filter_frame = filter_frame,
-+    },
-+    { NULL }
-+};
-+
-+static const AVFilterPad avfilter_vf_unsand_outputs[] = {
-+    {
-+        .name = "default",
-+        .type = AVMEDIA_TYPE_VIDEO
-+    },
-+};
-+
-+AVFilter ff_vf_unsand = {
-+    .name          = "unsand",
-+    .description   = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"),
-+
-+    .init          = init,
-+    .uninit        = uninit,
-+
-+    FILTER_QUERY_FUNC(query_formats),
-+
-+    .priv_size     = sizeof(UnsandContext),
-+    .priv_class    = &unsand_class,
-+
-+    FILTER_INPUTS(avfilter_vf_unsand_inputs),
-+    FILTER_OUTPUTS(avfilter_vf_unsand_outputs),
-+};
-+
-
-From d154e34686db628b84d74d0808b080c1d1ce5c41 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 12:37:07 +0100
-Subject: [PATCH 007/186] Reduce mmal compile warnings
-
----
- libavcodec/mmaldec.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
-index 3092f5851077..6f41b41ac4c0 100644
---- a/libavcodec/mmaldec.c
-+++ b/libavcodec/mmaldec.c
-@@ -24,6 +24,9 @@
-  * MMAL Video Decoder
-  */
- 
-+#pragma GCC diagnostic push
-+// Many many redundant decls in the header files
-+#pragma GCC diagnostic ignored "-Wredundant-decls"
- #include <bcm_host.h>
- #include <interface/mmal/mmal.h>
- #include <interface/mmal/mmal_parameters_video.h>
-@@ -31,6 +34,7 @@
- #include <interface/mmal/util/mmal_util_params.h>
- #include <interface/mmal/util/mmal_default_components.h>
- #include <interface/mmal/vc/mmal_vc_api.h>
-+#pragma GCC diagnostic pop
- #include <stdatomic.h>
- 
- #include "avcodec.h"
-
-From 1a7988f63e4137ebcc345470dcde20b62e71bdec Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 17:56:16 +0100
-Subject: [PATCH 008/186] Add chroma location to hevc parse
-
----
- libavcodec/hevc_parser.c | 13 +++++++++++++
- libavcodec/hevcdec.c     | 13 +++++++++++++
- 2 files changed, 26 insertions(+)
-
-diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c
-index 59f9a0ff3e57..4ae7222e8b8d 100644
---- a/libavcodec/hevc_parser.c
-+++ b/libavcodec/hevc_parser.c
-@@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal,
-     avctx->profile  = ps->sps->ptl.general_ptl.profile_idc;
-     avctx->level    = ps->sps->ptl.general_ptl.level_idc;
- 
-+    if (ps->sps->chroma_format_idc == 1) {
-+        avctx->chroma_sample_location = ps->sps->vui.common.chroma_loc_info_present_flag ?
-+            ps->sps->vui.common.chroma_sample_loc_type_top_field + 1 :
-+            AVCHROMA_LOC_LEFT;
-+    }
-+    else if (ps->sps->chroma_format_idc == 2 ||
-+             ps->sps->chroma_format_idc == 3) {
-+        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;;
-+    }
-+    else {
-+        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
-+    }
-+
-     if (ps->vps->vps_timing_info_present_flag) {
-         num = ps->vps->vps_num_units_in_tick;
-         den = ps->vps->vps_time_scale;
-diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
-index 0e2844f47cef..88482fd5215b 100644
---- a/libavcodec/hevcdec.c
-+++ b/libavcodec/hevcdec.c
-@@ -347,6 +347,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
-     else
-         avctx->color_range = AVCOL_RANGE_MPEG;
- 
-+    if (sps->chroma_format_idc == 1) {
-+        avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ?
-+            sps->vui.common.chroma_sample_loc_type_top_field + 1 :
-+            AVCHROMA_LOC_LEFT;
-+    }
-+    else if (sps->chroma_format_idc == 2 ||
-+             sps->chroma_format_idc == 3) {
-+        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;;
-+    }
-+    else {
-+        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
-+    }
-+
-     if (sps->vui.common.colour_description_present_flag) {
-         avctx->color_primaries = sps->vui.common.colour_primaries;
-         avctx->color_trc       = sps->vui.common.transfer_characteristics;
-
-From 8e5f8555b5908ca720c4ffd8b3a784d956883317 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 26 Sep 2022 18:20:50 +0100
-Subject: [PATCH 009/186] hwaccel: Add .abort_frame & use in hevcdec
-
----
- libavcodec/avcodec.h | 11 +++++++++++
- libavcodec/hevcdec.c |  7 ++++++-
- 2 files changed, 17 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
-index 39881a1d2bcf..32bc78e2be25 100644
---- a/libavcodec/avcodec.h
-+++ b/libavcodec/avcodec.h
-@@ -2221,6 +2221,17 @@ typedef struct AVHWAccel {
-      * that avctx->hwaccel_priv_data is invalid.
-      */
-     int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
-+
-+    /**
-+     * Called if parsing fails
-+     *
-+     * An error has occured, end_frame will not be called
-+     * start_frame & decode_slice may or may not have been called
-+     * Optional
-+     *
-+     * @param avctx the codec context
-+     */
-+    void (*abort_frame)(AVCodecContext *avctx);
- } AVHWAccel;
- 
- /**
-diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
-index 88482fd5215b..4ee564f3e028 100644
---- a/libavcodec/hevcdec.c
-+++ b/libavcodec/hevcdec.c
-@@ -3378,8 +3378,13 @@ static int hevc_decode_frame(AVCodecContext *avctx, AVFrame *rframe,
- 
-     s->ref = NULL;
-     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
--    if (ret < 0)
-+    if (ret < 0) {
-+        // Ensure that hwaccel knows this frame is over
-+        if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame)
-+            s->avctx->hwaccel->abort_frame(s->avctx);
-+
-         return ret;
-+    }
- 
-     if (avctx->hwaccel) {
-         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
-
-From f09419a30e1beae74ae167d8b33de4214cae6f0a Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 26 Sep 2022 18:26:17 +0100
-Subject: [PATCH 010/186] hwaccel: Add CAP_MT_SAFE for accels that can use
- multi-thread
-
----
- libavcodec/hwconfig.h      | 1 +
- libavcodec/pthread_frame.c | 7 +++++--
- 2 files changed, 6 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
-index 721424912c46..c43ad55245ac 100644
---- a/libavcodec/hwconfig.h
-+++ b/libavcodec/hwconfig.h
-@@ -24,6 +24,7 @@
- 
- 
- #define HWACCEL_CAP_ASYNC_SAFE      (1 << 0)
-+#define HWACCEL_CAP_MT_SAFE         (1 << 1)
- 
- 
- typedef struct AVCodecHWConfigInternal {
-diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
-index d9d5afaa82d8..2cc89a41f55f 100644
---- a/libavcodec/pthread_frame.c
-+++ b/libavcodec/pthread_frame.c
-@@ -204,7 +204,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
- 
-         /* if the previous thread uses hwaccel then we take the lock to ensure
-          * the threads don't run concurrently */
--        if (avctx->hwaccel) {
-+        if (avctx->hwaccel &&
-+            !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
-             pthread_mutex_lock(&p->parent->hwaccel_mutex);
-             p->hwaccel_serializing = 1;
-         }
-@@ -590,7 +591,9 @@ void ff_thread_finish_setup(AVCodecContext *avctx) {
- 
-     if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return;
- 
--    if (avctx->hwaccel && !p->hwaccel_serializing) {
-+    if (avctx->hwaccel &&
-+        !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) &&
-+        !p->hwaccel_serializing) {
-         pthread_mutex_lock(&p->parent->hwaccel_mutex);
-         p->hwaccel_serializing = 1;
-     }
-
-From 8142c4a1774c80ce059ddff3d6e4377803e3081d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 17:59:08 +0100
-Subject: [PATCH 011/186] Weak link utils
-
----
- libavcodec/weak_link.c | 102 +++++++++++++++++++++++++++++++++++++++++
- libavcodec/weak_link.h |  23 ++++++++++
- 2 files changed, 125 insertions(+)
- create mode 100644 libavcodec/weak_link.c
- create mode 100644 libavcodec/weak_link.h
-
-diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c
-new file mode 100644
-index 000000000000..f234a985b9c1
---- /dev/null
-+++ b/libavcodec/weak_link.c
-@@ -0,0 +1,102 @@
-+#include <stdlib.h>
-+#include <pthread.h>
-+#include <stdatomic.h>
-+#include "weak_link.h"
-+
-+struct ff_weak_link_master {
-+    atomic_int ref_count;    /* 0 is single ref for easier atomics */
-+    pthread_rwlock_t lock;
-+    void * ptr;
-+};
-+
-+static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c)
-+{
-+    return (struct ff_weak_link_master *)c;
-+}
-+
-+struct ff_weak_link_master * ff_weak_link_new(void * p)
-+{
-+    struct ff_weak_link_master * w = malloc(sizeof(*w));
-+    if (!w)
-+        return NULL;
-+    w->ptr = p;
-+    if (pthread_rwlock_init(&w->lock, NULL)) {
-+        free(w);
-+        return NULL;
-+    }
-+    return w;
-+}
-+
-+static void weak_link_do_unref(struct ff_weak_link_master * const w)
-+{
-+    int n = atomic_fetch_sub(&w->ref_count, 1);
-+    if (n)
-+        return;
-+
-+    pthread_rwlock_destroy(&w->lock);
-+    free(w);
-+}
-+
-+// Unref & break link
-+void ff_weak_link_break(struct ff_weak_link_master ** ppLink)
-+{
-+    struct ff_weak_link_master * const w = *ppLink;
-+    if (!w)
-+        return;
-+
-+    *ppLink = NULL;
-+    pthread_rwlock_wrlock(&w->lock);
-+    w->ptr = NULL;
-+    pthread_rwlock_unlock(&w->lock);
-+
-+    weak_link_do_unref(w);
-+}
-+
-+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w)
-+{
-+    if (!w)
-+        return NULL;
-+    atomic_fetch_add(&w->ref_count, 1);
-+    return (struct ff_weak_link_client*)w;
-+}
-+
-+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink)
-+{
-+    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
-+    if (!w)
-+        return;
-+
-+    *ppLink = NULL;
-+    weak_link_do_unref(w);
-+}
-+
-+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink)
-+{
-+    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
-+
-+    if (!w)
-+        return NULL;
-+
-+    if (pthread_rwlock_rdlock(&w->lock))
-+        goto broken;
-+
-+    if (w->ptr)
-+        return w->ptr;
-+
-+    pthread_rwlock_unlock(&w->lock);
-+
-+broken:
-+    *ppLink = NULL;
-+    weak_link_do_unref(w);
-+    return NULL;
-+}
-+
-+// Ignores a NULL c (so can be on the return path of both broken & live links)
-+void ff_weak_link_unlock(struct ff_weak_link_client * c)
-+{
-+    struct ff_weak_link_master * const w = weak_link_x(c);
-+    if (w)
-+        pthread_rwlock_unlock(&w->lock);
-+}
-+
-+
-diff --git a/libavcodec/weak_link.h b/libavcodec/weak_link.h
-new file mode 100644
-index 000000000000..415b6a27a05c
---- /dev/null
-+++ b/libavcodec/weak_link.h
-@@ -0,0 +1,23 @@
-+struct ff_weak_link_master;
-+struct ff_weak_link_client;
-+
-+struct ff_weak_link_master * ff_weak_link_new(void * p);
-+void ff_weak_link_break(struct ff_weak_link_master ** ppLink);
-+
-+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w);
-+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink);
-+
-+// Returns NULL if link broken - in this case it will also zap
-+//   *ppLink and unref the weak_link.
-+// Returns NULL if *ppLink is NULL (so a link once broken stays broken)
-+//
-+// The above does mean that there is a race if this is called simultainiously
-+// by two threads using the same weak_link_client (so don't do that)
-+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink);
-+void ff_weak_link_unlock(struct ff_weak_link_client * c);
-+
-+
-+
-+
-+
-+
-
-From 2610fffb3ca25f1a531876c80bf4c4b43c934386 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 19:23:26 +0100
-Subject: [PATCH 012/186] Add v4l2_req V4L2 request H265 drm_prime decode
-
-Has the abiliy to switch between kernel API versions at runtime. This
-could be removed later once teher is no chance of usage on an old
-kernel.
----
- configure                       |   14 +
- libavcodec/Makefile             |    4 +
- libavcodec/hevc-ctrls-v1.h      |  229 +++++
- libavcodec/hevc-ctrls-v2.h      |  257 +++++
- libavcodec/hevcdec.c            |   10 +
- libavcodec/hwaccels.h           |    1 +
- libavcodec/hwconfig.h           |    2 +
- libavcodec/v4l2_req_decode_q.c  |   84 ++
- libavcodec/v4l2_req_decode_q.h  |   25 +
- libavcodec/v4l2_req_devscan.c   |  449 +++++++++
- libavcodec/v4l2_req_devscan.h   |   23 +
- libavcodec/v4l2_req_dmabufs.c   |  266 ++++++
- libavcodec/v4l2_req_dmabufs.h   |   40 +
- libavcodec/v4l2_req_hevc_v1.c   |    3 +
- libavcodec/v4l2_req_hevc_v2.c   |    3 +
- libavcodec/v4l2_req_hevc_vx.c   | 1213 +++++++++++++++++++++++
- libavcodec/v4l2_req_media.c     | 1596 +++++++++++++++++++++++++++++++
- libavcodec/v4l2_req_media.h     |  151 +++
- libavcodec/v4l2_req_pollqueue.c |  361 +++++++
- libavcodec/v4l2_req_pollqueue.h |   18 +
- libavcodec/v4l2_req_utils.h     |   27 +
- libavcodec/v4l2_request_hevc.c  |  297 ++++++
- libavcodec/v4l2_request_hevc.h  |  102 ++
- 23 files changed, 5175 insertions(+)
- create mode 100644 libavcodec/hevc-ctrls-v1.h
- create mode 100644 libavcodec/hevc-ctrls-v2.h
- create mode 100644 libavcodec/v4l2_req_decode_q.c
- create mode 100644 libavcodec/v4l2_req_decode_q.h
- create mode 100644 libavcodec/v4l2_req_devscan.c
- create mode 100644 libavcodec/v4l2_req_devscan.h
- create mode 100644 libavcodec/v4l2_req_dmabufs.c
- create mode 100644 libavcodec/v4l2_req_dmabufs.h
- create mode 100644 libavcodec/v4l2_req_hevc_v1.c
- create mode 100644 libavcodec/v4l2_req_hevc_v2.c
- create mode 100644 libavcodec/v4l2_req_hevc_vx.c
- create mode 100644 libavcodec/v4l2_req_media.c
- create mode 100644 libavcodec/v4l2_req_media.h
- create mode 100644 libavcodec/v4l2_req_pollqueue.c
- create mode 100644 libavcodec/v4l2_req_pollqueue.h
- create mode 100644 libavcodec/v4l2_req_utils.h
- create mode 100644 libavcodec/v4l2_request_hevc.c
- create mode 100644 libavcodec/v4l2_request_hevc.h
-
-diff --git a/configure b/configure
-index 986f51b75b78..c09144673050 100755
---- a/configure
-+++ b/configure
-@@ -281,6 +281,7 @@ External library support:
-                            if openssl, gnutls or mbedtls is not used [no]
-   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
-   --enable-libuavs3d       enable AVS3 decoding via libuavs3d [no]
-+  --enable-libudev         enable libudev [no]
-   --enable-libv4l2         enable libv4l2/v4l-utils [no]
-   --enable-libvidstab      enable video stabilization using vid.stab [no]
-   --enable-libvmaf         enable vmaf filter via libvmaf [no]
-@@ -351,6 +352,7 @@ External library support:
    --enable-omx-rpi         enable OpenMAX IL code for Raspberry Pi [no]
    --enable-rkmpp           enable Rockchip Media Process Platform code [no]
    --disable-v4l2-m2m       disable V4L2 mem2mem code [autodetect]
@@ -5268,15 +35,25 @@ index 986f51b75b78..c09144673050 100755
    --disable-vaapi          disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
    --disable-vdpau          disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
    --disable-videotoolbox   disable VideoToolbox code [autodetect]
-@@ -1858,6 +1860,7 @@ EXTERNAL_LIBRARY_LIST="
-     libtheora
-     libtwolame
-     libuavs3d
+@@ -1842,7 +1848,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST="
+     avfoundation
+     bzlib
+     coreimage
++    epoxy
+     iconv
 +    libudev
-     libv4l2
-     libvmaf
-     libvorbis
-@@ -1914,6 +1917,7 @@ HWACCEL_LIBRARY_LIST="
+     libxcb
+     libxcb_shm
+     libxcb_shape
+@@ -1916,6 +1924,7 @@ EXTERNAL_LIBRARY_LIST="
+     libcodec2
+     libdav1d
+     libdc1394
++    libdrm
+     libflite
+     libfontconfig
+     libfreetype
+@@ -2025,6 +2034,7 @@ HWACCEL_LIBRARY_LIST="
      mmal
      omx
      opencl
@@ -5284,15 +61,38 @@ index 986f51b75b78..c09144673050 100755
  "
  
  DOCUMENT_LIST="
-@@ -3002,6 +3006,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext"
- dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
+@@ -2042,10 +2052,14 @@ FEATURE_LIST="
+     omx_rpi
+     runtime_cpudetect
+     safe_bitstream_reader
++    sand
+     shared
+     small
+     static
+     swscale_alpha
++    v4l2_req_hevc_vx
++    vout_drm
++    vout_egl
+ "
+ 
+ # this list should be kept in linking order
+@@ -2621,6 +2635,7 @@ CONFIG_EXTRA="
+     rtpdec
+     rtpenc_chain
+     rv34dsp
++    sand
+     scene_sad
+     sinewin
+     snappy
+@@ -3146,6 +3161,7 @@ dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
  ffnvcodec_deps_any="libdl LoadLibrary"
+ mediacodec_deps="android mediandk"
  nvdec_deps="ffnvcodec"
 +v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev"
  vaapi_x11_deps="xlib_x11"
  videotoolbox_hwaccel_deps="videotoolbox pthreads"
  videotoolbox_hwaccel_extralibs="-framework QuartzCore"
-@@ -3045,6 +3050,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC"
+@@ -3200,6 +3216,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC"
  hevc_dxva2_hwaccel_select="hevc_decoder"
  hevc_nvdec_hwaccel_deps="nvdec"
  hevc_nvdec_hwaccel_select="hevc_decoder"
@@ -5301,56 +101,226 @@ index 986f51b75b78..c09144673050 100755
  hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
  hevc_vaapi_hwaccel_select="hevc_decoder"
  hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
-@@ -6696,6 +6703,7 @@ enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame
-                              { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
-                                die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
- enabled libuavs3d         && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uavs3d.h uavs3d_decode
-+enabled libudev           && require_pkg_config libudev libudev libudev.h udev_new
- enabled libv4l2           && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
- enabled libvidstab        && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
- enabled libvmaf           && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init
-@@ -6798,6 +6806,10 @@ enabled rkmpp             && { require_pkg_config rkmpp rockchip_mpp  rockchip/r
+@@ -3746,8 +3764,11 @@ sndio_indev_deps="sndio"
+ sndio_outdev_deps="sndio"
+ v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
+ v4l2_indev_suggest="libv4l2"
++v4l2_outdev_deps="libdrm"
+ v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
+ v4l2_outdev_suggest="libv4l2"
++vout_drm_outdev_deps="libdrm"
++vout_egl_outdev_deps="xlib epoxy"
+ vfwcap_indev_deps="vfw32 vfwcap_defines"
+ xcbgrab_indev_deps="libxcb"
+ xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
+@@ -3854,6 +3875,7 @@ cropdetect_filter_deps="gpl"
+ deinterlace_qsv_filter_deps="libmfx"
+ deinterlace_qsv_filter_select="qsvvpp"
+ deinterlace_vaapi_filter_deps="vaapi"
++deinterlace_v4l2m2m_filter_deps="libdrm v4l2_m2m"
+ delogo_filter_deps="gpl"
+ denoise_vaapi_filter_deps="vaapi"
+ derain_filter_select="dnn"
+@@ -3962,6 +3984,7 @@ transpose_opencl_filter_deps="opencl"
+ transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
+ transpose_vt_filter_deps="videotoolbox VTPixelRotationSessionCreate"
+ transpose_vulkan_filter_deps="vulkan spirv_compiler"
++unsand_filter_select="sand"
+ unsharp_opencl_filter_deps="opencl"
+ uspp_filter_deps="gpl avcodec"
+ vaguedenoiser_filter_deps="gpl"
+@@ -3973,6 +3996,7 @@ libvmaf_cuda_filter_deps="libvmaf libvmaf_cuda ffnvcodec"
+ zmq_filter_deps="libzmq"
+ zoompan_filter_deps="swscale"
+ zscale_filter_deps="libzimg const_nan"
++scale_v4l2m2m_filter_deps="libdrm v4l2_m2m"
+ scale_vaapi_filter_deps="vaapi"
+ scale_vt_filter_deps="videotoolbox VTPixelTransferSessionCreate"
+ scale_vulkan_filter_deps="vulkan spirv_compiler"
+@@ -6623,6 +6647,12 @@ if enabled xlib; then
+         disable xlib
+ fi
+ 
++enabled libudev &&
++    check_pkg_config libudev libudev libudev.h udev_new
++
++enabled epoxy &&
++    check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version
++
+ check_headers direct.h
+ check_headers dirent.h
+ check_headers dxgidebug.h
+@@ -7113,8 +7143,16 @@ enabled rkmpp             && { require_pkg_config rkmpp rockchip_mpp  rockchip/r
                                 { enabled libdrm ||
                                   die "ERROR: rkmpp requires --enable-libdrm"; }
                               }
 +enabled v4l2_request      && { enabled libdrm ||
 +                               die "ERROR: v4l2-request requires --enable-libdrm"; } &&
 +                             { enabled libudev ||
-+                               die "ERROR: v4l2-request requires --enable-libudev"; }
- enabled vapoursynth       && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init
++                               die "ERROR: v4l2-request requires libudev"; }
+ enabled vapoursynth       && require_headers "vapoursynth/VSScript4.h vapoursynth/VapourSynth4.h"
  
++enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; }
++
++enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } &&
++                    { enabled xlib  || die "ERROR: vout_egl requires xlib"; }
  
-@@ -6880,6 +6892,8 @@ if enabled v4l2_m2m; then
+ if enabled gcrypt; then
+     GCRYPT_CONFIG="${cross_prefix}libgcrypt-config"
+@@ -7197,6 +7235,10 @@ if enabled v4l2_m2m; then
      check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;"
  fi
  
 +check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
 +check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;"
++disable v4l2_req_hevc_vx
++
  check_headers sys/videoio.h
  test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
  
+@@ -7709,6 +7751,9 @@ enabled threads || warn \
+     "that the libraries from this build MUST NOT be used in a multi-threaded"\
+     "environment."
+ 
++# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done
++enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx
++
+ case $target_os in
+ haiku)
+     disable memalign
+diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
+index 733d551fa419..b3a3b00580a2 100644
+--- a/fftools/ffmpeg.h
++++ b/fftools/ffmpeg.h
+@@ -715,6 +715,8 @@ extern enum VideoSyncMethod video_sync_method;
+ extern float frame_drop_threshold;
+ extern int do_benchmark;
+ extern int do_benchmark_all;
++extern int no_cvt_hw;
++extern int do_deinterlace;
+ extern int do_hex_dump;
+ extern int do_pkt_dump;
+ extern int copy_ts;
+diff --git a/fftools/ffmpeg_dec.c b/fftools/ffmpeg_dec.c
+index 2723a0312e92..aee41267fb9c 100644
+--- a/fftools/ffmpeg_dec.c
++++ b/fftools/ffmpeg_dec.c
+@@ -392,7 +392,7 @@ static int video_frame_process(DecoderPriv *dp, AVFrame *frame,
+     }
+ #endif
+ 
+-    if (frame->format == dp->hwaccel_pix_fmt) {
++    if (!no_cvt_hw && frame->format == dp->hwaccel_pix_fmt) {
+         int err = hwaccel_retrieve_data(dp->dec_ctx, frame);
+         if (err < 0)
+             return err;
+@@ -1333,12 +1333,15 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat
+             break;
+ 
+         if (dp->hwaccel_id == HWACCEL_GENERIC ||
+-            dp->hwaccel_id == HWACCEL_AUTO) {
++            dp->hwaccel_id == HWACCEL_AUTO ||
++			no_cvt_hw) {
+             for (int i = 0;; i++) {
+                 config = avcodec_get_hw_config(s->codec, i);
+                 if (!config)
+                     break;
+-                if (!(config->methods &
++                if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL))
++                    av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p);
++                else if (!(config->methods &
+                       AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+                     continue;
+                 if (config->pix_fmt == *p)
+diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
+index 7ec328e04eaa..de873732958b 100644
+--- a/fftools/ffmpeg_filter.c
++++ b/fftools/ffmpeg_filter.c
+@@ -2782,8 +2782,8 @@ static int send_frame(FilterGraph *fg, FilterGraphThread *fgt,
+         break;
+     case AVMEDIA_TYPE_VIDEO:
+         if (ifp->format != frame->format ||
+-            ifp->width  != frame->width ||
+-            ifp->height != frame->height ||
++            ifp->width  != av_frame_cropped_width(frame) ||
++            ifp->height != av_frame_cropped_height(frame) ||
+             ifp->color_space != frame->colorspace ||
+             ifp->color_range != frame->color_range)
+             need_reinit |= VIDEO_CHANGED;
+@@ -2804,6 +2804,9 @@ static int send_frame(FilterGraph *fg, FilterGraphThread *fgt,
+         (ifp->hw_frames_ctx && ifp->hw_frames_ctx->data != frame->hw_frames_ctx->data))
+         need_reinit |= HWACCEL_CHANGED;
+ 
++    if (no_cvt_hw && fgt->graph)
++        need_reinit = 0;
++
+     if (need_reinit) {
+         ret = ifilter_parameters_from_frame(ifilter, frame);
+         if (ret < 0)
+diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c
+index 5d4c06c28e85..f39bd29b6e4a 100644
+--- a/fftools/ffmpeg_hw.c
++++ b/fftools/ffmpeg_hw.c
+@@ -73,6 +73,8 @@ static char *hw_device_default_name(enum AVHWDeviceType type)
+     char *name;
+     size_t index_pos;
+     int index, index_limit = 1000;
++    if (!type_name)
++        return NULL;
+     index_pos = strlen(type_name);
+     name = av_malloc(index_pos + 4);
+     if (!name)
+diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
+index f639a1cf0aab..9670585821e8 100644
+--- a/fftools/ffmpeg_opt.c
++++ b/fftools/ffmpeg_opt.c
+@@ -62,6 +62,7 @@ enum VideoSyncMethod video_sync_method = VSYNC_AUTO;
+ float frame_drop_threshold = 0;
+ int do_benchmark      = 0;
+ int do_benchmark_all  = 0;
++int no_cvt_hw         = 0;
+ int do_hex_dump       = 0;
+ int do_pkt_dump       = 0;
+ int copy_ts           = 0;
+@@ -1574,8 +1575,11 @@ const OptionDef options[] = {
+     { "benchmark_all",          OPT_TYPE_BOOL, OPT_EXPERT,
+         { &do_benchmark_all },
+       "add timings for each task" },
+-    { "progress",               OPT_TYPE_FUNC, OPT_FUNC_ARG | OPT_EXPERT,
+-        { .func_arg = opt_progress },
++    { "no_cvt_hw",      		OPT_TYPE_BOOL, OPT_EXPERT,
++		{ &no_cvt_hw },
++      "do not auto-convert hw frames to sw" },
++	{ "progress",               OPT_TYPE_FUNC, OPT_FUNC_ARG | OPT_EXPERT,
++		{ .func_arg = opt_progress },
+       "write program-readable progress information", "url" },
+     { "stdin",                  OPT_TYPE_BOOL, OPT_EXPERT,
+         { &stdin_interaction },
 diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 389253f5d08e..2d440b56486b 100644
+index a4fcce3b4215..2ce880c3db63 100644
 --- a/libavcodec/Makefile
 +++ b/libavcodec/Makefile
-@@ -170,6 +170,8 @@ OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
+@@ -175,7 +175,10 @@ OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
+ OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
  OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
  OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
- OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
+-OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
++OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\
++                                          weak_link.o v4l2_req_dmabufs.o
 +OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\
 +					  v4l2_req_devscan.o weak_link.o
  OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
  OBJS-$(CONFIG_WMV2DSP)                 += wmv2dsp.o
  
-@@ -996,6 +998,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)       += dxva2_hevc.o
- OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
+@@ -1025,6 +1028,8 @@ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
+ OBJS-$(CONFIG_HEVC_D3D12VA_HWACCEL)       += dxva2_hevc.o d3d12va_hevc.o
  OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
  OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec.o
-+OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o\
-+                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o
++OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o
++OBJS-$(CONFIG_V4L2_REQ_HEVC_VX)           += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o
  OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
  OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o h265_profile_level.o
- OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
+ OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL)        += vulkan_decode.o vulkan_hevc.o
 diff --git a/libavcodec/hevc-ctrls-v1.h b/libavcodec/hevc-ctrls-v1.h
 new file mode 100644
 index 000000000000..72cbba0953dc
@@ -5849,13997 +819,6 @@ index 000000000000..7cbbbf055f47
 +};
 +
 +#endif
-diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
-index 4ee564f3e028..e892436f9405 100644
---- a/libavcodec/hevcdec.c
-+++ b/libavcodec/hevcdec.c
-@@ -416,6 +416,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
- #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
-                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
-                      CONFIG_HEVC_NVDEC_HWACCEL + \
-+                     CONFIG_HEVC_V4L2REQUEST_HWACCEL + \
-                      CONFIG_HEVC_VAAPI_HWACCEL + \
-                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
-                      CONFIG_HEVC_VDPAU_HWACCEL)
-@@ -442,6 +443,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
- #endif
- #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
-         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
-+#endif
-+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
-+        *fmt++ = AV_PIX_FMT_DRM_PRIME;
- #endif
-         break;
-     case AV_PIX_FMT_YUV420P10:
-@@ -463,6 +467,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
- #endif
- #if CONFIG_HEVC_NVDEC_HWACCEL
-         *fmt++ = AV_PIX_FMT_CUDA;
-+#endif
-+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
-+        *fmt++ = AV_PIX_FMT_DRM_PRIME;
- #endif
-         break;
-     case AV_PIX_FMT_YUV444P:
-@@ -3752,6 +3759,9 @@ const FFCodec ff_hevc_decoder = {
- #if CONFIG_HEVC_NVDEC_HWACCEL
-                                HWACCEL_NVDEC(hevc),
- #endif
-+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
-+                               HWACCEL_V4L2REQUEST(hevc),
-+#endif
- #if CONFIG_HEVC_VAAPI_HWACCEL
-                                HWACCEL_VAAPI(hevc),
- #endif
-diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
-index aca55831f32f..f32d1c4ec4f8 100644
---- a/libavcodec/hwaccels.h
-+++ b/libavcodec/hwaccels.h
-@@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_hwaccel;
- extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
- extern const AVHWAccel ff_hevc_dxva2_hwaccel;
- extern const AVHWAccel ff_hevc_nvdec_hwaccel;
-+extern const AVHWAccel ff_hevc_v4l2request_hwaccel;
- extern const AVHWAccel ff_hevc_vaapi_hwaccel;
- extern const AVHWAccel ff_hevc_vdpau_hwaccel;
- extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
-diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
-index c43ad55245ac..b8aa38307169 100644
---- a/libavcodec/hwconfig.h
-+++ b/libavcodec/hwconfig.h
-@@ -71,6 +71,8 @@ typedef struct AVCodecHWConfigInternal {
-     HW_CONFIG_HWACCEL(1, 1, 0, D3D11,        D3D11VA,      ff_ ## codec ## _d3d11va2_hwaccel)
- #define HWACCEL_NVDEC(codec) \
-     HW_CONFIG_HWACCEL(1, 1, 0, CUDA,         CUDA,         ff_ ## codec ## _nvdec_hwaccel)
-+#define HWACCEL_V4L2REQUEST(codec) \
-+    HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME,    DRM,          ff_ ## codec ## _v4l2request_hwaccel)
- #define HWACCEL_VAAPI(codec) \
-     HW_CONFIG_HWACCEL(1, 1, 1, VAAPI,        VAAPI,        ff_ ## codec ## _vaapi_hwaccel)
- #define HWACCEL_VDPAU(codec) \
-diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c
-new file mode 100644
-index 000000000000..5b3fb958fac3
---- /dev/null
-+++ b/libavcodec/v4l2_req_decode_q.c
-@@ -0,0 +1,84 @@
-+#include <memory.h>
-+#include <semaphore.h>
-+#include <pthread.h>
-+
-+#include "v4l2_req_decode_q.h"
-+
-+int decode_q_in_q(const req_decode_ent * const d)
-+{
-+    return d->in_q;
-+}
-+
-+void decode_q_add(req_decode_q * const q, req_decode_ent * const d)
-+{
-+    pthread_mutex_lock(&q->q_lock);
-+    if (!q->head) {
-+        q->head = d;
-+        q->tail = d;
-+        d->prev = NULL;
-+    }
-+    else {
-+        q->tail->next = d;
-+        d->prev = q->tail;
-+        q->tail = d;
-+    }
-+    d->next = NULL;
-+    d->in_q = 1;
-+    pthread_mutex_unlock(&q->q_lock);
-+}
-+
-+// Remove entry from Q - if head wake-up anything that was waiting
-+void decode_q_remove(req_decode_q * const q, req_decode_ent * const d)
-+{
-+    int try_signal = 0;
-+
-+    if (!d->in_q)
-+        return;
-+
-+    pthread_mutex_lock(&q->q_lock);
-+    if (d->prev)
-+        d->prev->next = d->next;
-+    else {
-+        try_signal = 1;  // Only need to signal if we were head
-+        q->head = d->next;
-+    }
-+
-+    if (d->next)
-+        d->next->prev = d->prev;
-+    else
-+        q->tail = d->prev;
-+
-+    // Not strictly needed but makes debug easier
-+    d->next = NULL;
-+    d->prev = NULL;
-+    d->in_q = 0;
-+    pthread_mutex_unlock(&q->q_lock);
-+
-+    if (try_signal)
-+        pthread_cond_broadcast(&q->q_cond);
-+}
-+
-+void decode_q_wait(req_decode_q * const q, req_decode_ent * const d)
-+{
-+    pthread_mutex_lock(&q->q_lock);
-+
-+    while (q->head != d)
-+        pthread_cond_wait(&q->q_cond, &q->q_lock);
-+
-+    pthread_mutex_unlock(&q->q_lock);
-+}
-+
-+void decode_q_uninit(req_decode_q * const q)
-+{
-+    pthread_mutex_destroy(&q->q_lock);
-+    pthread_cond_destroy(&q->q_cond);
-+}
-+
-+void decode_q_init(req_decode_q * const q)
-+{
-+    memset(q, 0, sizeof(*q));
-+    pthread_mutex_init(&q->q_lock, NULL);
-+    pthread_cond_init(&q->q_cond, NULL);
-+}
-+
-+
-diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h
-new file mode 100644
-index 000000000000..af7bbe1de462
---- /dev/null
-+++ b/libavcodec/v4l2_req_decode_q.h
-@@ -0,0 +1,25 @@
-+#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H
-+#define AVCODEC_V4L2_REQ_DECODE_Q_H
-+
-+typedef struct req_decode_ent {
-+    struct req_decode_ent * next;
-+    struct req_decode_ent * prev;
-+    int in_q;
-+} req_decode_ent;
-+
-+typedef struct req_decode_q {
-+    pthread_mutex_t q_lock;
-+    pthread_cond_t q_cond;
-+    req_decode_ent * head;
-+    req_decode_ent * tail;
-+} req_decode_q;
-+
-+int decode_q_in_q(const req_decode_ent * const d);
-+void decode_q_add(req_decode_q * const q, req_decode_ent * const d);
-+void decode_q_remove(req_decode_q * const q, req_decode_ent * const d);
-+void decode_q_wait(req_decode_q * const q, req_decode_ent * const d);
-+void decode_q_uninit(req_decode_q * const q);
-+void decode_q_init(req_decode_q * const q);
-+
-+#endif
-+
-diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c
-new file mode 100644
-index 000000000000..cfa94d55c49b
---- /dev/null
-+++ b/libavcodec/v4l2_req_devscan.c
-@@ -0,0 +1,449 @@
-+#include <errno.h>
-+#include <fcntl.h>
-+#include <libudev.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <unistd.h>
-+
-+#include <sys/ioctl.h>
-+#include <sys/sysmacros.h>
-+
-+#include <linux/media.h>
-+#include <linux/videodev2.h>
-+
-+#include "v4l2_req_devscan.h"
-+#include "v4l2_req_utils.h"
-+
-+struct decdev {
-+    enum v4l2_buf_type src_type;
-+    uint32_t src_fmt_v4l2;
-+    const char * vname;
-+    const char * mname;
-+};
-+
-+struct devscan {
-+    struct decdev env;
-+    unsigned int dev_size;
-+    unsigned int dev_count;
-+    struct decdev *devs;
-+};
-+
-+static int video_src_pixfmt_supported(uint32_t fmt)
-+{
-+    return 1;
-+}
-+
-+static void v4l2_setup_format(struct v4l2_format *format, unsigned int type,
-+                  unsigned int width, unsigned int height,
-+                  unsigned int pixelformat)
-+{
-+    unsigned int sizeimage;
-+
-+    memset(format, 0, sizeof(*format));
-+    format->type = type;
-+
-+    sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0;
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(type)) {
-+        format->fmt.pix_mp.width = width;
-+        format->fmt.pix_mp.height = height;
-+        format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage;
-+        format->fmt.pix_mp.pixelformat = pixelformat;
-+    } else {
-+        format->fmt.pix.width = width;
-+        format->fmt.pix.height = height;
-+        format->fmt.pix.sizeimage = sizeimage;
-+        format->fmt.pix.pixelformat = pixelformat;
-+    }
-+}
-+
-+static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat,
-+            unsigned int width, unsigned int height)
-+{
-+    struct v4l2_format format;
-+
-+    v4l2_setup_format(&format, type, width, height, pixelformat);
-+
-+    return ioctl(video_fd, VIDIOC_S_FMT, &format) ? -errno : 0;
-+}
-+
-+static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities)
-+{
-+    struct v4l2_capability capability = { 0 };
-+    int rc;
-+
-+    rc = ioctl(video_fd, VIDIOC_QUERYCAP, &capability);
-+    if (rc < 0)
-+        return -errno;
-+
-+    if (capabilities != NULL) {
-+        if ((capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0)
-+            *capabilities = capability.device_caps;
-+        else
-+            *capabilities = capability.capabilities;
-+    }
-+
-+    return 0;
-+}
-+
-+static int devscan_add(struct devscan *const scan,
-+                       enum v4l2_buf_type src_type,
-+                       uint32_t src_fmt_v4l2,
-+                       const char * vname,
-+                       const char * mname)
-+{
-+    struct decdev *d;
-+
-+    if (scan->dev_size <= scan->dev_count) {
-+        unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2;
-+        d = realloc(scan->devs, n * sizeof(*d));
-+        if (!d)
-+            return -ENOMEM;
-+        scan->devs = d;
-+        scan->dev_size = n;
-+    }
-+
-+    d = scan->devs + scan->dev_count;
-+    d->src_type = src_type;
-+    d->src_fmt_v4l2 = src_fmt_v4l2;
-+    d->vname = strdup(vname);
-+    if (!d->vname)
-+        return -ENOMEM;
-+    d->mname = strdup(mname);
-+    if (!d->mname) {
-+        free((char *)d->vname);
-+        return -ENOMEM;
-+    }
-+    ++scan->dev_count;
-+    return 0;
-+}
-+
-+void devscan_delete(struct devscan **const pScan)
-+{
-+    unsigned int i;
-+    struct devscan * const scan = *pScan;
-+
-+    if (!scan)
-+        return;
-+    *pScan = NULL;
-+
-+    for (i = 0; i < scan->dev_count; ++i) {
-+        free((char*)scan->devs[i].mname);
-+        free((char*)scan->devs[i].vname);
-+    }
-+    free(scan->devs);
-+    free(scan);
-+}
-+
-+#define REQ_BUF_CAPS (\
-+    V4L2_BUF_CAP_SUPPORTS_DMABUF |\
-+    V4L2_BUF_CAP_SUPPORTS_REQUESTS |\
-+    V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
-+
-+static void probe_formats(void * const dc,
-+              struct devscan *const scan,
-+              const int fd,
-+              const unsigned int type_v4l2,
-+              const char *const mpath,
-+              const char *const vpath)
-+{
-+    unsigned int i;
-+    for (i = 0;; ++i) {
-+        struct v4l2_fmtdesc fmtdesc = {
-+            .index = i,
-+            .type = type_v4l2
-+        };
-+        struct v4l2_requestbuffers rbufs = {
-+            .count = 0,
-+            .type = type_v4l2,
-+            .memory = V4L2_MEMORY_MMAP
-+        };
-+        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
-+            if (errno == EINTR)
-+                continue;
-+            if (errno != EINVAL)
-+                request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2);
-+            return;
-+        }
-+        if (!video_src_pixfmt_supported(fmtdesc.pixelformat))
-+            continue;
-+
-+        if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) {
-+            request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat);
-+            continue;
-+        }
-+
-+        while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) {
-+            if (errno != EINTR) {
-+                request_debug(dc, "%s: Reqbufs failed\n", vpath);
-+                continue;
-+            }
-+        }
-+
-+        if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) {
-+            request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities);
-+            continue;
-+        }
-+
-+        request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n",
-+                 mpath, vpath, fmtdesc.pixelformat, type_v4l2);
-+        devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath);
-+    }
-+}
-+
-+
-+static int probe_video_device(void * const dc,
-+                   struct udev_device *const device,
-+                   struct devscan *const scan,
-+                   const char *const mpath)
-+{
-+    int ret;
-+    unsigned int capabilities = 0;
-+    int video_fd = -1;
-+
-+    const char *path = udev_device_get_devnode(device);
-+    if (!path) {
-+        request_err(dc, "%s: get video device devnode failed\n", __func__);
-+        ret = -EINVAL;
-+        goto fail;
-+    }
-+
-+    video_fd = open(path, O_RDWR, 0);
-+    if (video_fd == -1) {
-+        ret = -errno;
-+        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno);
-+        goto fail;
-+    }
-+
-+    ret = v4l2_query_capabilities(video_fd, &capabilities);
-+    if (ret < 0) {
-+        request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret);
-+        goto fail;
-+    }
-+
-+    request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities);
-+
-+    if (!(capabilities & V4L2_CAP_STREAMING)) {
-+        request_debug(dc, "%s: missing required streaming capability\n", __func__);
-+        ret = -EINVAL;
-+        goto fail;
-+    }
-+
-+    if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) {
-+        request_debug(dc, "%s: missing required mem2mem capability\n", __func__);
-+        ret = -EINVAL;
-+        goto fail;
-+    }
-+
-+    /* Should check capture formats too... */
-+    if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0)
-+        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path);
-+    if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0)
-+        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path);
-+
-+    close(video_fd);
-+    return 0;
-+
-+fail:
-+    if (video_fd >= 0)
-+        close(video_fd);
-+    return ret;
-+}
-+
-+static int probe_media_device(void * const dc,
-+                   struct udev_device *const device,
-+                   struct devscan *const scan)
-+{
-+    int ret;
-+    int rv;
-+    struct media_device_info device_info = { 0 };
-+    struct media_v2_topology topology = { 0 };
-+    struct media_v2_interface *interfaces = NULL;
-+    struct udev *udev = udev_device_get_udev(device);
-+    struct udev_device *video_device;
-+    dev_t devnum;
-+    int media_fd = -1;
-+
-+    const char *path = udev_device_get_devnode(device);
-+    if (!path) {
-+        request_err(dc, "%s: get media device devnode failed\n", __func__);
-+        ret = -EINVAL;
-+        goto fail;
-+    }
-+
-+    media_fd = open(path, O_RDWR, 0);
-+    if (media_fd < 0) {
-+        ret = -errno;
-+        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret);
-+        goto fail;
-+    }
-+
-+    rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info);
-+    if (rv < 0) {
-+        ret = -errno;
-+        request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret);
-+        goto fail;
-+    }
-+
-+    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
-+    if (rv < 0) {
-+        ret = -errno;
-+        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
-+        goto fail;
-+    }
-+
-+    if (topology.num_interfaces <= 0) {
-+        request_err(dc, "%s: media device has no interfaces\n", __func__);
-+        ret = -EINVAL;
-+        goto fail;
-+    }
-+
-+    interfaces = calloc(topology.num_interfaces, sizeof(*interfaces));
-+    if (!interfaces) {
-+        request_err(dc, "%s: allocating media interface struct failed\n", __func__);
-+        ret = -ENOMEM;
-+        goto fail;
-+    }
-+
-+    topology.ptr_interfaces = (__u64)(uintptr_t)interfaces;
-+    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
-+    if (rv < 0) {
-+        ret = -errno;
-+        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
-+        goto fail;
-+    }
-+
-+    for (int i = 0; i < topology.num_interfaces; i++) {
-+        if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO)
-+            continue;
-+
-+        devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor);
-+        video_device = udev_device_new_from_devnum(udev, 'c', devnum);
-+        if (!video_device) {
-+            ret = -errno;
-+            request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device);
-+            continue;
-+        }
-+
-+        ret = probe_video_device(dc, video_device, scan, path);
-+        udev_device_unref(video_device);
-+
-+        if (ret != 0)
-+            goto fail;
-+    }
-+
-+fail:
-+    free(interfaces);
-+    if (media_fd != -1)
-+        close(media_fd);
-+    return ret;
-+}
-+
-+const char *decdev_media_path(const struct decdev *const dev)
-+{
-+    return !dev ? NULL : dev->mname;
-+}
-+
-+const char *decdev_video_path(const struct decdev *const dev)
-+{
-+    return !dev ? NULL : dev->vname;
-+}
-+
-+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev)
-+{
-+    return !dev ? 0 : dev->src_type;
-+}
-+
-+uint32_t decdev_src_pixelformat(const struct decdev *const dev)
-+{
-+    return !dev ? 0 : dev->src_fmt_v4l2;
-+}
-+
-+
-+const struct decdev *devscan_find(struct devscan *const scan,
-+                  const uint32_t src_fmt_v4l2)
-+{
-+    unsigned int i;
-+
-+    if (scan->env.mname && scan->env.vname)
-+        return &scan->env;
-+
-+    if (!src_fmt_v4l2)
-+        return scan->dev_count ? scan->devs + 0 : NULL;
-+
-+    for (i = 0; i != scan->dev_count; ++i) {
-+        if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2)
-+            return scan->devs + i;
-+    }
-+    return NULL;
-+}
-+
-+int devscan_build(void * const dc, struct devscan **pscan)
-+{
-+    int ret;
-+    struct udev *udev;
-+    struct udev_enumerate *enumerate;
-+    struct udev_list_entry *devices;
-+    struct udev_list_entry *entry;
-+    struct udev_device *device;
-+    struct devscan * scan;
-+
-+    *pscan = NULL;
-+
-+    scan = calloc(1, sizeof(*scan));
-+    if (!scan) {
-+        ret = -ENOMEM;
-+        goto fail;
-+    }
-+
-+    scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH");
-+    scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH");
-+    if (scan->env.mname && scan->env.vname) {
-+        request_info(dc, "Media/video device env overrides found: %s,%s\n",
-+                 scan->env.mname, scan->env.vname);
-+        *pscan = scan;
-+        return 0;
-+    }
-+
-+    udev = udev_new();
-+    if (!udev) {
-+        request_err(dc, "%s: allocating udev context failed\n", __func__);
-+        ret = -ENOMEM;
-+        goto fail;
-+    }
-+
-+    enumerate = udev_enumerate_new(udev);
-+    if (!enumerate) {
-+        request_err(dc, "%s: allocating udev enumerator failed\n", __func__);
-+        ret = -ENOMEM;
-+        goto fail;
-+    }
-+
-+    udev_enumerate_add_match_subsystem(enumerate, "media");
-+    udev_enumerate_scan_devices(enumerate);
-+
-+    devices = udev_enumerate_get_list_entry(enumerate);
-+    udev_list_entry_foreach(entry, devices) {
-+        const char *path = udev_list_entry_get_name(entry);
-+        if (!path)
-+            continue;
-+
-+        device = udev_device_new_from_syspath(udev, path);
-+        if (!device)
-+            continue;
-+
-+        probe_media_device(dc, device, scan);
-+        udev_device_unref(device);
-+    }
-+
-+    udev_enumerate_unref(enumerate);
-+
-+    *pscan = scan;
-+    return 0;
-+
-+fail:
-+    udev_unref(udev);
-+    devscan_delete(&scan);
-+    return ret;
-+}
-+
-diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h
-new file mode 100644
-index 000000000000..956d9234f11f
---- /dev/null
-+++ b/libavcodec/v4l2_req_devscan.h
-@@ -0,0 +1,23 @@
-+#ifndef _DEVSCAN_H_
-+#define _DEVSCAN_H_
-+
-+#include <stdint.h>
-+
-+struct devscan;
-+struct decdev;
-+enum v4l2_buf_type;
-+
-+/* These return pointers to data in the devscan structure and so are vaild
-+ * for the lifetime of that
-+ */
-+const char *decdev_media_path(const struct decdev *const dev);
-+const char *decdev_video_path(const struct decdev *const dev);
-+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev);
-+uint32_t decdev_src_pixelformat(const struct decdev *const dev);
-+
-+const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2);
-+
-+int devscan_build(void * const dc, struct devscan **pscan);
-+void devscan_delete(struct devscan **const pScan);
-+
-+#endif
-diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c
-new file mode 100644
-index 000000000000..ae6c64836972
---- /dev/null
-+++ b/libavcodec/v4l2_req_dmabufs.c
-@@ -0,0 +1,266 @@
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <unistd.h>
-+#include <inttypes.h>
-+#include <fcntl.h>
-+#include <errno.h>
-+#include <string.h>
-+#include <sys/ioctl.h>
-+#include <sys/mman.h>
-+#include <linux/mman.h>
-+#include <linux/dma-buf.h>
-+#include <linux/dma-heap.h>
-+
-+#include "v4l2_req_dmabufs.h"
-+#include "v4l2_req_utils.h"
-+
-+#define DMABUF_NAME1  "/dev/dma_heap/linux,cma"
-+#define DMABUF_NAME2  "/dev/dma_heap/reserved"
-+
-+#define TRACE_ALLOC 0
-+
-+struct dmabufs_ctl {
-+    int fd;
-+    size_t page_size;
-+};
-+
-+struct dmabuf_h {
-+    int fd;
-+    size_t size;
-+    size_t len;
-+    void * mapptr;
-+};
-+
-+#if TRACE_ALLOC
-+static unsigned int total_bufs = 0;
-+static size_t total_size = 0;
-+#endif
-+
-+struct dmabuf_h * dmabuf_import(int fd, size_t size)
-+{
-+    struct dmabuf_h *dh;
-+
-+    fd = dup(fd);
-+    if (fd < 0  || size == 0)
-+        return NULL;
-+
-+    dh = malloc(sizeof(*dh));
-+    if (!dh) {
-+        close(fd);
-+        return NULL;
-+    }
-+
-+    *dh = (struct dmabuf_h) {
-+        .fd = fd,
-+        .size = size,
-+        .mapptr = MAP_FAILED
-+    };
-+
-+#if TRACE_ALLOC
-+    ++total_bufs;
-+    total_size += dh->size;
-+    request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
-+#endif
-+
-+    return dh;
-+}
-+
-+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size)
-+{
-+    struct dmabuf_h * dh;
-+    struct dma_heap_allocation_data data = {
-+        .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1),
-+        .fd = 0,
-+        .fd_flags = O_RDWR,
-+        .heap_flags = 0
-+    };
-+
-+    if (old != NULL) {
-+        if (old->size == data.len) {
-+            return old;
-+        }
-+        dmabuf_free(old);
-+    }
-+
-+    if (size == 0 ||
-+        (dh = malloc(sizeof(*dh))) == NULL)
-+        return NULL;
-+
-+    while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) {
-+        int err = errno;
-+        request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n",
-+                (uint64_t)data.len,
-+                dbsc->fd,
-+                err,
-+                strerror(err));
-+        if (err == EINTR)
-+            continue;
-+        goto fail;
-+    }
-+
-+    *dh = (struct dmabuf_h){
-+        .fd = data.fd,
-+        .size = (size_t)data.len,
-+        .mapptr = MAP_FAILED
-+    };
-+
-+#if TRACE_ALLOC
-+    ++total_bufs;
-+    total_size += dh->size;
-+    request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
-+#endif
-+
-+    return dh;
-+
-+fail:
-+    free(dh);
-+    return NULL;
-+}
-+
-+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags)
-+{
-+    struct dma_buf_sync sync = {
-+        .flags = flags
-+    };
-+    while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) {
-+        const int err = errno;
-+        if (errno == EINTR)
-+            continue;
-+        request_log("%s: ioctl failed: flags=%#x\n", __func__, flags);
-+        return -err;
-+    }
-+    return 0;
-+}
-+
-+int dmabuf_write_start(struct dmabuf_h * const dh)
-+{
-+    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
-+}
-+
-+int dmabuf_write_end(struct dmabuf_h * const dh)
-+{
-+    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
-+}
-+
-+int dmabuf_read_start(struct dmabuf_h * const dh)
-+{
-+    if (!dmabuf_map(dh))
-+        return -1;
-+    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
-+}
-+
-+int dmabuf_read_end(struct dmabuf_h * const dh)
-+{
-+    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
-+}
-+
-+
-+void * dmabuf_map(struct dmabuf_h * const dh)
-+{
-+    if (!dh)
-+        return NULL;
-+    if (dh->mapptr != MAP_FAILED)
-+        return dh->mapptr;
-+    dh->mapptr = mmap(NULL, dh->size,
-+              PROT_READ | PROT_WRITE,
-+              MAP_SHARED | MAP_POPULATE,
-+              dh->fd, 0);
-+    if (dh->mapptr == MAP_FAILED) {
-+        request_log("%s: Map failed\n", __func__);
-+        return NULL;
-+    }
-+    return dh->mapptr;
-+}
-+
-+int dmabuf_fd(const struct dmabuf_h * const dh)
-+{
-+    if (!dh)
-+        return -1;
-+    return dh->fd;
-+}
-+
-+size_t dmabuf_size(const struct dmabuf_h * const dh)
-+{
-+    if (!dh)
-+        return 0;
-+    return dh->size;
-+}
-+
-+size_t dmabuf_len(const struct dmabuf_h * const dh)
-+{
-+    if (!dh)
-+        return 0;
-+    return dh->len;
-+}
-+
-+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len)
-+{
-+    dh->len = len;
-+}
-+
-+
-+
-+void dmabuf_free(struct dmabuf_h * dh)
-+{
-+    if (!dh)
-+        return;
-+
-+#if TRACE_ALLOC
-+    --total_bufs;
-+    total_size -= dh->size;
-+    request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
-+#endif
-+
-+    if (dh->mapptr != MAP_FAILED)
-+        munmap(dh->mapptr, dh->size);
-+    while (close(dh->fd) == -1 && errno == EINTR)
-+        /* loop */;
-+    free(dh);
-+}
-+
-+struct dmabufs_ctl * dmabufs_ctl_new(void)
-+{
-+    struct dmabufs_ctl * dbsc = malloc(sizeof(*dbsc));
-+
-+    if (!dbsc)
-+        return NULL;
-+
-+    while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 &&
-+           errno == EINTR)
-+        /* Loop */;
-+
-+    if (dbsc->fd == -1) {
-+        while ((dbsc->fd = open(DMABUF_NAME2, O_RDWR)) == -1 &&
-+               errno == EINTR)
-+            /* Loop */;
-+        if (dbsc->fd == -1) {
-+            request_log("Unable to open either %s or %s\n",
-+                    DMABUF_NAME1, DMABUF_NAME2);
-+            goto fail;
-+        }
-+    }
-+
-+    dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE);
-+
-+    return dbsc;
-+
-+fail:
-+    free(dbsc);
-+    return NULL;
-+}
-+
-+void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc)
-+{
-+    struct dmabufs_ctl * const dbsc = *pDbsc;
-+
-+    if (!dbsc)
-+        return;
-+    *pDbsc = NULL;
-+
-+    while (close(dbsc->fd) == -1 && errno == EINTR)
-+        /* loop */;
-+
-+    free(dbsc);
-+}
-+
-+
-diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h
-new file mode 100644
-index 000000000000..cfb17e801d59
---- /dev/null
-+++ b/libavcodec/v4l2_req_dmabufs.h
-@@ -0,0 +1,40 @@
-+#ifndef DMABUFS_H
-+#define DMABUFS_H
-+
-+#include <stddef.h>
-+
-+struct dmabufs_ctl;
-+struct dmabuf_h;
-+
-+struct dmabufs_ctl * dmabufs_ctl_new(void);
-+void dmabufs_ctl_delete(struct dmabufs_ctl ** const pdbsc);
-+
-+// Need not preserve old contents
-+// On NULL return old buffer is freed
-+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size);
-+
-+static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) {
-+    return dmabuf_realloc(dbsc, NULL, size);
-+}
-+/* Create from existing fd - dups(fd) */
-+struct dmabuf_h * dmabuf_import(int fd, size_t size);
-+void * dmabuf_map(struct dmabuf_h * const dh);
-+
-+/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */
-+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags);
-+
-+int dmabuf_write_start(struct dmabuf_h * const dh);
-+int dmabuf_write_end(struct dmabuf_h * const dh);
-+int dmabuf_read_start(struct dmabuf_h * const dh);
-+int dmabuf_read_end(struct dmabuf_h * const dh);
-+
-+int dmabuf_fd(const struct dmabuf_h * const dh);
-+/* Allocated size */
-+size_t dmabuf_size(const struct dmabuf_h * const dh);
-+/* Bytes in use */
-+size_t dmabuf_len(const struct dmabuf_h * const dh);
-+/* Set bytes in use */
-+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len);
-+void dmabuf_free(struct dmabuf_h * dh);
-+
-+#endif
-diff --git a/libavcodec/v4l2_req_hevc_v1.c b/libavcodec/v4l2_req_hevc_v1.c
-new file mode 100644
-index 000000000000..169b532832e0
---- /dev/null
-+++ b/libavcodec/v4l2_req_hevc_v1.c
-@@ -0,0 +1,3 @@
-+#define HEVC_CTRLS_VERSION 1
-+#include "v4l2_req_hevc_vx.c"
-+
-diff --git a/libavcodec/v4l2_req_hevc_v2.c b/libavcodec/v4l2_req_hevc_v2.c
-new file mode 100644
-index 000000000000..42af98e1569b
---- /dev/null
-+++ b/libavcodec/v4l2_req_hevc_v2.c
-@@ -0,0 +1,3 @@
-+#define HEVC_CTRLS_VERSION 2
-+#include "v4l2_req_hevc_vx.c"
-+
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-new file mode 100644
-index 000000000000..0ae03b10c4a8
---- /dev/null
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -0,0 +1,1213 @@
-+// File included by v4l2_req_hevc_v* - not compiled on its own
-+
-+#include "decode.h"
-+#include "hevcdec.h"
-+#include "hwconfig.h"
-+#include "internal.h"
-+#include "thread.h"
-+
-+#include "v4l2_request_hevc.h"
-+
-+#if HEVC_CTRLS_VERSION == 1
-+#include "hevc-ctrls-v1.h"
-+
-+// Fixup renamed entries
-+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT
-+
-+#elif HEVC_CTRLS_VERSION == 2
-+#include "hevc-ctrls-v2.h"
-+#else
-+#error Unknown HEVC_CTRLS_VERSION
-+#endif
-+
-+#include "libavutil/hwcontext_drm.h"
-+
-+#include <semaphore.h>
-+#include <pthread.h>
-+
-+#include "v4l2_req_devscan.h"
-+#include "v4l2_req_dmabufs.h"
-+#include "v4l2_req_pollqueue.h"
-+#include "v4l2_req_media.h"
-+#include "v4l2_req_utils.h"
-+
-+// Attached to buf[0] in frame
-+// Pooled in hwcontext so generally create once - 1/frame
-+typedef struct V4L2MediaReqDescriptor {
-+    AVDRMFrameDescriptor drm;
-+
-+    // Media
-+    uint64_t timestamp;
-+    struct qent_dst * qe_dst;
-+
-+    // Decode only - should be NULL by the time we emit the frame
-+    struct req_decode_ent decode_ent;
-+
-+    struct media_request *req;
-+    struct qent_src *qe_src;
-+
-+#if HEVC_CTRLS_VERSION >= 2
-+    struct v4l2_ctrl_hevc_decode_params dec;
-+#endif
-+
-+    size_t num_slices;
-+    size_t alloced_slices;
-+    struct v4l2_ctrl_hevc_slice_params * slice_params;
-+    struct slice_info * slices;
-+
-+} V4L2MediaReqDescriptor;
-+
-+struct slice_info {
-+    const uint8_t * ptr;
-+    size_t len; // bytes
-+};
-+
-+// Handy container for accumulating controls before setting
-+struct req_controls {
-+    int has_scaling;
-+    struct timeval tv;
-+    struct v4l2_ctrl_hevc_sps sps;
-+    struct v4l2_ctrl_hevc_pps pps;
-+    struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
-+};
-+
-+//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 };
-+
-+
-+// Get an FFmpeg format from the v4l2 format
-+static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format)
-+{
-+    switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ?
-+            format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) {
-+    case V4L2_PIX_FMT_YUV420:
-+        return AV_PIX_FMT_YUV420P;
-+    case V4L2_PIX_FMT_NV12:
-+        return AV_PIX_FMT_NV12;
-+#if CONFIG_SAND
-+    case V4L2_PIX_FMT_NV12_COL128:
-+        return AV_PIX_FMT_RPI4_8;
-+    case V4L2_PIX_FMT_NV12_10_COL128:
-+        return AV_PIX_FMT_RPI4_10;
-+#endif
-+    default:
-+        break;
-+    }
-+    return AV_PIX_FMT_NONE;
-+}
-+
-+static inline uint64_t frame_capture_dpb(const AVFrame * const frame)
-+{
-+    const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
-+    return rd->timestamp;
-+}
-+
-+static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp)
-+{
-+    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
-+    rd->timestamp = dpb_stamp;
-+}
-+
-+static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table)
-+{
-+    int32_t luma_weight_denom, chroma_weight_denom;
-+    const SliceHeader *sh = &h->sh;
-+
-+    if (sh->slice_type == HEVC_SLICE_I ||
-+        (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) ||
-+        (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag))
-+        return;
-+
-+    table->luma_log2_weight_denom = sh->luma_log2_weight_denom;
-+
-+    if (h->ps.sps->chroma_format_idc)
-+        table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom;
-+
-+    luma_weight_denom = (1 << sh->luma_log2_weight_denom);
-+    chroma_weight_denom = (1 << sh->chroma_log2_weight_denom);
-+
-+    for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) {
-+        table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom;
-+        table->luma_offset_l0[i] = sh->luma_offset_l0[i];
-+        table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom;
-+        table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom;
-+        table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0];
-+        table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1];
-+    }
-+
-+    if (sh->slice_type != HEVC_SLICE_B)
-+        return;
-+
-+    for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) {
-+        table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom;
-+        table->luma_offset_l1[i] = sh->luma_offset_l1[i];
-+        table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom;
-+        table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom;
-+        table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0];
-+        table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1];
-+    }
-+}
-+
-+static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp)
-+{
-+    const HEVCFrame *frame;
-+    int i;
-+
-+    for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) {
-+        frame = h->rps[ST_CURR_BEF].ref[i];
-+        if (frame && timestamp == frame_capture_dpb(frame->frame))
-+            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE;
-+    }
-+
-+    for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) {
-+        frame = h->rps[ST_CURR_AFT].ref[i];
-+        if (frame && timestamp == frame_capture_dpb(frame->frame))
-+            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER;
-+    }
-+
-+    for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) {
-+        frame = h->rps[LT_CURR].ref[i];
-+        if (frame && timestamp == frame_capture_dpb(frame->frame))
-+            return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR;
-+    }
-+
-+    return 0;
-+}
-+
-+static unsigned int
-+get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame,
-+                  const struct v4l2_hevc_dpb_entry * const entries,
-+                  const unsigned int num_entries)
-+{
-+    uint64_t timestamp;
-+
-+    if (!frame)
-+        return 0;
-+
-+    timestamp = frame_capture_dpb(frame->frame);
-+
-+    for (unsigned int i = 0; i < num_entries; i++) {
-+        if (entries[i].timestamp == timestamp)
-+            return i;
-+    }
-+
-+    return 0;
-+}
-+
-+static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx)
-+{
-+    unsigned int z = 0;
-+    while (idx--) {
-+        if (*b++ == 0) {
-+            ++z;
-+            if (z >= 2 && *b == 3) {
-+                ++b;
-+                z = 0;
-+            }
-+        }
-+        else {
-+            z = 0;
-+        }
-+    }
-+    return b;
-+}
-+
-+static int slice_add(V4L2MediaReqDescriptor * const rd)
-+{
-+    if (rd->num_slices >= rd->alloced_slices) {
-+        struct v4l2_ctrl_hevc_slice_params * p2;
-+        struct slice_info * s2;
-+        size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2;
-+
-+        p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2));
-+        if (p2 == NULL)
-+            return AVERROR(ENOMEM);
-+        rd->slice_params = p2;
-+
-+        s2 = av_realloc_array(rd->slices, n2, sizeof(*s2));
-+        if (s2 == NULL)
-+            return AVERROR(ENOMEM);
-+        rd->slices = s2;
-+
-+        rd->alloced_slices = n2;
-+    }
-+    ++rd->num_slices;
-+    return 0;
-+}
-+
-+static unsigned int
-+fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries)
-+{
-+    unsigned int i;
-+    unsigned int n = 0;
-+    const HEVCFrame * const pic = h->ref;
-+
-+    for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
-+        const HEVCFrame * const frame = &h->DPB[i];
-+        if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) {
-+            struct v4l2_hevc_dpb_entry * const entry = entries + n++;
-+
-+            entry->timestamp = frame_capture_dpb(frame->frame);
-+            entry->rps = find_frame_rps_type(h, entry->timestamp);
-+            entry->field_pic = frame->frame->interlaced_frame;
-+
-+            /* TODO: Interleaved: Get the POC for each field. */
-+            entry->pic_order_cnt[0] = frame->poc;
-+            entry->pic_order_cnt[1] = frame->poc;
-+        }
-+    }
-+    return n;
-+}
-+
-+static void fill_slice_params(const HEVCContext * const h,
-+#if HEVC_CTRLS_VERSION >= 2
-+                              const struct v4l2_ctrl_hevc_decode_params * const dec,
-+#endif
-+                              struct v4l2_ctrl_hevc_slice_params *slice_params,
-+                              uint32_t bit_size, uint32_t bit_offset)
-+{
-+    const SliceHeader * const sh = &h->sh;
-+#if HEVC_CTRLS_VERSION >= 2
-+    const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb;
-+    const unsigned int dpb_n = dec->num_active_dpb_entries;
-+#else
-+    struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb;
-+    unsigned int dpb_n;
-+#endif
-+    unsigned int i;
-+    RefPicList *rpl;
-+
-+    *slice_params = (struct v4l2_ctrl_hevc_slice_params) {
-+        .bit_size = bit_size,
-+        .data_bit_offset = bit_offset,
-+
-+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
-+        .slice_segment_addr = sh->slice_segment_addr,
-+
-+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
-+        .nal_unit_type = h->nal_unit_type,
-+        .nuh_temporal_id_plus1 = h->temporal_id + 1,
-+
-+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
-+        .slice_type = sh->slice_type,
-+        .colour_plane_id = sh->colour_plane_id,
-+        .slice_pic_order_cnt = h->ref->poc,
-+        .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0,
-+        .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0,
-+        .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0,
-+        .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand,
-+        .slice_qp_delta = sh->slice_qp_delta,
-+        .slice_cb_qp_offset = sh->slice_cb_qp_offset,
-+        .slice_cr_qp_offset = sh->slice_cr_qp_offset,
-+        .slice_act_y_qp_offset = 0,
-+        .slice_act_cb_qp_offset = 0,
-+        .slice_act_cr_qp_offset = 0,
-+        .slice_beta_offset_div2 = sh->beta_offset / 2,
-+        .slice_tc_offset_div2 = sh->tc_offset / 2,
-+
-+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
-+        .pic_struct = h->sei.picture_timing.picture_struct,
-+
-+#if HEVC_CTRLS_VERSION < 2
-+        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
-+        .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
-+        .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
-+        .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs,
-+#endif
-+    };
-+
-+    if (sh->slice_sample_adaptive_offset_flag[0])
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
-+
-+    if (sh->slice_sample_adaptive_offset_flag[1])
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
-+
-+    if (sh->slice_temporal_mvp_enabled_flag)
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
-+
-+    if (sh->mvd_l1_zero_flag)
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
-+
-+    if (sh->cabac_init_flag)
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
-+
-+    if (sh->collocated_list == L0)
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
-+
-+    if (sh->disable_deblocking_filter_flag)
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
-+
-+    if (sh->slice_loop_filter_across_slices_enabled_flag)
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
-+
-+    if (sh->dependent_slice_segment_flag)
-+        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
-+
-+#if HEVC_CTRLS_VERSION < 2
-+    dpb_n = fill_dpb_entries(h, dpb);
-+    slice_params->num_active_dpb_entries = dpb_n;
-+#endif
-+
-+    if (sh->slice_type != HEVC_SLICE_I) {
-+        rpl = &h->ref->refPicList[0];
-+        for (i = 0; i < rpl->nb_refs; i++)
-+            slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
-+    }
-+
-+    if (sh->slice_type == HEVC_SLICE_B) {
-+        rpl = &h->ref->refPicList[1];
-+        for (i = 0; i < rpl->nb_refs; i++)
-+            slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
-+    }
-+
-+    fill_pred_table(h, &slice_params->pred_weight_table);
-+
-+    slice_params->num_entry_point_offsets = sh->num_entry_point_offsets;
-+    if (slice_params->num_entry_point_offsets > 256) {
-+        slice_params->num_entry_point_offsets = 256;
-+        av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
-+    }
-+
-+    for (i = 0; i < slice_params->num_entry_point_offsets; i++)
-+        slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
-+}
-+
-+#if HEVC_CTRLS_VERSION >= 2
-+static void
-+fill_decode_params(const HEVCContext * const h,
-+                   struct v4l2_ctrl_hevc_decode_params * const dec)
-+{
-+    unsigned int i;
-+
-+    *dec = (struct v4l2_ctrl_hevc_decode_params){
-+        .pic_order_cnt_val = h->poc,
-+        .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
-+        .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
-+        .num_poc_lt_curr = h->rps[LT_CURR].nb_refs,
-+    };
-+
-+    dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb);
-+
-+    // The docn does seem to ask that we fit our 32 bit signed POC into
-+    // a U8 so... (To be fair 16 bits would be enough)
-+    // Luckily we (Pi) don't use these fields
-+    for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i)
-+        dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc;
-+    for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i)
-+        dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc;
-+    for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i)
-+        dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc;
-+
-+    if (IS_IRAP(h))
-+        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
-+    if (IS_IDR(h))
-+        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
-+    if (h->sh.no_output_of_prior_pics_flag)
-+        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR;
-+
-+}
-+#endif
-+
-+static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps)
-+{
-+    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
-+    *ctrl = (struct v4l2_ctrl_hevc_sps) {
-+        .chroma_format_idc = sps->chroma_format_idc,
-+        .pic_width_in_luma_samples = sps->width,
-+        .pic_height_in_luma_samples = sps->height,
-+        .bit_depth_luma_minus8 = sps->bit_depth - 8,
-+        .bit_depth_chroma_minus8 = sps->bit_depth - 8,
-+        .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
-+        .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1,
-+        .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics,
-+        .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1,
-+        .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
-+        .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
-+        .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
-+        .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size,
-+        .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
-+        .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
-+        .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
-+        .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
-+        .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
-+        .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
-+        .num_short_term_ref_pic_sets = sps->nb_st_rps,
-+        .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
-+        .chroma_format_idc = sps->chroma_format_idc,
-+        .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
-+    };
-+
-+    if (sps->separate_colour_plane_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE;
-+
-+    if (sps->scaling_list_enable_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED;
-+
-+    if (sps->amp_enabled_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED;
-+
-+    if (sps->sao_enabled)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET;
-+
-+    if (sps->pcm_enabled_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED;
-+
-+    if (sps->pcm.loop_filter_disable_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED;
-+
-+    if (sps->long_term_ref_pics_present_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT;
-+
-+    if (sps->sps_temporal_mvp_enabled_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED;
-+
-+    if (sps->sps_strong_intra_smoothing_enable_flag)
-+        ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED;
-+}
-+
-+static void fill_scaling_matrix(const ScalingList * const sl,
-+                                struct v4l2_ctrl_hevc_scaling_matrix * const sm)
-+{
-+    unsigned int i;
-+
-+    for (i = 0; i < 6; i++) {
-+        unsigned int j;
-+
-+        for (j = 0; j < 16; j++)
-+            sm->scaling_list_4x4[i][j] = sl->sl[0][i][j];
-+        for (j = 0; j < 64; j++) {
-+            sm->scaling_list_8x8[i][j]   = sl->sl[1][i][j];
-+            sm->scaling_list_16x16[i][j] = sl->sl[2][i][j];
-+            if (i < 2)
-+                sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j];
-+        }
-+        sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i];
-+        if (i < 2)
-+            sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3];
-+    }
-+}
-+
-+static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps)
-+{
-+    uint64_t flags = 0;
-+
-+    if (pps->dependent_slice_segments_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
-+
-+    if (pps->output_flag_present_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
-+
-+    if (pps->sign_data_hiding_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
-+
-+    if (pps->cabac_init_present_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
-+
-+    if (pps->constrained_intra_pred_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
-+
-+    if (pps->transform_skip_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
-+
-+    if (pps->cu_qp_delta_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
-+
-+    if (pps->pic_slice_level_chroma_qp_offsets_present_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
-+
-+    if (pps->weighted_pred_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
-+
-+    if (pps->weighted_bipred_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
-+
-+    if (pps->transquant_bypass_enable_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
-+
-+    if (pps->tiles_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
-+
-+    if (pps->entropy_coding_sync_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
-+
-+    if (pps->loop_filter_across_tiles_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
-+
-+    if (pps->seq_loop_filter_across_slices_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
-+
-+    if (pps->deblocking_filter_override_enabled_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
-+
-+    if (pps->disable_dbf)
-+        flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
-+
-+    if (pps->lists_modification_present_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
-+
-+    if (pps->slice_header_extension_present_flag)
-+        flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT;
-+
-+    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
-+    *ctrl = (struct v4l2_ctrl_hevc_pps) {
-+        .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
-+        .init_qp_minus26 = pps->pic_init_qp_minus26,
-+        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
-+        .pps_cb_qp_offset = pps->cb_qp_offset,
-+        .pps_cr_qp_offset = pps->cr_qp_offset,
-+        .pps_beta_offset_div2 = pps->beta_offset / 2,
-+        .pps_tc_offset_div2 = pps->tc_offset / 2,
-+        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
-+        .flags = flags
-+    };
-+
-+
-+    if (pps->tiles_enabled_flag) {
-+        ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1;
-+        ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1;
-+
-+        for (int i = 0; i < pps->num_tile_columns; i++)
-+            ctrl->column_width_minus1[i] = pps->column_width[i] - 1;
-+
-+        for (int i = 0; i < pps->num_tile_rows; i++)
-+            ctrl->row_height_minus1[i] = pps->row_height[i] - 1;
-+    }
-+}
-+
-+// Called before finally returning the frame to the user
-+// Set corrupt flag here as this is actually the frame structure that
-+// is going to the user (in MT land each thread has its own pool)
-+static int frame_post_process(void *logctx, AVFrame *frame)
-+{
-+    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0];
-+
-+//    av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
-+    frame->flags &= ~AV_FRAME_FLAG_CORRUPT;
-+    if (rd->qe_dst) {
-+        MediaBufsStatus stat = qent_dst_wait(rd->qe_dst);
-+        if (stat != MEDIABUFS_STATUS_SUCCESS) {
-+            av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__);
-+            frame->flags |= AV_FRAME_FLAG_CORRUPT;
-+        }
-+    }
-+
-+    return 0;
-+}
-+
-+static inline struct timeval cvt_dpb_to_tv(uint64_t t)
-+{
-+    t /= 1000;
-+    return (struct timeval){
-+        .tv_usec = t % 1000000,
-+        .tv_sec = t / 1000000
-+    };
-+}
-+
-+static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t)
-+{
-+    return (uint64_t)t * 1000;
-+}
-+
-+static int v4l2_request_hevc_start_frame(AVCodecContext *avctx,
-+                                         av_unused const uint8_t *buffer,
-+                                         av_unused uint32_t size)
-+{
-+    const HEVCContext *h = avctx->priv_data;
-+    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+
-+//    av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
-+    decode_q_add(&ctx->decode_q, &rd->decode_ent);
-+
-+    rd->num_slices = 0;
-+    ctx->timestamp++;
-+    rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp);
-+
-+    {
-+        FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data;
-+        fdd->post_process = frame_post_process;
-+    }
-+
-+    // qe_dst needs to be bound to the data buffer and only returned when that is
-+    if (!rd->qe_dst)
-+    {
-+        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
-+            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
-+            return AVERROR(ENOMEM);
-+        }
-+    }
-+
-+    ff_thread_finish_setup(avctx); // Allow next thread to enter rpi_hevc_start_frame
-+
-+    return 0;
-+}
-+
-+// Object fd & size will be zapped by this & need setting later
-+static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format)
-+{
-+    AVDRMLayerDescriptor *layer = &desc->layers[0];
-+    unsigned int width;
-+    unsigned int height;
-+    unsigned int bpl;
-+    uint32_t pixelformat;
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
-+        width       = format->fmt.pix_mp.width;
-+        height      = format->fmt.pix_mp.height;
-+        pixelformat = format->fmt.pix_mp.pixelformat;
-+        bpl         = format->fmt.pix_mp.plane_fmt[0].bytesperline;
-+    }
-+    else {
-+        width       = format->fmt.pix.width;
-+        height      = format->fmt.pix.height;
-+        pixelformat = format->fmt.pix.pixelformat;
-+        bpl         = format->fmt.pix.bytesperline;
-+    }
-+
-+    switch (pixelformat) {
-+    case V4L2_PIX_FMT_NV12:
-+        layer->format = DRM_FORMAT_NV12;
-+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        break;
-+#if CONFIG_SAND
-+    case V4L2_PIX_FMT_NV12_COL128:
-+        layer->format = DRM_FORMAT_NV12;
-+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
-+        break;
-+    case V4L2_PIX_FMT_NV12_10_COL128:
-+        layer->format = DRM_FORMAT_P030;
-+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
-+        break;
-+#endif
-+#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED
-+    case V4L2_PIX_FMT_SUNXI_TILED_NV12:
-+        layer->format = DRM_FORMAT_NV12;
-+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED;
-+        break;
-+#endif
-+#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15)
-+    case V4L2_PIX_FMT_NV15:
-+        layer->format = DRM_FORMAT_NV15;
-+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        break;
-+#endif
-+    case V4L2_PIX_FMT_NV16:
-+        layer->format = DRM_FORMAT_NV16;
-+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        break;
-+#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20)
-+    case V4L2_PIX_FMT_NV20:
-+        layer->format = DRM_FORMAT_NV20;
-+        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        break;
-+#endif
-+    default:
-+        return -1;
-+    }
-+
-+    desc->nb_objects = 1;
-+    desc->objects[0].fd = -1;
-+    desc->objects[0].size = 0;
-+
-+    desc->nb_layers = 1;
-+    layer->nb_planes = 2;
-+
-+    layer->planes[0].object_index = 0;
-+    layer->planes[0].offset = 0;
-+    layer->planes[0].pitch = bpl;
-+#if CONFIG_SAND
-+    if (pixelformat == V4L2_PIX_FMT_NV12_COL128) {
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = height * 128;
-+        layer->planes[0].pitch = width;
-+        layer->planes[1].pitch = width;
-+    }
-+    else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = height * 128;
-+        layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy
-+        layer->planes[1].pitch = width * 2;
-+    }
-+    else
-+#endif
-+    {
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = layer->planes[0].pitch * height;
-+        layer->planes[1].pitch = layer->planes[0].pitch;
-+    }
-+
-+    return 0;
-+}
-+
-+static int
-+set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
-+    struct req_controls *const controls,
-+#if HEVC_CTRLS_VERSION >= 2
-+    struct v4l2_ctrl_hevc_decode_params * const dec,
-+#endif
-+    struct v4l2_ctrl_hevc_slice_params * const slices,
-+    const unsigned int slice_no,
-+    const unsigned int slice_count)
-+{
-+    int rv;
-+
-+    struct v4l2_ext_control control[] = {
-+        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
-+            .ptr = &controls->sps,
-+            .size = sizeof(controls->sps),
-+        },
-+        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
-+            .ptr = &controls->pps,
-+            .size = sizeof(controls->pps),
-+        },
-+#if HEVC_CTRLS_VERSION >= 2
-+        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS,
-+            .ptr = dec,
-+            .size = sizeof(*dec),
-+        },
-+#endif
-+        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
-+            .ptr = slices + slice_no,
-+            .size = sizeof(*slices) * slice_count,
-+        },
-+        // Optional
-+        {
-+            .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
-+            .ptr = &controls->scaling_matrix,
-+            .size = sizeof(controls->scaling_matrix),
-+        },
-+    };
-+
-+    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control,
-+            controls->has_scaling ?
-+                FF_ARRAY_ELEMS(control) :
-+                FF_ARRAY_ELEMS(control) - 1);
-+
-+    return rv;
-+}
-+
-+static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
-+{
-+    const HEVCContext * const h = avctx->priv_data;
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
-+    int bcount = get_bits_count(&h->HEVClc->gb);
-+    uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount;
-+
-+    int rv;
-+    struct slice_info * si;
-+
-+    if ((rv = slice_add(rd)) != 0)
-+        return rv;
-+
-+    si = rd->slices + rd->num_slices - 1;
-+    si->ptr = buffer;
-+    si->len = size;
-+
-+    if (ctx->multi_slice && rd->num_slices > 1) {
-+        struct slice_info *const si0 = rd->slices;
-+        const size_t offset = (buffer - si0->ptr);
-+        boff += offset * 8;
-+        size += offset;
-+        si0->len = si->len + offset;
-+    }
-+
-+#if HEVC_CTRLS_VERSION >= 2
-+    if (rd->num_slices == 1)
-+        fill_decode_params(h, &rd->dec);
-+    fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff);
-+#else
-+    fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff);
-+#endif
-+
-+    return 0;
-+}
-+
-+static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx)
-+{
-+    const HEVCContext * const h = avctx->priv_data;
-+    if (h->ref != NULL) {
-+        V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
-+        V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+
-+        media_request_abort(&rd->req);
-+        mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src);
-+
-+        decode_q_remove(&ctx->decode_q, &rd->decode_ent);
-+    }
-+}
-+
-+static int send_slice(AVCodecContext * const avctx,
-+                      V4L2MediaReqDescriptor * const rd,
-+                      struct req_controls *const controls,
-+                      const unsigned int i, const unsigned int j)
-+{
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+
-+    struct slice_info *const si = rd->slices + i;
-+    struct media_request * req = NULL;
-+    struct qent_src * src = NULL;
-+    MediaBufsStatus stat;
-+
-+    if ((req = media_request_get(ctx->mpool)) == NULL) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__);
-+        return AVERROR(ENOMEM);
-+    }
-+
-+    if (set_req_ctls(ctx, req,
-+                     controls,
-+#if HEVC_CTRLS_VERSION >= 2
-+                     &rd->dec,
-+#endif
-+                     rd->slice_params,
-+                     i, j - i)) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__);
-+        goto fail1;
-+    }
-+
-+    if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__);
-+        goto fail1;
-+    }
-+
-+    if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__);
-+        goto fail2;
-+    }
-+
-+    if (qent_src_params_set(src, &controls->tv)) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__);
-+        goto fail2;
-+    }
-+
-+#warning ANNEX_B start code
-+//        if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
-+//        }
-+
-+    stat = mediabufs_start_request(ctx->mbufs, &req, &src,
-+                                   i == 0 ? rd->qe_dst : NULL,
-+                                   j == rd->num_slices);
-+
-+    if (stat != MEDIABUFS_STATUS_SUCCESS) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__);
-+        return AVERROR_UNKNOWN;
-+    }
-+    return 0;
-+
-+fail2:
-+    mediabufs_src_qent_abort(ctx->mbufs, &src);
-+fail1:
-+    media_request_abort(&req);
-+    return AVERROR_UNKNOWN;
-+}
-+
-+static int v4l2_request_hevc_end_frame(AVCodecContext *avctx)
-+{
-+    const HEVCContext * const h = avctx->priv_data;
-+    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
-+    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
-+    struct req_controls rc;
-+    unsigned int i;
-+    int rv;
-+
-+    // It is possible, though maybe a bug, to get an end_frame without
-+    // a previous start_frame.  If we do then give up.
-+    if (!decode_q_in_q(&rd->decode_ent)) {
-+        av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__);
-+        return AVERROR_INVALIDDATA;
-+    }
-+
-+    {
-+        const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ?
-+                                    &h->ps.pps->scaling_list :
-+                                h->ps.sps->scaling_list_enable_flag ?
-+                                    &h->ps.sps->scaling_list : NULL;
-+
-+
-+        memset(&rc, 0, sizeof(rc));
-+        rc.tv = cvt_dpb_to_tv(rd->timestamp);
-+        fill_sps(&rc.sps, h->ps.sps);
-+        fill_pps(&rc.pps, h->ps.pps);
-+        if (sl) {
-+            rc.has_scaling = 1;
-+            fill_scaling_matrix(sl, &rc.scaling_matrix);
-+        }
-+    }
-+
-+    decode_q_wait(&ctx->decode_q, &rd->decode_ent);
-+
-+    // qe_dst needs to be bound to the data buffer and only returned when that is
-+    // Alloc almost certainly wants to be serialised if there is any chance of blocking
-+    // so we get the next frame to be free in the thread that needs it for decode first.
-+    //
-+    // In our current world this probably isn't a concern but put it here anyway
-+    if (!rd->qe_dst)
-+    {
-+        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
-+            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
-+            rv = AVERROR(ENOMEM);
-+            goto fail;
-+        }
-+    }
-+
-+    // Send as slices
-+    if (ctx->multi_slice)
-+    {
-+        if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0)
-+            goto fail;
-+    }
-+    else
-+    {
-+        for (i = 0; i != rd->num_slices; ++i) {
-+            if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0)
-+                goto fail;
-+        }
-+    }
-+
-+    // Set the drm_prime desriptor
-+    drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
-+    rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0));
-+    rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0));
-+
-+    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
-+    return 0;
-+
-+fail:
-+    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
-+    return rv;
-+}
-+
-+// Initial check & init
-+static int
-+probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-+{
-+    const HEVCContext *h = avctx->priv_data;
-+    const HEVCSPS * const sps = h->ps.sps;
-+    struct v4l2_ctrl_hevc_sps ctrl_sps;
-+    unsigned int i;
-+
-+    // Check for var slice array
-+    struct v4l2_query_ext_ctrl qc[] = {
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX },
-+#if HEVC_CTRLS_VERSION >= 2
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS },
-+#endif
-+    };
-+    // Order & size must match!
-+    static const size_t ctrl_sizes[] = {
-+        sizeof(struct v4l2_ctrl_hevc_slice_params),
-+        sizeof(struct v4l2_ctrl_hevc_sps),
-+        sizeof(struct v4l2_ctrl_hevc_pps),
-+        sizeof(struct v4l2_ctrl_hevc_scaling_matrix),
-+#if HEVC_CTRLS_VERSION >= 2
-+        sizeof(struct v4l2_ctrl_hevc_decode_params),
-+#endif
-+    };
-+    const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc);
-+
-+    if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) {
-+        av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION);
-+        return AVERROR(EINVAL);
-+    }
-+    for (i = 0; i != noof_ctrls; ++i) {
-+        if (ctrl_sizes[i] != (size_t)qc[i].elem_size) {
-+            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n",
-+                   HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size);
-+            return AVERROR(EINVAL);
-+        }
-+    }
-+
-+    fill_sps(&ctrl_sps, sps);
-+
-+    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n");
-+        return AVERROR(EINVAL);
-+    }
-+
-+    ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0;
-+    return 0;
-+}
-+
-+// Final init
-+static int
-+set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-+{
-+    int ret;
-+
-+    struct v4l2_query_ext_ctrl querys[] = {
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, },
-+    };
-+
-+    struct v4l2_ext_control ctrls[] = {
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
-+        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
-+    };
-+
-+    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
-+
-+    ctx->decode_mode = querys[0].default_value;
-+
-+    if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED &&
-+        ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    ctx->start_code = querys[1].default_value;
-+    if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE &&
-+        ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    ctx->max_slices = querys[2].elems;
-+    if (ctx->max_slices > MAX_SLICES) {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    ctrls[0].value = ctx->decode_mode;
-+    ctrls[1].value = ctx->start_code;
-+
-+    ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls));
-+    return !ret ? 0 : AVERROR(-ret);
-+}
-+
-+static void v4l2_req_frame_free(void *opaque, uint8_t *data)
-+{
-+    AVCodecContext *avctx = opaque;
-+    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data;
-+
-+    av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data);
-+
-+    qent_dst_unref(&rd->qe_dst);
-+
-+    // We don't expect req or qe_src to be set
-+    if (rd->req || rd->qe_src)
-+        av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src);
-+
-+    av_freep(&rd->slices);
-+    av_freep(&rd->slice_params);
-+
-+    av_free(rd);
-+}
-+
-+static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size)
-+{
-+    AVCodecContext *avctx = opaque;
-+//    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
-+//    V4L2MediaReqDescriptor *req;
-+    AVBufferRef *ref;
-+    uint8_t *data;
-+//    int ret;
-+
-+    data = av_mallocz(size);
-+    if (!data)
-+        return NULL;
-+
-+    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data);
-+    ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0);
-+    if (!ref) {
-+        av_freep(&data);
-+        return NULL;
-+    }
-+    return ref;
-+}
-+
-+#if 0
-+static void v4l2_req_pool_free(void *opaque)
-+{
-+    av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque);
-+}
-+
-+static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc)
-+{
-+    av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool);
-+
-+    av_buffer_pool_uninit(&hwfc->pool);
-+}
-+#endif
-+
-+static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
-+{
-+    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
-+    AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data;
-+    const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs);
-+
-+    hwfc->format = AV_PIX_FMT_DRM_PRIME;
-+    hwfc->sw_format = pixel_format_from_format(vfmt);
-+    if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) {
-+        hwfc->width = vfmt->fmt.pix_mp.width;
-+        hwfc->height = vfmt->fmt.pix_mp.height;
-+    } else {
-+        hwfc->width = vfmt->fmt.pix.width;
-+        hwfc->height = vfmt->fmt.pix.height;
-+    }
-+#if 0
-+    hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free);
-+    if (!hwfc->pool)
-+        return AVERROR(ENOMEM);
-+
-+    hwfc->free = v4l2_req_hwframe_ctx_free;
-+
-+    hwfc->initial_pool_size = 1;
-+
-+    switch (avctx->codec_id) {
-+    case AV_CODEC_ID_VP9:
-+        hwfc->initial_pool_size += 8;
-+        break;
-+    case AV_CODEC_ID_VP8:
-+        hwfc->initial_pool_size += 3;
-+        break;
-+    default:
-+        hwfc->initial_pool_size += 2;
-+    }
-+#endif
-+    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size);
-+
-+    return 0;
-+}
-+
-+static int alloc_frame(AVCodecContext * avctx, AVFrame *frame)
-+{
-+    int rv;
-+
-+    frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor));
-+    if (!frame->buf[0])
-+        return AVERROR(ENOMEM);
-+
-+    frame->data[0] = frame->buf[0]->data;
-+
-+    frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
-+
-+    if ((rv = ff_attach_decode_data(frame)) != 0) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n");
-+        av_frame_unref(frame);
-+        return rv;
-+    }
-+
-+    return 0;
-+}
-+
-+const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = {
-+    .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE,
-+    .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION),
-+    .probe = probe,
-+    .set_controls = set_controls,
-+
-+    .start_frame    = v4l2_request_hevc_start_frame,
-+    .decode_slice   = v4l2_request_hevc_decode_slice,
-+    .end_frame      = v4l2_request_hevc_end_frame,
-+    .abort_frame    = v4l2_request_hevc_abort_frame,
-+    .frame_params   = frame_params,
-+    .alloc_frame    = alloc_frame,
-+};
-+
-diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c
-new file mode 100644
-index 000000000000..eb00ecb40623
---- /dev/null
-+++ b/libavcodec/v4l2_req_media.c
-@@ -0,0 +1,1596 @@
-+/*
-+ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
-+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#include <errno.h>
-+#include <fcntl.h>
-+#include <poll.h>
-+#include <pthread.h>
-+#include <semaphore.h>
-+#include <stdatomic.h>
-+#include <stdbool.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <unistd.h>
-+#include <linux/media.h>
-+#include <sys/ioctl.h>
-+#include <sys/select.h>
-+#include <sys/ioctl.h>
-+
-+#include <linux/videodev2.h>
-+
-+#include "v4l2_req_dmabufs.h"
-+#include "v4l2_req_media.h"
-+#include "v4l2_req_pollqueue.h"
-+#include "v4l2_req_utils.h"
-+#include "weak_link.h"
-+
-+
-+/* floor(log2(x)) */
-+static unsigned int log2_size(size_t x)
-+{
-+    unsigned int n = 0;
-+
-+    if (x & ~0xffff) {
-+        n += 16;
-+        x >>= 16;
-+    }
-+    if (x & ~0xff) {
-+        n += 8;
-+        x >>= 8;
-+    }
-+    if (x & ~0xf) {
-+        n += 4;
-+        x >>= 4;
-+    }
-+    if (x & ~3) {
-+        n += 2;
-+        x >>= 2;
-+    }
-+    return (x & ~1) ? n + 1 : n;
-+}
-+
-+static size_t round_up_size(const size_t x)
-+{
-+    /* Admit no size < 256 */
-+    const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
-+
-+    return x >= (3 << n) ? 4 << n : (3 << n);
-+}
-+
-+struct media_request;
-+
-+struct media_pool {
-+    int fd;
-+    sem_t sem;
-+    pthread_mutex_t lock;
-+    struct media_request * free_reqs;
-+    struct pollqueue * pq;
-+};
-+
-+struct media_request {
-+    struct media_request * next;
-+    struct media_pool * mp;
-+    int fd;
-+    struct polltask * pt;
-+};
-+
-+
-+static inline int do_trywait(sem_t *const sem)
-+{
-+    while (sem_trywait(sem)) {
-+        if (errno != EINTR)
-+            return -errno;
-+    }
-+    return 0;
-+}
-+
-+static inline int do_wait(sem_t *const sem)
-+{
-+    while (sem_wait(sem)) {
-+        if (errno != EINTR)
-+            return -errno;
-+    }
-+    return 0;
-+}
-+
-+static int request_buffers(int video_fd, unsigned int type,
-+                           enum v4l2_memory memory, unsigned int buffers_count)
-+{
-+    struct v4l2_requestbuffers buffers;
-+    int rc;
-+
-+    memset(&buffers, 0, sizeof(buffers));
-+    buffers.type = type;
-+    buffers.memory = memory;
-+    buffers.count = buffers_count;
-+
-+    rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers);
-+    if (rc < 0) {
-+        rc = -errno;
-+        request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc));
-+        return rc;
-+    }
-+
-+    return 0;
-+}
-+
-+
-+static int set_stream(int video_fd, unsigned int type, bool enable)
-+{
-+    enum v4l2_buf_type buf_type = type;
-+    int rc;
-+
-+    rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF,
-+           &buf_type);
-+    if (rc < 0) {
-+        rc = -errno;
-+        request_log("Unable to %sable stream: %s\n",
-+                enable ? "en" : "dis", strerror(-rc));
-+        return rc;
-+    }
-+
-+    return 0;
-+}
-+
-+
-+
-+struct media_request * media_request_get(struct media_pool * const mp)
-+{
-+    struct media_request *req = NULL;
-+
-+    /* Timeout handled by poll code */
-+    if (do_wait(&mp->sem))
-+        return NULL;
-+
-+    pthread_mutex_lock(&mp->lock);
-+    req = mp->free_reqs;
-+    if (req) {
-+        mp->free_reqs = req->next;
-+        req->next = NULL;
-+    }
-+    pthread_mutex_unlock(&mp->lock);
-+    return req;
-+}
-+
-+int media_request_fd(const struct media_request * const req)
-+{
-+    return req->fd;
-+}
-+
-+int media_request_start(struct media_request * const req)
-+{
-+    while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1)
-+    {
-+        const int err = errno;
-+        if (err == EINTR)
-+            continue;
-+        request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err));
-+        return -err;
-+    }
-+
-+    pollqueue_add_task(req->pt, 2000);
-+    return 0;
-+}
-+
-+static void media_request_done(void *v, short revents)
-+{
-+    struct media_request *const req = v;
-+    struct media_pool *const mp = req->mp;
-+
-+    /* ** Not sure what to do about timeout */
-+
-+    if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0)
-+        request_log("Unable to reinit media request: %s\n",
-+                strerror(errno));
-+
-+    pthread_mutex_lock(&mp->lock);
-+    req->next = mp->free_reqs;
-+    mp->free_reqs = req;
-+    pthread_mutex_unlock(&mp->lock);
-+    sem_post(&mp->sem);
-+}
-+
-+int media_request_abort(struct media_request ** const preq)
-+{
-+    struct media_request * const req = *preq;
-+
-+    if (req == NULL)
-+        return 0;
-+    *preq = NULL;
-+
-+    media_request_done(req, 0);
-+    return 0;
-+}
-+
-+static void delete_req_chain(struct media_request * const chain)
-+{
-+    struct media_request * next = chain;
-+    while (next) {
-+        struct media_request * const req = next;
-+        next = req->next;
-+        if (req->pt)
-+            polltask_delete(&req->pt);
-+        if (req->fd != -1)
-+            close(req->fd);
-+        free(req);
-+    }
-+}
-+
-+struct media_pool * media_pool_new(const char * const media_path,
-+                   struct pollqueue * const pq,
-+                   const unsigned int n)
-+{
-+    struct media_pool * const mp = calloc(1, sizeof(*mp));
-+    unsigned int i;
-+
-+    if (!mp)
-+        goto fail0;
-+
-+    mp->pq = pq;
-+    pthread_mutex_init(&mp->lock, NULL);
-+    mp->fd = open(media_path, O_RDWR | O_NONBLOCK);
-+    if (mp->fd == -1) {
-+        request_log("Failed to open '%s': %s\n", media_path, strerror(errno));
-+        goto fail1;
-+    }
-+
-+    for (i = 0; i != n; ++i) {
-+        struct media_request * req = malloc(sizeof(*req));
-+        if (!req)
-+            goto fail4;
-+
-+        *req = (struct media_request){
-+            .next = mp->free_reqs,
-+            .mp = mp,
-+            .fd = -1
-+        };
-+        mp->free_reqs = req;
-+
-+        if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) {
-+            request_log("Failed to alloc request %d: %s\n", i, strerror(errno));
-+            goto fail4;
-+        }
-+
-+        req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req);
-+        if (!req->pt)
-+            goto fail4;
-+    }
-+
-+    sem_init(&mp->sem, 0, n);
-+
-+    return mp;
-+
-+fail4:
-+    delete_req_chain(mp->free_reqs);
-+    close(mp->fd);
-+    pthread_mutex_destroy(&mp->lock);
-+fail1:
-+    free(mp);
-+fail0:
-+    return NULL;
-+}
-+
-+void media_pool_delete(struct media_pool ** pMp)
-+{
-+    struct media_pool * const mp = *pMp;
-+
-+    if (!mp)
-+        return;
-+    *pMp = NULL;
-+
-+    delete_req_chain(mp->free_reqs);
-+    close(mp->fd);
-+    sem_destroy(&mp->sem);
-+    pthread_mutex_destroy(&mp->lock);
-+    free(mp);
-+}
-+
-+
-+#define INDEX_UNSET (~(uint32_t)0)
-+
-+enum qent_status {
-+    QENT_NEW = 0,       // Initial state - shouldn't last
-+    QENT_FREE,          // On free chain
-+    QENT_PENDING,       // User has ent
-+    QENT_WAITING,       // On inuse
-+    QENT_DONE,          // Frame rx
-+    QENT_ERROR,         // Error
-+    QENT_IMPORT
-+};
-+
-+struct qent_base {
-+    atomic_int ref_count;
-+    struct qent_base *next;
-+    struct qent_base *prev;
-+    enum qent_status status;
-+    uint32_t index;
-+    struct dmabuf_h *dh[VIDEO_MAX_PLANES];
-+    struct timeval timestamp;
-+};
-+
-+struct qent_src {
-+    struct qent_base base;
-+    int fixed_size;
-+};
-+
-+struct qent_dst {
-+    struct qent_base base;
-+    bool waiting;
-+    pthread_mutex_t lock;
-+    pthread_cond_t cond;
-+    struct ff_weak_link_client * mbc_wl;
-+};
-+
-+struct qe_list_head {
-+    struct qent_base *head;
-+    struct qent_base *tail;
-+};
-+
-+struct buf_pool {
-+    pthread_mutex_t lock;
-+    sem_t free_sem;
-+    enum v4l2_buf_type buf_type;
-+    struct qe_list_head free;
-+    struct qe_list_head inuse;
-+};
-+
-+
-+static inline struct qent_dst *base_to_dst(struct qent_base *be)
-+{
-+    return (struct qent_dst *)be;
-+}
-+
-+static inline struct qent_src *base_to_src(struct qent_base *be)
-+{
-+    return (struct qent_src *)be;
-+}
-+
-+
-+#define QENT_BASE_INITIALIZER {\
-+    .ref_count = ATOMIC_VAR_INIT(0),\
-+    .status = QENT_NEW,\
-+    .index  = INDEX_UNSET\
-+}
-+
-+static void qe_base_uninit(struct qent_base *const be)
-+{
-+    unsigned int i;
-+    for (i = 0; i != VIDEO_MAX_PLANES; ++i) {
-+        dmabuf_free(be->dh[i]);
-+        be->dh[i] = NULL;
-+    }
-+}
-+
-+static void qe_src_free(struct qent_src *const be_src)
-+{
-+    if (!be_src)
-+        return;
-+    qe_base_uninit(&be_src->base);
-+    free(be_src);
-+}
-+
-+static struct qent_src * qe_src_new(void)
-+{
-+    struct qent_src *const be_src = malloc(sizeof(*be_src));
-+    if (!be_src)
-+        return NULL;
-+    *be_src = (struct qent_src){
-+        .base = QENT_BASE_INITIALIZER
-+    };
-+    return be_src;
-+}
-+
-+static void qe_dst_free(struct qent_dst *const be_dst)
-+{
-+    if (!be_dst)
-+        return;
-+
-+    ff_weak_link_unref(&be_dst->mbc_wl);
-+    pthread_cond_destroy(&be_dst->cond);
-+    pthread_mutex_destroy(&be_dst->lock);
-+    qe_base_uninit(&be_dst->base);
-+    free(be_dst);
-+}
-+
-+static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl)
-+{
-+    struct qent_dst *const be_dst = malloc(sizeof(*be_dst));
-+    if (!be_dst)
-+        return NULL;
-+    *be_dst = (struct qent_dst){
-+        .base = QENT_BASE_INITIALIZER,
-+        .lock = PTHREAD_MUTEX_INITIALIZER,
-+        .cond = PTHREAD_COND_INITIALIZER,
-+        .mbc_wl = ff_weak_link_ref(wl)
-+    };
-+    return be_dst;
-+}
-+
-+static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be)
-+{
-+    if (ql->tail)
-+        ql->tail->next = be;
-+    else
-+        ql->head = be;
-+    be->prev = ql->tail;
-+    be->next = NULL;
-+    ql->tail = be;
-+}
-+
-+static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be)
-+{
-+    if (!be)
-+        return NULL;
-+
-+    if (be->next)
-+        be->next->prev = be->prev;
-+    else
-+        ql->tail = be->prev;
-+    if (be->prev)
-+        be->prev->next = be->next;
-+    else
-+        ql->head = be->next;
-+    be->next = NULL;
-+    be->prev = NULL;
-+    return be;
-+}
-+
-+
-+static void bq_put_free(struct buf_pool *const bp, struct qent_base * be)
-+{
-+    ql_add_tail(&bp->free, be);
-+}
-+
-+static struct qent_base * bq_get_free(struct buf_pool *const bp)
-+{
-+    return ql_extract(&bp->free, bp->free.head);
-+}
-+
-+static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be)
-+{
-+    return ql_extract(&bp->inuse, be);
-+}
-+
-+static struct qent_base * bq_get_inuse(struct buf_pool *const bp)
-+{
-+    return ql_extract(&bp->inuse, bp->inuse.head);
-+}
-+
-+static void bq_free_all_free_src(struct buf_pool *const bp)
-+{
-+    struct qent_base *be;
-+    while ((be = bq_get_free(bp)) != NULL)
-+        qe_src_free(base_to_src(be));
-+}
-+
-+static void bq_free_all_inuse_src(struct buf_pool *const bp)
-+{
-+    struct qent_base *be;
-+    while ((be = bq_get_inuse(bp)) != NULL)
-+        qe_src_free(base_to_src(be));
-+}
-+
-+static void bq_free_all_free_dst(struct buf_pool *const bp)
-+{
-+    struct qent_base *be;
-+    while ((be = bq_get_free(bp)) != NULL)
-+        qe_dst_free(base_to_dst(be));
-+}
-+
-+static void queue_put_free(struct buf_pool *const bp, struct qent_base *be)
-+{
-+    unsigned int i;
-+
-+    pthread_mutex_lock(&bp->lock);
-+    /* Clear out state vars */
-+    be->timestamp.tv_sec = 0;
-+    be->timestamp.tv_usec = 0;
-+    be->status = QENT_FREE;
-+    for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i)
-+        dmabuf_len_set(be->dh[i], 0);
-+    bq_put_free(bp, be);
-+    pthread_mutex_unlock(&bp->lock);
-+    sem_post(&bp->free_sem);
-+}
-+
-+static bool queue_is_inuse(const struct buf_pool *const bp)
-+{
-+    return bp->inuse.tail != NULL;
-+}
-+
-+static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be)
-+{
-+    if (!be)
-+        return;
-+    pthread_mutex_lock(&bp->lock);
-+    ql_add_tail(&bp->inuse, be);
-+    be->status = QENT_WAITING;
-+    pthread_mutex_unlock(&bp->lock);
-+}
-+
-+static struct qent_base *queue_get_free(struct buf_pool *const bp)
-+{
-+    struct qent_base *buf;
-+
-+    if (do_wait(&bp->free_sem))
-+        return NULL;
-+    pthread_mutex_lock(&bp->lock);
-+    buf = bq_get_free(bp);
-+    pthread_mutex_unlock(&bp->lock);
-+    return buf;
-+}
-+
-+static struct qent_base *queue_tryget_free(struct buf_pool *const bp)
-+{
-+    struct qent_base *buf;
-+
-+    if (do_trywait(&bp->free_sem))
-+        return NULL;
-+    pthread_mutex_lock(&bp->lock);
-+    buf = bq_get_free(bp);
-+    pthread_mutex_unlock(&bp->lock);
-+    return buf;
-+}
-+
-+static struct qent_base * queue_find_extract_fd(struct buf_pool *const bp, const int fd)
-+{
-+    struct qent_base *be;
-+
-+    pthread_mutex_lock(&bp->lock);
-+    /* Expect 1st in Q, but allow anywhere */
-+    for (be = bp->inuse.head; be; be = be->next) {
-+        if (dmabuf_fd(be->dh[0]) == fd) {
-+            bq_extract_inuse(bp, be);
-+            break;
-+        }
-+    }
-+    pthread_mutex_unlock(&bp->lock);
-+
-+    return be;
-+}
-+
-+static void queue_delete(struct buf_pool *const bp)
-+{
-+    sem_destroy(&bp->free_sem);
-+    pthread_mutex_destroy(&bp->lock);
-+    free(bp);
-+}
-+
-+static struct buf_pool* queue_new(const int vfd)
-+{
-+    struct buf_pool *bp = calloc(1, sizeof(*bp));
-+    if (!bp)
-+        return NULL;
-+    pthread_mutex_init(&bp->lock, NULL);
-+    sem_init(&bp->free_sem, 0, 0);
-+    return bp;
-+}
-+
-+
-+struct mediabufs_ctl {
-+    atomic_int ref_count;  /* 0 is single ref for easier atomics */
-+    void * dc;
-+    int vfd;
-+    bool stream_on;
-+    bool polling;
-+    bool dst_fixed;             // Dst Q is fixed size
-+    pthread_mutex_t lock;
-+    struct buf_pool * src;
-+    struct buf_pool * dst;
-+    struct polltask * pt;
-+    struct pollqueue * pq;
-+    struct ff_weak_link_master * this_wlm;
-+
-+    struct v4l2_format src_fmt;
-+    struct v4l2_format dst_fmt;
-+};
-+
-+static int qe_v4l2_queue(struct qent_base *const be,
-+               const int vfd, struct media_request *const mreq,
-+               const struct v4l2_format *const fmt,
-+               const bool is_dst, const bool hold_flag)
-+{
-+    struct v4l2_buffer buffer = {
-+        .type = fmt->type,
-+        .memory = V4L2_MEMORY_DMABUF,
-+        .index = be->index
-+    };
-+    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        unsigned int i;
-+        for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) {
-+            if (is_dst)
-+                dmabuf_len_set(be->dh[i], 0);
-+
-+            /* *** Really need a pixdesc rather than a format so we can fill in data_offset */
-+            planes[i].length = dmabuf_size(be->dh[i]);
-+            planes[i].bytesused = dmabuf_len(be->dh[i]);
-+            planes[i].m.fd = dmabuf_fd(be->dh[i]);
-+        }
-+        buffer.m.planes = planes;
-+        buffer.length = i;
-+    }
-+    else {
-+        if (is_dst)
-+            dmabuf_len_set(be->dh[0], 0);
-+
-+        buffer.bytesused = dmabuf_len(be->dh[0]);
-+        buffer.length = dmabuf_size(be->dh[0]);
-+        buffer.m.fd = dmabuf_fd(be->dh[0]);
-+    }
-+
-+    if (!is_dst && mreq) {
-+        buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD;
-+        buffer.request_fd = media_request_fd(mreq);
-+        if (hold_flag)
-+            buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
-+    }
-+
-+    if (is_dst)
-+        be->timestamp = (struct timeval){0,0};
-+
-+    buffer.timestamp = be->timestamp;
-+
-+    while (ioctl(vfd, VIDIOC_QBUF, &buffer)) {
-+        const int err = errno;
-+        if (err != EINTR) {
-+            request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err));
-+            return -err;
-+        }
-+    }
-+    return 0;
-+}
-+
-+static struct qent_base * qe_dequeue(struct buf_pool *const bp,
-+                     const int vfd,
-+                     const struct v4l2_format * const f)
-+{
-+    int fd;
-+    struct qent_base *be;
-+    int rc;
-+    const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type);
-+    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
-+    struct v4l2_buffer buffer = {
-+        .type =  f->type,
-+        .memory = V4L2_MEMORY_DMABUF
-+    };
-+    if (mp) {
-+        buffer.length = f->fmt.pix_mp.num_planes;
-+        buffer.m.planes = planes;
-+    }
-+
-+    while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 &&
-+           errno == EINTR)
-+        /* Loop */;
-+    if (rc) {
-+        request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno));
-+        return NULL;
-+    }
-+
-+    fd = mp ? planes[0].m.fd : buffer.m.fd;
-+    be = queue_find_extract_fd(bp, fd);
-+    if (!be) {
-+        request_log("Failed to find fd %d in Q\n", fd);
-+        return NULL;
-+    }
-+
-+    be->timestamp = buffer.timestamp;
-+    be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE;
-+    return be;
-+}
-+
-+static void qe_dst_done(struct qent_dst * dst_be)
-+{
-+    pthread_mutex_lock(&dst_be->lock);
-+    dst_be->waiting = false;
-+    pthread_cond_broadcast(&dst_be->cond);
-+    pthread_mutex_unlock(&dst_be->lock);
-+
-+    qent_dst_unref(&dst_be);
-+}
-+
-+static bool qe_dst_waiting(struct qent_dst *const dst_be)
-+{
-+    bool waiting;
-+    pthread_mutex_lock(&dst_be->lock);
-+    waiting = dst_be->waiting;
-+    dst_be->waiting = true;
-+    pthread_mutex_unlock(&dst_be->lock);
-+    return waiting;
-+}
-+
-+
-+static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc)
-+{
-+    return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst);
-+}
-+
-+static void mediabufs_poll_cb(void * v, short revents)
-+{
-+    struct mediabufs_ctl *mbc = v;
-+    struct qent_src *src_be = NULL;
-+    struct qent_dst *dst_be = NULL;
-+
-+    if (!revents)
-+        request_err(mbc->dc, "%s: Timeout\n", __func__);
-+
-+    pthread_mutex_lock(&mbc->lock);
-+    mbc->polling = false;
-+
-+    if ((revents & POLLOUT) != 0)
-+        src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt));
-+    if ((revents & POLLIN) != 0)
-+        dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt));
-+
-+    /* Reschedule */
-+    if (mediabufs_wants_poll(mbc)) {
-+        mbc->polling = true;
-+        pollqueue_add_task(mbc->pt, 2000);
-+    }
-+    pthread_mutex_unlock(&mbc->lock);
-+
-+    if (src_be)
-+        queue_put_free(mbc->src, &src_be->base);
-+    if (dst_be)
-+        qe_dst_done(dst_be);
-+}
-+
-+int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp)
-+{
-+    struct qent_base *const be = &be_src->base;
-+
-+    be->timestamp = *timestamp;
-+    return 0;
-+}
-+
-+struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst)
-+{
-+    return be_dst->base.timestamp;
-+}
-+
-+static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc)
-+{
-+    if (!be->dh[0] || len > dmabuf_size(be->dh[0])) {
-+        size_t newsize = round_up_size(len);
-+        request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize);
-+        if (!dbsc) {
-+            request_log("%s: No dmbabuf_ctrl for realloc\n", __func__);
-+            return -ENOMEM;
-+        }
-+        if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) {
-+            request_log("%s: Realloc %zd failed\n", __func__, newsize);
-+            return -ENOMEM;
-+        }
-+    }
-+    return 0;
-+}
-+
-+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc)
-+{
-+    struct qent_base *const be = &be_src->base;
-+    return qent_base_realloc(be, len, dbsc);
-+}
-+
-+
-+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc)
-+{
-+    void * dst;
-+    struct qent_base *const be = &be_src->base;
-+    int rv;
-+
-+    // Realloc doesn't copy so don't alloc if offset != 0
-+    if ((rv = qent_base_realloc(be, offset + len,
-+                                be_src->fixed_size || offset ? NULL : dbsc)) != 0)
-+        return rv;
-+
-+    dmabuf_write_start(be->dh[0]);
-+    dst = dmabuf_map(be->dh[0]);
-+    if (!dst)
-+        return -1;
-+    memcpy((char*)dst + offset, src, len);
-+    dmabuf_len_set(be->dh[0], len);
-+    dmabuf_write_end(be->dh[0]);
-+    return 0;
-+}
-+
-+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane)
-+{
-+    const struct qent_base *const be = &be_dst->base;
-+
-+    return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? NULL : be->dh[plane];
-+}
-+
-+int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane)
-+{
-+    return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane)));
-+}
-+
-+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
-+                struct media_request **const pmreq,
-+                struct qent_src **const psrc_be,
-+                struct qent_dst *const dst_be,
-+                const bool is_final)
-+{
-+    struct media_request * mreq = *pmreq;
-+    struct qent_src *const src_be = *psrc_be;
-+
-+    // Req & src are always both "consumed"
-+    *pmreq = NULL;
-+    *psrc_be = NULL;
-+
-+    pthread_mutex_lock(&mbc->lock);
-+
-+    if (!src_be)
-+        goto fail1;
-+
-+    if (dst_be) {
-+        if (qe_dst_waiting(dst_be)) {
-+            request_info(mbc->dc, "Request buffer already waiting on start\n");
-+            goto fail1;
-+        }
-+        dst_be->base.timestamp = (struct timeval){0,0};
-+        if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false))
-+            goto fail1;
-+
-+        qent_dst_ref(dst_be);
-+        queue_put_inuse(mbc->dst, &dst_be->base);
-+    }
-+
-+    if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final))
-+        goto fail1;
-+    queue_put_inuse(mbc->src, &src_be->base);
-+
-+    if (!mbc->polling && mediabufs_wants_poll(mbc)) {
-+        mbc->polling = true;
-+        pollqueue_add_task(mbc->pt, 2000);
-+    }
-+    pthread_mutex_unlock(&mbc->lock);
-+
-+    if (media_request_start(mreq))
-+        return MEDIABUFS_ERROR_OPERATION_FAILED;
-+
-+    return MEDIABUFS_STATUS_SUCCESS;
-+
-+fail1:
-+    media_request_abort(&mreq);
-+    if (src_be)
-+        queue_put_free(mbc->src, &src_be->base);
-+
-+// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q
-+    if (dst_be) {
-+        dst_be->base.status = QENT_ERROR;
-+        qe_dst_done(dst_be);
-+    }
-+    pthread_mutex_unlock(&mbc->lock);
-+    return MEDIABUFS_ERROR_OPERATION_FAILED;
-+}
-+
-+
-+static int qe_alloc_from_fmt(struct qent_base *const be,
-+                   struct dmabufs_ctl *const dbsc,
-+                   const struct v4l2_format *const fmt)
-+{
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        unsigned int i;
-+        for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) {
-+            be->dh[i] = dmabuf_realloc(dbsc, be->dh[i],
-+                fmt->fmt.pix_mp.plane_fmt[i].sizeimage);
-+            /* On failure tidy up and die */
-+            if (!be->dh[i]) {
-+                while (i--) {
-+                    dmabuf_free(be->dh[i]);
-+                    be->dh[i] = NULL;
-+                }
-+                return -1;
-+            }
-+        }
-+    }
-+    else {
-+//      be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage);
-+        size_t size = fmt->fmt.pix.sizeimage;
-+        be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size);
-+        if (!be->dh[0])
-+            return -1;
-+    }
-+    return 0;
-+}
-+
-+static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd,
-+            const enum v4l2_buf_type buftype,
-+            uint32_t pixfmt,
-+            const unsigned int width, const unsigned int height,
-+                               const size_t bufsize)
-+{
-+    *fmt = (struct v4l2_format){.type = buftype};
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) {
-+        fmt->fmt.pix_mp.width = width;
-+        fmt->fmt.pix_mp.height = height;
-+        fmt->fmt.pix_mp.pixelformat = pixfmt;
-+        if (bufsize) {
-+            fmt->fmt.pix_mp.num_planes = 1;
-+            fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize;
-+        }
-+    }
-+    else {
-+        fmt->fmt.pix.width = width;
-+        fmt->fmt.pix.height = height;
-+        fmt->fmt.pix.pixelformat = pixfmt;
-+        fmt->fmt.pix.sizeimage = bufsize;
-+    }
-+
-+    while (ioctl(fd, VIDIOC_S_FMT, fmt))
-+        if (errno != EINTR)
-+            return MEDIABUFS_ERROR_OPERATION_FAILED;
-+
-+    // Treat anything where we don't get at least what we asked for as a fail
-+    if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) {
-+        if (fmt->fmt.pix_mp.width < width ||
-+            fmt->fmt.pix_mp.height < height ||
-+            fmt->fmt.pix_mp.pixelformat != pixfmt) {
-+            return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
-+        }
-+    }
-+    else {
-+        if (fmt->fmt.pix.width < width ||
-+            fmt->fmt.pix.height < height ||
-+            fmt->fmt.pix.pixelformat != pixfmt) {
-+            return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
-+        }
-+    }
-+
-+    return MEDIABUFS_STATUS_SUCCESS;
-+}
-+
-+static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt,
-+                   const int fd,
-+                   const unsigned int type_v4l2,
-+                   const uint32_t flags_must,
-+                   const uint32_t flags_not,
-+                   const unsigned int width,
-+                   const unsigned int height,
-+                   mediabufs_dst_fmt_accept_fn *const accept_fn,
-+                   void *const accept_v)
-+{
-+    unsigned int i;
-+
-+    for (i = 0;; ++i) {
-+        struct v4l2_fmtdesc fmtdesc = {
-+            .index = i,
-+            .type = type_v4l2
-+        };
-+        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
-+            if (errno != EINTR)
-+                return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
-+        }
-+        if ((fmtdesc.flags & flags_must) != flags_must ||
-+            (fmtdesc.flags & flags_not))
-+            continue;
-+        if (!accept_fn(accept_v, &fmtdesc))
-+            continue;
-+
-+        if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat,
-+                width, height, 0) == MEDIABUFS_STATUS_SUCCESS)
-+            return MEDIABUFS_STATUS_SUCCESS;
-+    }
-+    return 0;
-+}
-+
-+
-+/* Wait for qent done */
-+
-+MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst)
-+{
-+    struct qent_base *const be = &be_dst->base;
-+    enum qent_status estat;
-+
-+    pthread_mutex_lock(&be_dst->lock);
-+    while (be_dst->waiting &&
-+           !pthread_cond_wait(&be_dst->cond, &be_dst->lock))
-+        /* Loop */;
-+    estat = be->status;
-+    pthread_mutex_unlock(&be_dst->lock);
-+
-+    return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS :
-+        estat == QENT_ERROR ? MEDIABUFS_ERROR_DECODING_ERROR :
-+            MEDIABUFS_ERROR_OPERATION_FAILED;
-+}
-+
-+const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no)
-+{
-+    struct qent_base *const be = &be_dst->base;
-+    return dmabuf_map(be->dh[buf_no]);
-+}
-+
-+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst)
-+{
-+    struct qent_base *const be = &be_dst->base;
-+    unsigned int i;
-+    for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) {
-+        if (dmabuf_read_start(be->dh[i])) {
-+            while (i--)
-+                dmabuf_read_end(be->dh[i]);
-+            return MEDIABUFS_ERROR_ALLOCATION_FAILED;
-+        }
-+    }
-+    return MEDIABUFS_STATUS_SUCCESS;
-+}
-+
-+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst)
-+{
-+    struct qent_base *const be = &be_dst->base;
-+    unsigned int i;
-+    MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS;
-+
-+    for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) {
-+        if (dmabuf_read_end(be->dh[i]))
-+            status = MEDIABUFS_ERROR_OPERATION_FAILED;
-+    }
-+    return status;
-+}
-+
-+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst)
-+{
-+    if (be_dst)
-+        atomic_fetch_add(&be_dst->base.ref_count, 1);
-+    return be_dst;
-+}
-+
-+void qent_dst_unref(struct qent_dst ** const pbe_dst)
-+{
-+    struct qent_dst * const be_dst = *pbe_dst;
-+    struct mediabufs_ctl * mbc;
-+    if (!be_dst)
-+        return;
-+    *pbe_dst = NULL;
-+
-+    if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0)
-+        return;
-+
-+    if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) {
-+        queue_put_free(mbc->dst, &be_dst->base);
-+        ff_weak_link_unlock(be_dst->mbc_wl);
-+    }
-+    else {
-+        qe_dst_free(be_dst);
-+    }
-+}
-+
-+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
-+                unsigned int plane,
-+                int fd, size_t size)
-+{
-+    struct qent_base *const be = &be_dst->base;
-+    struct dmabuf_h * dh;
-+
-+    if (be->status != QENT_IMPORT || be->dh[plane])
-+        return MEDIABUFS_ERROR_OPERATION_FAILED;
-+
-+    dh = dmabuf_import(fd, size);
-+    if (!dh)
-+        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
-+
-+    be->dh[plane] = dh;
-+    return MEDIABUFS_STATUS_SUCCESS;
-+}
-+
-+// Returns noof buffers created, -ve for error
-+static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[])
-+{
-+    unsigned int i;
-+
-+    struct v4l2_create_buffers cbuf = {
-+        .count = n,
-+        .memory = V4L2_MEMORY_DMABUF,
-+        .format = mbc->dst_fmt,
-+    };
-+
-+    while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) {
-+        const int err = -errno;
-+        if (err != EINTR) {
-+            request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__);
-+            return -err;
-+        }
-+    }
-+
-+    if (cbuf.count != n)
-+        request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n);
-+
-+    for (i = 0; i != cbuf.count; ++i)
-+        qes[i]->base.index = cbuf.index + i;
-+
-+    return cbuf.count;
-+}
-+
-+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc)
-+{
-+    struct qent_dst * be_dst;
-+
-+    if (mbc == NULL) {
-+        be_dst = qe_dst_new(NULL);
-+        if (be_dst)
-+            be_dst->base.status = QENT_IMPORT;
-+        return be_dst;
-+    }
-+
-+    if (mbc->dst_fixed) {
-+        be_dst = base_to_dst(queue_get_free(mbc->dst));
-+        if (!be_dst)
-+            return NULL;
-+    }
-+    else {
-+        be_dst = base_to_dst(queue_tryget_free(mbc->dst));
-+        if (!be_dst) {
-+            be_dst = qe_dst_new(mbc->this_wlm);
-+            if (!be_dst)
-+                return NULL;
-+
-+            if (create_dst_bufs(mbc, 1, &be_dst) != 1) {
-+                qe_dst_free(be_dst);
-+                return NULL;
-+            }
-+        }
-+    }
-+
-+    if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) {
-+        /* Given  how create buf works we can't uncreate it on alloc failure
-+         * all we can do is put it on the free Q
-+        */
-+        queue_put_free(mbc->dst, &be_dst->base);
-+        return NULL;
-+    }
-+
-+    be_dst->base.status = QENT_PENDING;
-+    atomic_store(&be_dst->base.ref_count, 0);
-+    return be_dst;
-+}
-+
-+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc)
-+{
-+    return &mbc->dst_fmt;
-+}
-+
-+MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
-+               const unsigned int width,
-+               const unsigned int height,
-+               mediabufs_dst_fmt_accept_fn *const accept_fn,
-+               void *const accept_v)
-+{
-+    MediaBufsStatus status;
-+    unsigned int i;
-+    const enum v4l2_buf_type buf_type = mbc->dst_fmt.type;
-+    static const struct {
-+        unsigned int flags_must;
-+        unsigned int flags_not;
-+    } trys[] = {
-+        {0, V4L2_FMT_FLAG_EMULATED},
-+        {V4L2_FMT_FLAG_EMULATED, 0},
-+    };
-+    for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) {
-+        status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd,
-+                                buf_type,
-+                                trys[i].flags_must,
-+                                trys[i].flags_not,
-+                                width, height, accept_fn, accept_v);
-+        if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE)
-+            return status;
-+    }
-+
-+    if (status != MEDIABUFS_STATUS_SUCCESS)
-+        return status;
-+
-+    /* Try to create a buffer - don't alloc */
-+    return status;
-+}
-+
-+// ** This is a mess if we get partial alloc but without any way to remove
-+//    individual V4L2 Q members we are somewhat stuffed
-+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed)
-+{
-+    unsigned int i;
-+    int a = 0;
-+    unsigned int qc;
-+    struct qent_dst * qes[32];
-+
-+    if (n > 32)
-+        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
-+
-+    // Create qents first as it is hard to get rid of the V4L2 buffers on error
-+    for (qc = 0; qc != n; ++qc)
-+    {
-+        if ((qes[qc] = qe_dst_new(mbc->this_wlm)) == NULL)
-+            goto fail;
-+    }
-+
-+    if ((a = create_dst_bufs(mbc, n, qes)) < 0)
-+        goto fail;
-+
-+    for (i = 0; i != a; ++i)
-+        queue_put_free(mbc->dst, &qes[i]->base);
-+
-+    if (a != n)
-+        goto fail;
-+
-+    mbc->dst_fixed = fixed;
-+    return MEDIABUFS_STATUS_SUCCESS;
-+
-+fail:
-+    for (i = (a < 0 ? 0 : a); i != qc; ++i)
-+        qe_dst_free(qes[i]);
-+
-+    return MEDIABUFS_ERROR_ALLOCATION_FAILED;
-+}
-+
-+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc)
-+{
-+    struct qent_base * buf = queue_get_free(mbc->src);
-+    buf->status = QENT_PENDING;
-+    return base_to_src(buf);
-+}
-+
-+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src)
-+{
-+    struct qent_src *const qe_src = *pqe_src;
-+    if (!qe_src)
-+        return;
-+    *pqe_src = NULL;
-+    queue_put_free(mbc->src, &qe_src->base);
-+}
-+
-+/* src format must have been set up before this */
-+MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc,
-+                  struct dmabufs_ctl * const dbsc,
-+                  unsigned int n)
-+{
-+    unsigned int i;
-+    struct v4l2_requestbuffers req = {
-+        .count = n,
-+        .type = mbc->src_fmt.type,
-+        .memory = V4L2_MEMORY_DMABUF
-+    };
-+
-+    bq_free_all_free_src(mbc->src);
-+    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) {
-+        if (errno != EINTR) {
-+            request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__);
-+            return MEDIABUFS_ERROR_OPERATION_FAILED;
-+        }
-+    }
-+
-+    if (n > req.count) {
-+        request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n);
-+        n = req.count;
-+    }
-+
-+    for (i = 0; i != n; ++i) {
-+        struct qent_src *const be_src = qe_src_new();
-+        if (!be_src) {
-+            request_err(mbc->dc, "Failed to create src be %d\n", i);
-+            goto fail;
-+        }
-+        if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) {
-+            qe_src_free(be_src);
-+            goto fail;
-+        }
-+        be_src->base.index = i;
-+        be_src->fixed_size = !mediabufs_src_resizable(mbc);
-+
-+        queue_put_free(mbc->src, &be_src->base);
-+    }
-+
-+    return MEDIABUFS_STATUS_SUCCESS;
-+
-+fail:
-+    bq_free_all_free_src(mbc->src);
-+    req.count = 0;
-+    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 &&
-+           errno == EINTR)
-+        /* Loop */;
-+
-+    return MEDIABUFS_ERROR_OPERATION_FAILED;
-+}
-+
-+
-+
-+/*
-+ * Set stuff order:
-+ *  Set src fmt
-+ *  Set parameters (sps) on vfd
-+ *  Negotiate dst format (dst_fmt_set)
-+ *  Create src buffers
-+ *  Alloc a dst buffer or Create dst slots
-+*/
-+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc)
-+{
-+    if (mbc->stream_on)
-+        return MEDIABUFS_STATUS_SUCCESS;
-+
-+    if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) {
-+        request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type);
-+        return MEDIABUFS_ERROR_OPERATION_FAILED;
-+    }
-+
-+    if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) {
-+        request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type);
-+        set_stream(mbc->vfd, mbc->src_fmt.type, false);
-+        return MEDIABUFS_ERROR_OPERATION_FAILED;
-+    }
-+
-+    mbc->stream_on = true;
-+    return MEDIABUFS_STATUS_SUCCESS;
-+}
-+
-+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc)
-+{
-+    MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS;
-+
-+    if (!mbc->stream_on)
-+        return MEDIABUFS_STATUS_SUCCESS;
-+
-+    if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) {
-+        request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type);
-+        status = MEDIABUFS_ERROR_OPERATION_FAILED;
-+    }
-+
-+    if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) {
-+        request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type);
-+        status = MEDIABUFS_ERROR_OPERATION_FAILED;
-+    }
-+
-+    mbc->stream_on = false;
-+    return status;
-+}
-+
-+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n)
-+{
-+    struct v4l2_ext_controls controls = {
-+        .controls = control_array,
-+        .count = n
-+    };
-+
-+    if (mreq) {
-+        controls.which = V4L2_CTRL_WHICH_REQUEST_VAL;
-+        controls.request_fd = media_request_fd(mreq);
-+    }
-+
-+    while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls))
-+    {
-+        const int err = errno;
-+        if (err != EINTR) {
-+            request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err));
-+            return -err;
-+        }
-+    }
-+
-+    return 0;
-+}
-+
-+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
-+                struct media_request * const mreq,
-+                unsigned int id, void *data,
-+                unsigned int size)
-+{
-+    struct v4l2_ext_control control = {
-+        .id = id,
-+        .ptr = data,
-+        .size = size
-+    };
-+
-+    int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1);
-+    return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED;
-+}
-+
-+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
-+                                      enum v4l2_buf_type buf_type,
-+                   const uint32_t pixfmt,
-+                   const uint32_t width, const uint32_t height,
-+                                      const size_t bufsize)
-+{
-+    MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize);
-+    if (rv != MEDIABUFS_STATUS_SUCCESS)
-+        request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height);
-+
-+    return rv;
-+}
-+
-+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n)
-+{
-+    int rv = 0;
-+    while (n--) {
-+        while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) {
-+            const int err = errno;
-+            if (err != EINTR) {
-+                // Often used for probing - errors are to be expected
-+                request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err);
-+                ctrls->type = 0; // 0 is invalid
-+                rv = -err;
-+                break;
-+            }
-+        }
-+        ++ctrls;
-+    }
-+    return rv;
-+}
-+
-+int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc)
-+{
-+    // Single planar OUTPUT can only take exact size buffers
-+    // Multiplanar will take larger than negotiated
-+    return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type);
-+}
-+
-+static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc)
-+{
-+    if (!mbc)
-+        return;
-+
-+    // Break the weak link first
-+    ff_weak_link_break(&mbc->this_wlm);
-+
-+    polltask_delete(&mbc->pt);
-+
-+    mediabufs_stream_off(mbc);
-+
-+    // Empty v4l2 buffer stash
-+    request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0);
-+    request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0);
-+
-+    bq_free_all_free_src(mbc->src);
-+    bq_free_all_inuse_src(mbc->src);
-+    bq_free_all_free_dst(mbc->dst);
-+
-+    {
-+        struct qent_dst *dst_be;
-+        while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) {
-+            dst_be->base.timestamp = (struct timeval){0};
-+            dst_be->base.status = QENT_ERROR;
-+            qe_dst_done(dst_be);
-+        }
-+    }
-+
-+    queue_delete(mbc->dst);
-+    queue_delete(mbc->src);
-+    close(mbc->vfd);
-+    pthread_mutex_destroy(&mbc->lock);
-+
-+    free(mbc);
-+}
-+
-+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc)
-+{
-+    atomic_fetch_add(&mbc->ref_count, 1);
-+    return mbc;
-+}
-+
-+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc)
-+{
-+    struct mediabufs_ctl *const mbc = *pmbc;
-+    int n;
-+
-+    if (!mbc)
-+        return;
-+    *pmbc = NULL;
-+    n = atomic_fetch_sub(&mbc->ref_count, 1);
-+    if (n)
-+        return;
-+    mediabufs_ctl_delete(mbc);
-+}
-+
-+static int set_capabilities(struct mediabufs_ctl *const mbc)
-+{
-+    struct v4l2_capability capability = { 0 };
-+    uint32_t caps;
-+
-+    if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &capability)) {
-+        int err = errno;
-+        request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err));
-+        return -err;
-+    }
-+
-+    caps = (capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ?
-+            capability.device_caps :
-+            capability.capabilities;
-+
-+    if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) {
-+        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
-+        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
-+    }
-+    else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) {
-+        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-+        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-+    }
-+    else {
-+        request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps);
-+        return -EINVAL;
-+    }
-+
-+    return 0;
-+}
-+
-+/* One of these per context */
-+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq)
-+{
-+    struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc));
-+
-+    if (!mbc)
-+        return NULL;
-+
-+    mbc->dc = dc;
-+    // Default mono planar
-+    mbc->pq = pq;
-+    pthread_mutex_init(&mbc->lock, NULL);
-+
-+    /* Pick a default  - could we scan for this? */
-+    if (vpath == NULL)
-+        vpath = "/dev/media0";
-+
-+    while ((mbc->vfd = open(vpath, O_RDWR)) == -1)
-+    {
-+        const int err = errno;
-+        if (err != EINTR) {
-+            request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err));
-+            goto fail0;
-+        }
-+    }
-+
-+    if (set_capabilities(mbc)) {
-+        request_err(dc, "Bad capabilities for video dev '%s'\n", vpath);
-+        goto fail1;
-+    }
-+
-+    mbc->src = queue_new(mbc->vfd);
-+    if (!mbc->src)
-+        goto fail1;
-+    mbc->dst = queue_new(mbc->vfd);
-+    if (!mbc->dst)
-+        goto fail2;
-+    mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc);
-+    if (!mbc->pt)
-+        goto fail3;
-+    mbc->this_wlm = ff_weak_link_new(mbc);
-+    if (!mbc->this_wlm)
-+        goto fail4;
-+
-+    /* Cannot add polltask now - polling with nothing pending
-+     * generates infinite error polls
-+    */
-+    return mbc;
-+
-+fail4:
-+    polltask_delete(&mbc->pt);
-+fail3:
-+    queue_delete(mbc->dst);
-+fail2:
-+    queue_delete(mbc->src);
-+fail1:
-+    close(mbc->vfd);
-+fail0:
-+    free(mbc);
-+    request_info(dc, "%s: FAILED\n", __func__);
-+    return NULL;
-+}
-+
-+
-+
-diff --git a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h
-new file mode 100644
-index 000000000000..2f826cfb14e7
---- /dev/null
-+++ b/libavcodec/v4l2_req_media.h
-@@ -0,0 +1,151 @@
-+/*
-+e.h
-+*
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the
-+ * "Software"), to deal in the Software without restriction, including
-+ * without limitation the rights to use, copy, modify, merge, publish,
-+ * distribute, sub license, and/or sell copies of the Software, and to
-+ * permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice (including the
-+ * next paragraph) shall be included in all copies or substantial portions
-+ * of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
-+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+
-+#ifndef _MEDIA_H_
-+#define _MEDIA_H_
-+
-+#include <stdbool.h>
-+#include <stdint.h>
-+
-+struct v4l2_format;
-+struct v4l2_fmtdesc;
-+struct v4l2_query_ext_ctrl;
-+
-+struct pollqueue;
-+struct media_request;
-+struct media_pool;
-+
-+typedef enum media_buf_status {
-+    MEDIABUFS_STATUS_SUCCESS = 0,
-+    MEDIABUFS_ERROR_OPERATION_FAILED,
-+    MEDIABUFS_ERROR_DECODING_ERROR,
-+    MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE,
-+    MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT,
-+    MEDIABUFS_ERROR_ALLOCATION_FAILED,
-+} MediaBufsStatus;
-+
-+struct media_pool * media_pool_new(const char * const media_path,
-+                   struct pollqueue * const pq,
-+                   const unsigned int n);
-+void media_pool_delete(struct media_pool ** pmp);
-+
-+// Obtain a media request
-+// Will block if none availible - has a 2sec timeout
-+struct media_request * media_request_get(struct media_pool * const mp);
-+int media_request_fd(const struct media_request * const req);
-+
-+// Start this request
-+// Request structure is returned to pool once done
-+int media_request_start(struct media_request * const req);
-+
-+// Return an *unstarted* media_request to the pool
-+// May later be upgraded to allow for aborting a started req
-+int media_request_abort(struct media_request ** const preq);
-+
-+
-+struct mediabufs_ctl;
-+struct qent_src;
-+struct qent_dst;
-+struct dmabuf_h;
-+struct dmabufs_ctl;
-+
-+int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp);
-+struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst);
-+
-+// prealloc
-+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc);
-+// dbsc may be NULL if realloc not required
-+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc);
-+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane);
-+int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane);
-+MediaBufsStatus qent_dst_wait(struct qent_dst *const be);
-+void qent_dst_delete(struct qent_dst *const be);
-+// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead
-+void qent_dst_unref(struct qent_dst ** const pbe_dst);
-+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst);
-+
-+const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no);
-+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be);
-+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be);
-+/* Import an fd unattached to any mediabuf */
-+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
-+                unsigned int plane,
-+                int fd, size_t size);
-+
-+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
-+                struct media_request **const pmreq,
-+                struct qent_src **const psrc_be,
-+                struct qent_dst *const dst_be,
-+                const bool is_final);
-+// Get / alloc a dst buffer & associate with a slot
-+// If the dst pool is empty then behaviour depends on the fixed flag passed to
-+// dst_slots_create.  Default is !fixed = unlimited alloc
-+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc,
-+                           struct dmabufs_ctl *const dbsc);
-+// Create dst slots without alloc
-+// If fixed true then qent_alloc will only get slots from this pool and will
-+// block until a qent has been unrefed
-+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed);
-+
-+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc);
-+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc);
-+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc);
-+
-+typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc);
-+
-+MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
-+               const unsigned int width,
-+               const unsigned int height,
-+               mediabufs_dst_fmt_accept_fn *const accept_fn,
-+               void *const accept_v);
-+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc);
-+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src);
-+
-+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq,
-+                                struct v4l2_ext_control control_array[], unsigned int n);
-+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
-+                struct media_request * const mreq,
-+                unsigned int id, void *data,
-+                unsigned int size);
-+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n);
-+
-+int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc);
-+
-+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
-+                                      enum v4l2_buf_type buf_type,
-+                                      const uint32_t pixfmt,
-+                                      const uint32_t width, const uint32_t height,
-+                                      const size_t bufsize);
-+
-+MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw,
-+                  struct dmabufs_ctl * const dbsc,
-+                  unsigned int n);
-+
-+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc,
-+                     const char *vpath, struct pollqueue *const pq);
-+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc);
-+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc);
-+
-+
-+#endif
-diff --git a/libavcodec/v4l2_req_pollqueue.c b/libavcodec/v4l2_req_pollqueue.c
-new file mode 100644
-index 000000000000..cc8a5d400120
---- /dev/null
-+++ b/libavcodec/v4l2_req_pollqueue.c
-@@ -0,0 +1,361 @@
-+#include <errno.h>
-+#include <limits.h>
-+#include <poll.h>
-+#include <pthread.h>
-+#include <semaphore.h>
-+#include <stdatomic.h>
-+#include <stdbool.h>
-+#include <stdlib.h>
-+#include <stdint.h>
-+#include <stdio.h>
-+#include <string.h>
-+#include <unistd.h>
-+#include <sys/eventfd.h>
-+
-+#include "v4l2_req_pollqueue.h"
-+#include "v4l2_req_utils.h"
-+
-+
-+struct pollqueue;
-+
-+enum polltask_state {
-+    POLLTASK_UNQUEUED = 0,
-+    POLLTASK_QUEUED,
-+    POLLTASK_RUNNING,
-+    POLLTASK_Q_KILL,
-+    POLLTASK_RUN_KILL,
-+};
-+
-+struct polltask {
-+    struct polltask *next;
-+    struct polltask *prev;
-+    struct pollqueue *q;
-+    enum polltask_state state;
-+
-+    int fd;
-+    short events;
-+
-+    void (*fn)(void *v, short revents);
-+    void * v;
-+
-+    uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */
-+    sem_t kill_sem;
-+};
-+
-+struct pollqueue {
-+    atomic_int ref_count;
-+    pthread_mutex_t lock;
-+
-+    struct polltask *head;
-+    struct polltask *tail;
-+
-+    bool kill;
-+    bool no_prod;
-+    int prod_fd;
-+    struct polltask *prod_pt;
-+    pthread_t worker;
-+};
-+
-+struct polltask *polltask_new(struct pollqueue *const pq,
-+                              const int fd, const short events,
-+                  void (*const fn)(void *v, short revents),
-+                  void *const v)
-+{
-+    struct polltask *pt;
-+
-+    if (!events)
-+        return NULL;
-+
-+    pt = malloc(sizeof(*pt));
-+    if (!pt)
-+        return NULL;
-+
-+    *pt = (struct polltask){
-+        .next = NULL,
-+        .prev = NULL,
-+        .q = pollqueue_ref(pq),
-+        .fd = fd,
-+        .events = events,
-+        .fn = fn,
-+        .v = v
-+    };
-+
-+    sem_init(&pt->kill_sem, 0, 0);
-+
-+    return pt;
-+}
-+
-+static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt)
-+{
-+    if (pt->prev)
-+        pt->prev->next = pt->next;
-+    else
-+        pq->head = pt->next;
-+    if (pt->next)
-+        pt->next->prev = pt->prev;
-+    else
-+        pq->tail = pt->prev;
-+    pt->next = NULL;
-+    pt->prev = NULL;
-+}
-+
-+static void polltask_free(struct polltask * const pt)
-+{
-+    sem_destroy(&pt->kill_sem);
-+    free(pt);
-+}
-+
-+static int pollqueue_prod(const struct pollqueue *const pq)
-+{
-+    static const uint64_t one = 1;
-+    return write(pq->prod_fd, &one, sizeof(one));
-+}
-+
-+void polltask_delete(struct polltask **const ppt)
-+{
-+    struct polltask *const pt = *ppt;
-+    struct pollqueue * pq;
-+    enum polltask_state state;
-+    bool prodme;
-+
-+    if (!pt)
-+        return;
-+
-+    pq = pt->q;
-+    pthread_mutex_lock(&pq->lock);
-+    state = pt->state;
-+    pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL;
-+    prodme = !pq->no_prod;
-+    pthread_mutex_unlock(&pq->lock);
-+
-+    if (state != POLLTASK_UNQUEUED) {
-+        if (prodme)
-+            pollqueue_prod(pq);
-+        while (sem_wait(&pt->kill_sem) && errno == EINTR)
-+            /* loop */;
-+    }
-+
-+    // Leave zapping the ref until we have DQed the PT as might well be
-+    // legitimately used in it
-+    *ppt = NULL;
-+    polltask_free(pt);
-+    pollqueue_unref(&pq);
-+}
-+
-+static uint64_t pollqueue_now(int timeout)
-+{
-+    struct timespec now;
-+    uint64_t now_ms;
-+
-+    if (clock_gettime(CLOCK_MONOTONIC, &now))
-+        return 0;
-+    now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout;
-+    return now_ms ? now_ms : (uint64_t)1;
-+}
-+
-+void pollqueue_add_task(struct polltask *const pt, const int timeout)
-+{
-+    bool prodme = false;
-+    struct pollqueue * const pq = pt->q;
-+
-+    pthread_mutex_lock(&pq->lock);
-+    if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) {
-+        if (pq->tail)
-+            pq->tail->next = pt;
-+        else
-+            pq->head = pt;
-+        pt->prev = pq->tail;
-+        pt->next = NULL;
-+        pt->state = POLLTASK_QUEUED;
-+        pt->timeout = timeout < 0 ? 0 : pollqueue_now(timeout);
-+        pq->tail = pt;
-+        prodme = !pq->no_prod;
-+    }
-+    pthread_mutex_unlock(&pq->lock);
-+    if (prodme)
-+        pollqueue_prod(pq);
-+}
-+
-+static void *poll_thread(void *v)
-+{
-+    struct pollqueue *const pq = v;
-+    struct pollfd *a = NULL;
-+    size_t asize = 0;
-+
-+    pthread_mutex_lock(&pq->lock);
-+    do {
-+        unsigned int i;
-+        unsigned int n = 0;
-+        struct polltask *pt;
-+        struct polltask *pt_next;
-+        uint64_t now = pollqueue_now(0);
-+        int timeout = -1;
-+        int rv;
-+
-+        for (pt = pq->head; pt; pt = pt_next) {
-+            int64_t t;
-+
-+            pt_next = pt->next;
-+
-+            if (pt->state == POLLTASK_Q_KILL) {
-+                pollqueue_rem_task(pq, pt);
-+                sem_post(&pt->kill_sem);
-+                continue;
-+            }
-+
-+            if (n >= asize) {
-+                asize = asize ? asize * 2 : 4;
-+                a = realloc(a, asize * sizeof(*a));
-+                if (!a) {
-+                    request_log("Failed to realloc poll array to %zd\n", asize);
-+                    goto fail_locked;
-+                }
-+            }
-+
-+            a[n++] = (struct pollfd){
-+                .fd = pt->fd,
-+                .events = pt->events
-+            };
-+
-+            t = (int64_t)(pt->timeout - now);
-+            if (pt->timeout && t < INT_MAX &&
-+                (timeout < 0 || (int)t < timeout))
-+                timeout = (t < 0) ? 0 : (int)t;
-+        }
-+        pthread_mutex_unlock(&pq->lock);
-+
-+        if ((rv = poll(a, n, timeout)) == -1) {
-+            if (errno != EINTR) {
-+                request_log("Poll error: %s\n", strerror(errno));
-+                goto fail_unlocked;
-+            }
-+        }
-+
-+        pthread_mutex_lock(&pq->lock);
-+        now = pollqueue_now(0);
-+
-+        /* Prodding in this loop is pointless and might lead to
-+         * infinite looping
-+        */
-+        pq->no_prod = true;
-+        for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) {
-+            pt_next = pt->next;
-+
-+            /* Pending? */
-+            if (a[i].revents ||
-+                (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) {
-+                pollqueue_rem_task(pq, pt);
-+                if (pt->state == POLLTASK_QUEUED)
-+                    pt->state = POLLTASK_RUNNING;
-+                if (pt->state == POLLTASK_Q_KILL)
-+                    pt->state = POLLTASK_RUN_KILL;
-+                pthread_mutex_unlock(&pq->lock);
-+
-+                /* This can add new entries to the Q but as
-+                 * those are added to the tail our existing
-+                 * chain remains intact
-+                */
-+                pt->fn(pt->v, a[i].revents);
-+
-+                pthread_mutex_lock(&pq->lock);
-+                if (pt->state == POLLTASK_RUNNING)
-+                    pt->state = POLLTASK_UNQUEUED;
-+                if (pt->state == POLLTASK_RUN_KILL)
-+                    sem_post(&pt->kill_sem);
-+            }
-+        }
-+        pq->no_prod = false;
-+
-+    } while (!pq->kill);
-+
-+fail_locked:
-+    pthread_mutex_unlock(&pq->lock);
-+fail_unlocked:
-+    free(a);
-+    return NULL;
-+}
-+
-+static void prod_fn(void *v, short revents)
-+{
-+    struct pollqueue *const pq = v;
-+    char buf[8];
-+    if (revents)
-+        read(pq->prod_fd, buf, 8);
-+    if (!pq->kill)
-+        pollqueue_add_task(pq->prod_pt, -1);
-+}
-+
-+struct pollqueue * pollqueue_new(void)
-+{
-+    struct pollqueue *pq = malloc(sizeof(*pq));
-+    if (!pq)
-+        return NULL;
-+    *pq = (struct pollqueue){
-+        .ref_count = ATOMIC_VAR_INIT(0),
-+        .lock = PTHREAD_MUTEX_INITIALIZER,
-+        .head = NULL,
-+        .tail = NULL,
-+        .kill = false,
-+        .prod_fd = -1
-+    };
-+
-+    pq->prod_fd = eventfd(0, EFD_NONBLOCK);
-+    if (pq->prod_fd == 1)
-+        goto fail1;
-+    pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq);
-+    if (!pq->prod_pt)
-+        goto fail2;
-+    pollqueue_add_task(pq->prod_pt, -1);
-+    if (pthread_create(&pq->worker, NULL, poll_thread, pq))
-+        goto fail3;
-+    // Reset ref count which will have been inced by the add_task
-+    atomic_store(&pq->ref_count, 0);
-+    return pq;
-+
-+fail3:
-+    polltask_free(pq->prod_pt);
-+fail2:
-+    close(pq->prod_fd);
-+fail1:
-+    free(pq);
-+    return NULL;
-+}
-+
-+static void pollqueue_free(struct pollqueue *const pq)
-+{
-+    void *rv;
-+
-+    pthread_mutex_lock(&pq->lock);
-+    pq->kill = true;
-+    pollqueue_prod(pq);
-+    pthread_mutex_unlock(&pq->lock);
-+
-+    pthread_join(pq->worker, &rv);
-+    polltask_free(pq->prod_pt);
-+    pthread_mutex_destroy(&pq->lock);
-+    close(pq->prod_fd);
-+    free(pq);
-+}
-+
-+struct pollqueue * pollqueue_ref(struct pollqueue *const pq)
-+{
-+    atomic_fetch_add(&pq->ref_count, 1);
-+    return pq;
-+}
-+
-+void pollqueue_unref(struct pollqueue **const ppq)
-+{
-+    struct pollqueue * const pq = *ppq;
-+
-+    if (!pq)
-+        return;
-+    *ppq = NULL;
-+
-+    if (atomic_fetch_sub(&pq->ref_count, 1) != 0)
-+        return;
-+
-+    pollqueue_free(pq);
-+}
-+
-+
-+
-diff --git a/libavcodec/v4l2_req_pollqueue.h b/libavcodec/v4l2_req_pollqueue.h
-new file mode 100644
-index 000000000000..e1182cb2fc92
---- /dev/null
-+++ b/libavcodec/v4l2_req_pollqueue.h
-@@ -0,0 +1,18 @@
-+#ifndef POLLQUEUE_H_
-+#define POLLQUEUE_H_
-+
-+struct polltask;
-+struct pollqueue;
-+
-+struct polltask *polltask_new(struct pollqueue *const pq,
-+			      const int fd, const short events,
-+			      void (*const fn)(void *v, short revents),
-+			      void *const v);
-+void polltask_delete(struct polltask **const ppt);
-+
-+void pollqueue_add_task(struct polltask *const pt, const int timeout);
-+struct pollqueue * pollqueue_new(void);
-+void pollqueue_unref(struct pollqueue **const ppq);
-+struct pollqueue * pollqueue_ref(struct pollqueue *const pq);
-+
-+#endif /* POLLQUEUE_H_ */
-diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h
-new file mode 100644
-index 000000000000..a31cc1f4ec2a
---- /dev/null
-+++ b/libavcodec/v4l2_req_utils.h
-@@ -0,0 +1,27 @@
-+#ifndef AVCODEC_V4L2_REQ_UTILS_H
-+#define AVCODEC_V4L2_REQ_UTILS_H
-+
-+#include <stdint.h>
-+#include "libavutil/log.h"
-+
-+#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
-+
-+#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__)
-+#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__)
-+#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__)
-+#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__)
-+
-+static inline char safechar(char c) {
-+    return c > 0x20 && c < 0x7f ? c : '.';
-+}
-+
-+static inline const char * strfourcc(char tbuf[5], uint32_t fcc) {
-+    tbuf[0] = safechar((fcc >>  0) & 0xff);
-+    tbuf[1] = safechar((fcc >>  8) & 0xff);
-+    tbuf[2] = safechar((fcc >> 16) & 0xff);
-+    tbuf[3] = safechar((fcc >> 24) & 0xff);
-+    tbuf[4] = '\0';
-+    return tbuf;
-+}
-+
-+#endif
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-new file mode 100644
-index 000000000000..b0a5930844a8
---- /dev/null
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -0,0 +1,297 @@
-+/*
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+
-+
-+#include "decode.h"
-+#include "hevcdec.h"
-+#include "hwconfig.h"
-+#include "internal.h"
-+
-+#include "v4l2_request_hevc.h"
-+
-+#include "libavutil/hwcontext_drm.h"
-+
-+#include "v4l2_req_devscan.h"
-+#include "v4l2_req_dmabufs.h"
-+#include "v4l2_req_pollqueue.h"
-+#include "v4l2_req_media.h"
-+#include "v4l2_req_utils.h"
-+
-+static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
-+{
-+    const size_t wxh = w * h;
-+    size_t bits_alloc;
-+
-+    /* Annex A gives a min compression of 2 @ lvl 3.1
-+     * (wxh <= 983040) and min 4 thereafter but avoid
-+     * the odity of 983041 having a lower limit than
-+     * 983040.
-+     * Multiply by 3/2 for 4:2:0
-+     */
-+    bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
-+        wxh < 983040 * 2 ? 983040 * 3 / 4 :
-+        wxh * 3 / 8;
-+    /* Allow for bit depth */
-+    bits_alloc += (bits_alloc * bits_minus8) / 8;
-+    /* Add a few bytes (16k) for overhead */
-+    bits_alloc += 0x4000;
-+    return bits_alloc;
-+}
-+
-+static int v4l2_req_hevc_start_frame(AVCodecContext *avctx,
-+                                     av_unused const uint8_t *buffer,
-+                                     av_unused uint32_t size)
-+{
-+    const V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+    return ctx->fns->start_frame(avctx, buffer, size);
-+}
-+
-+static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
-+{
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+    return ctx->fns->decode_slice(avctx, buffer, size);
-+}
-+
-+static int v4l2_req_hevc_end_frame(AVCodecContext *avctx)
-+{
-+    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
-+    return ctx->fns->end_frame(avctx);
-+}
-+
-+static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx)
-+{
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+    ctx->fns->abort_frame(avctx);
-+}
-+
-+static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
-+{
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+    return ctx->fns->frame_params(avctx, hw_frames_ctx);
-+}
-+
-+static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame)
-+{
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+    return ctx->fns->alloc_frame(avctx, frame);
-+}
-+
-+
-+static int v4l2_request_hevc_uninit(AVCodecContext *avctx)
-+{
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+
-+    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
-+
-+    decode_q_wait(&ctx->decode_q, NULL);  // Wait for all other threads to be out of decode
-+
-+    mediabufs_ctl_unref(&ctx->mbufs);
-+    media_pool_delete(&ctx->mpool);
-+    pollqueue_unref(&ctx->pq);
-+    dmabufs_ctl_delete(&ctx->dbufs);
-+    devscan_delete(&ctx->devscan);
-+
-+    decode_q_uninit(&ctx->decode_q);
-+
-+//    if (avctx->hw_frames_ctx) {
-+//        AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-+//        av_buffer_pool_flush(hwfc->pool);
-+//    }
-+    return 0;
-+}
-+
-+static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc)
-+{
-+    AVCodecContext *const avctx = v;
-+    const HEVCContext *const h = avctx->priv_data;
-+
-+    if (h->ps.sps->bit_depth == 8) {
-+        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 ||
-+            fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) {
-+            return 1;
-+        }
-+    }
-+    else if (h->ps.sps->bit_depth == 10) {
-+        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
-+            return 1;
-+        }
-+    }
-+    return 0;
-+}
-+
-+static int v4l2_request_hevc_init(AVCodecContext *avctx)
-+{
-+    const HEVCContext *h = avctx->priv_data;
-+    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
-+    const HEVCSPS * const sps = h->ps.sps;
-+    int ret;
-+    const struct decdev * decdev;
-+    const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 1).src_pix_fmt_v4l2;  // Assuming constant for all APIs but avoiding V4L2 includes
-+    size_t src_size;
-+
-+    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
-+
-+    if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) {
-+        av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n");
-+        return (AVERROR(-ret));
-+    }
-+    ret = AVERROR(ENOMEM);  // Assume mem fail by default for these
-+
-+    if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL)
-+    {
-+        av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n");
-+        ret = AVERROR(ENODEV);
-+        goto fail0;
-+    }
-+    av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n",
-+           decdev_media_path(decdev), decdev_video_path(decdev));
-+
-+    if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) {
-+        av_log(avctx, AV_LOG_ERROR, "Unable to open dmabufs\n");
-+        goto fail0;
-+    }
-+
-+    if ((ctx->pq = pollqueue_new()) == NULL) {
-+        av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n");
-+        goto fail1;
-+    }
-+
-+    if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) {
-+        av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n");
-+        goto fail2;
-+    }
-+
-+    if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) {
-+        av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n");
-+        goto fail3;
-+    }
-+
-+    // Ask for an initial bitbuf size of max size / 4
-+    // We will realloc if we need more
-+    // Must use sps->h/w as avctx contains cropped size
-+    src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8);
-+    if (mediabufs_src_resizable(ctx->mbufs))
-+        src_size /= 4;
-+    // Kludge for conformance tests which break Annex A limits
-+    else if (src_size < 0x40000)
-+        src_size = 0x40000;
-+
-+    if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt,
-+                              sps->width, sps->height, src_size)) {
-+        char tbuf1[5];
-+        av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
-+        goto fail4;
-+    }
-+
-+    if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) {
-+        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n");
-+        ctx->fns = &V2(ff_v4l2_req_hevc, 2);
-+    }
-+    else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) {
-+        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n");
-+        ctx->fns = &V2(ff_v4l2_req_hevc, 1);
-+    }
-+    else {
-+        av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
-+        ret = AVERROR(EINVAL);
-+        goto fail4;
-+    }
-+
-+    if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) {
-+        char tbuf1[5];
-+        av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
-+        goto fail4;
-+    }
-+
-+    if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6)) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n");
-+        goto fail4;
-+    }
-+
-+    {
-+        unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering +
-+            avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6);
-+        av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots,
-+               sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering,
-+               avctx->thread_count, avctx->extra_hw_frames);
-+
-+        // extra_hw_frames is -1 if unset
-+        if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0))) {
-+            av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n");
-+            goto fail4;
-+        }
-+    }
-+
-+    if (mediabufs_stream_on(ctx->mbufs)) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed stream on\n");
-+        goto fail4;
-+    }
-+
-+    if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n");
-+        goto fail4;
-+    }
-+
-+    if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed set controls\n");
-+        goto fail5;
-+    }
-+
-+    decode_q_init(&ctx->decode_q);
-+
-+    // Set our s/w format
-+    avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;
-+
-+    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s\n",
-+           ctx->fns->name,
-+           decdev_media_path(decdev), decdev_video_path(decdev));
-+
-+    return 0;
-+
-+fail5:
-+    av_buffer_unref(&avctx->hw_frames_ctx);
-+fail4:
-+    mediabufs_ctl_unref(&ctx->mbufs);
-+fail3:
-+    media_pool_delete(&ctx->mpool);
-+fail2:
-+    pollqueue_unref(&ctx->pq);
-+fail1:
-+    dmabufs_ctl_delete(&ctx->dbufs);
-+fail0:
-+    devscan_delete(&ctx->devscan);
-+    return ret;
-+}
-+
-+const AVHWAccel ff_hevc_v4l2request_hwaccel = {
-+    .name           = "hevc_v4l2request",
-+    .type           = AVMEDIA_TYPE_VIDEO,
-+    .id             = AV_CODEC_ID_HEVC,
-+    .pix_fmt        = AV_PIX_FMT_DRM_PRIME,
-+    .alloc_frame    = v4l2_req_hevc_alloc_frame,
-+    .start_frame    = v4l2_req_hevc_start_frame,
-+    .decode_slice   = v4l2_req_hevc_decode_slice,
-+    .end_frame      = v4l2_req_hevc_end_frame,
-+    .abort_frame    = v4l2_req_hevc_abort_frame,
-+    .init           = v4l2_request_hevc_init,
-+    .uninit         = v4l2_request_hevc_uninit,
-+    .priv_data_size = sizeof(V4L2RequestContextHEVC),
-+    .frame_params   = v4l2_req_hevc_frame_params,
-+    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
-+};
-diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h
-new file mode 100644
-index 000000000000..f14f594564d7
---- /dev/null
-+++ b/libavcodec/v4l2_request_hevc.h
-@@ -0,0 +1,102 @@
-+#ifndef AVCODEC_V4L2_REQUEST_HEVC_H
-+#define AVCODEC_V4L2_REQUEST_HEVC_H
-+
-+#include <stdint.h>
-+#include <drm_fourcc.h>
-+#include "v4l2_req_decode_q.h"
-+
-+#ifndef DRM_FORMAT_NV15
-+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
-+#endif
-+
-+#ifndef DRM_FORMAT_NV20
-+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
-+#endif
-+
-+// P030 should be defined in drm_fourcc.h and hopefully will be sometime
-+// in the future but until then...
-+#ifndef DRM_FORMAT_P030
-+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
-+#endif
-+
-+#ifndef DRM_FORMAT_NV15
-+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
-+#endif
-+
-+#ifndef DRM_FORMAT_NV20
-+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
-+#endif
-+
-+#include <linux/videodev2.h>
-+#ifndef V4L2_CID_CODEC_BASE
-+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
-+#endif
-+
-+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
-+// in drm_fourcc.h hopefully will be sometime in the future but until then...
-+#ifndef V4L2_PIX_FMT_NV12_10_COL128
-+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
-+#endif
-+
-+#ifndef V4L2_PIX_FMT_NV12_COL128
-+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
-+#endif
-+
-+#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY
-+#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY	0x0800
-+#endif
-+
-+#define MAX_SLICES 128
-+
-+#define VCAT(name, version) name##_v##version
-+#define V2(n,v) VCAT(n, v)
-+#define V(n) V2(n, HEVC_CTRLS_VERSION)
-+
-+#define S2(x) #x
-+#define STR(x) S2(x)
-+
-+// 1 per decoder
-+struct v4l2_req_decode_fns;
-+
-+typedef struct V4L2RequestContextHEVC {
-+//    V4L2RequestContext base;
-+    const struct v4l2_req_decode_fns * fns;
-+
-+    unsigned int timestamp;  // ?? maybe uint64_t
-+
-+    int multi_slice;
-+    int decode_mode;
-+    int start_code;
-+    int max_slices;
-+
-+    req_decode_q decode_q;
-+
-+    struct devscan *devscan;
-+    struct dmabufs_ctl *dbufs;
-+    struct pollqueue *pq;
-+    struct media_pool * mpool;
-+    struct mediabufs_ctl *mbufs;
-+} V4L2RequestContextHEVC;
-+
-+typedef struct v4l2_req_decode_fns {
-+    int src_pix_fmt_v4l2;
-+    const char * name;
-+
-+    // Init setup
-+    int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
-+    int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
-+
-+    // Passthrough of hwaccel fns
-+    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
-+    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
-+    int (*end_frame)(AVCodecContext *avctx);
-+    void (*abort_frame)(AVCodecContext *avctx);
-+    int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
-+    int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame);
-+} v4l2_req_decode_fns;
-+
-+
-+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
-+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
-+
-+#endif
-
-From 43c7c3d42888304f2f5ca39739bf88baa3c1861e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Apr 2021 19:30:36 +0100
-Subject: [PATCH 013/186] Add no_cvt_hw option to ffmpeg
-
----
- fftools/ffmpeg.c     | 6 ++++--
- fftools/ffmpeg.h     | 2 ++
- fftools/ffmpeg_opt.c | 3 +++
- 3 files changed, 9 insertions(+), 2 deletions(-)
-
-diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
-index ca5431aeb401..719463016216 100644
---- a/fftools/ffmpeg.c
-+++ b/fftools/ffmpeg.c
-@@ -2008,6 +2008,9 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref
-         (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data))
-         need_reinit = 1;
- 
-+    if (no_cvt_hw && fg->graph)
-+        need_reinit = 0;
-+
-     if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) {
-         if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9))
-             need_reinit = 1;
-@@ -2277,8 +2280,7 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_
-         decoded_frame->top_field_first = ist->top_field_first;
- 
-     ist->frames_decoded++;
--
--    if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
-+    if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
-         err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame);
-         if (err < 0)
-             goto fail;
-diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
-index f1412f6446b8..8f478619b305 100644
---- a/fftools/ffmpeg.h
-+++ b/fftools/ffmpeg.h
-@@ -729,6 +729,8 @@ extern enum VideoSyncMethod video_sync_method;
- extern float frame_drop_threshold;
- extern int do_benchmark;
- extern int do_benchmark_all;
-+extern int no_cvt_hw;
-+extern int do_deinterlace;
- extern int do_hex_dump;
- extern int do_pkt_dump;
- extern int copy_ts;
-diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
-index 055275d81394..761db365883b 100644
---- a/fftools/ffmpeg_opt.c
-+++ b/fftools/ffmpeg_opt.c
-@@ -71,6 +71,7 @@ enum VideoSyncMethod video_sync_method = VSYNC_AUTO;
- float frame_drop_threshold = 0;
- int do_benchmark      = 0;
- int do_benchmark_all  = 0;
-+int no_cvt_hw         = 0;
- int do_hex_dump       = 0;
- int do_pkt_dump       = 0;
- int copy_ts           = 0;
-@@ -1427,6 +1428,8 @@ const OptionDef options[] = {
-         "add timings for benchmarking" },
-     { "benchmark_all",  OPT_BOOL | OPT_EXPERT,                       { &do_benchmark_all },
-       "add timings for each task" },
-+    { "no_cvt_hw",      OPT_BOOL | OPT_EXPERT,                       { &no_cvt_hw },
-+      "do not auto-convert hw frames to sw" },
-     { "progress",       HAS_ARG | OPT_EXPERT,                        { .func_arg = opt_progress },
-       "write program-readable progress information", "url" },
-     { "stdin",          OPT_BOOL | OPT_EXPERT,                       { &stdin_interaction },
-
-From 0b46976bcb9c699235dd9a3ff6528c2dfcdba4ec Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 28 Apr 2021 10:16:39 +0100
-Subject: [PATCH 014/186] Add vout_drm
-
----
- configure                |   4 +
- libavdevice/Makefile     |   1 +
- libavdevice/alldevices.c |   1 +
- libavdevice/drm_vout.c   | 638 +++++++++++++++++++++++++++++++++++++++
- 4 files changed, 644 insertions(+)
- create mode 100644 libavdevice/drm_vout.c
-
-diff --git a/configure b/configure
-index c09144673050..fb72aa89a60c 100755
---- a/configure
-+++ b/configure
-@@ -346,6 +346,7 @@ External library support:
-   --enable-libnpp          enable Nvidia Performance Primitives-based code [no]
-   --enable-mmal            enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
-   --enable-sand            enable sand video formats [rpi]
-+  --enable-vout-drm        enable the vout_drm module - for internal testing only [no]
-   --disable-nvdec          disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
-   --disable-nvenc          disable Nvidia video encoding code [autodetect]
-   --enable-omx             enable OpenMAX IL code [no]
-@@ -1940,6 +1941,7 @@ FEATURE_LIST="
-     small
-     static
-     swscale_alpha
-+    vout_drm
- "
- 
- # this list should be kept in linking order
-@@ -3559,8 +3561,10 @@ sndio_indev_deps="sndio"
- sndio_outdev_deps="sndio"
- v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
- v4l2_indev_suggest="libv4l2"
-+v4l2_outdev_deps="libdrm"
- v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
- v4l2_outdev_suggest="libv4l2"
-+vout_drm_outdev_deps="libdrm vout_drm"
- vfwcap_indev_deps="vfw32 vfwcap_defines"
- xcbgrab_indev_deps="libxcb"
- xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
-diff --git a/libavdevice/Makefile b/libavdevice/Makefile
-index 8a62822b69ec..36aac301861a 100644
---- a/libavdevice/Makefile
-+++ b/libavdevice/Makefile
-@@ -48,6 +48,7 @@ OBJS-$(CONFIG_SNDIO_OUTDEV)              += sndio_enc.o sndio.o
- OBJS-$(CONFIG_V4L2_INDEV)                += v4l2.o v4l2-common.o timefilter.o
- OBJS-$(CONFIG_V4L2_OUTDEV)               += v4l2enc.o v4l2-common.o
- OBJS-$(CONFIG_VFWCAP_INDEV)              += vfwcap.o
-+OBJS-$(CONFIG_VOUT_DRM_OUTDEV)           += drm_vout.o
- OBJS-$(CONFIG_XCBGRAB_INDEV)             += xcbgrab.o
- OBJS-$(CONFIG_XV_OUTDEV)                 += xv.o
- 
-diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
-index 8a90fcb5d782..e2a8669f2712 100644
---- a/libavdevice/alldevices.c
-+++ b/libavdevice/alldevices.c
-@@ -52,6 +52,7 @@ extern const FFOutputFormat ff_sndio_muxer;
- extern const AVInputFormat  ff_v4l2_demuxer;
- extern const FFOutputFormat ff_v4l2_muxer;
- extern const AVInputFormat  ff_vfwcap_demuxer;
-+extern const FFOutputFormat ff_vout_drm_muxer;
- extern const AVInputFormat  ff_xcbgrab_demuxer;
- extern const FFOutputFormat ff_xv_muxer;
- 
-diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c
-new file mode 100644
-index 000000000000..cfb33ce7c319
---- /dev/null
-+++ b/libavdevice/drm_vout.c
-@@ -0,0 +1,638 @@
-+/*
-+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+
-+// *** This module is a work in progress and its utility is strictly
-+//     limited to testing.
-+
-+#include "libavutil/opt.h"
-+#include "libavutil/pixdesc.h"
-+#include "libavutil/hwcontext_drm.h"
-+#include "libavformat/mux.h"
-+#include "avdevice.h"
-+
-+#include "pthread.h"
-+#include <semaphore.h>
-+#include <unistd.h>
-+
-+#include <xf86drm.h>
-+#include <xf86drmMode.h>
-+
-+#define TRACE_ALL 0
-+
-+#define DRM_MODULE "vc4"
-+
-+#define ERRSTR strerror(errno)
-+
-+struct drm_setup {
-+   int conId;
-+   uint32_t crtcId;
-+   int crtcIdx;
-+   uint32_t planeId;
-+   unsigned int out_fourcc;
-+   struct {
-+       int x, y, width, height;
-+   } compose;
-+};
-+
-+typedef struct drm_aux_s {
-+    unsigned int fb_handle;
-+    uint32_t bo_handles[AV_DRM_MAX_PLANES];
-+    AVFrame * frame;
-+} drm_aux_t;
-+
-+// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS
-+// we get initial flicker probably due to dodgy drm timing
-+#define AUX_SIZE 3
-+typedef struct drm_display_env_s
-+{
-+    AVClass *class;
-+
-+    int drm_fd;
-+    uint32_t con_id;
-+    struct drm_setup setup;
-+    enum AVPixelFormat avfmt;
-+    int show_all;
-+
-+    unsigned int ano;
-+    drm_aux_t aux[AUX_SIZE];
-+
-+    pthread_t q_thread;
-+    sem_t q_sem_in;
-+    sem_t q_sem_out;
-+    int q_terminate;
-+    AVFrame * q_next;
-+
-+} drm_display_env_t;
-+
-+
-+static int drm_vout_write_trailer(AVFormatContext *s)
-+{
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
-+#endif
-+
-+    return 0;
-+}
-+
-+static int drm_vout_write_header(AVFormatContext *s)
-+{
-+    const AVCodecParameters * const par = s->streams[0]->codecpar;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
-+#endif
-+    if (   s->nb_streams > 1
-+        || par->codec_type != AVMEDIA_TYPE_VIDEO
-+        || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
-+        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
-+        return AVERROR(EINVAL);
-+    }
-+
-+    return 0;
-+}
-+
-+static int find_plane(struct AVFormatContext * const avctx,
-+                      const int drmfd, const int crtcidx, const uint32_t format,
-+                      uint32_t * const pplane_id)
-+{
-+   drmModePlaneResPtr planes;
-+   drmModePlanePtr plane;
-+   unsigned int i;
-+   unsigned int j;
-+   int ret = 0;
-+
-+   planes = drmModeGetPlaneResources(drmfd);
-+   if (!planes)
-+   {
-+       av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR);
-+       return -1;
-+   }
-+
-+   for (i = 0; i < planes->count_planes; ++i) {
-+      plane = drmModeGetPlane(drmfd, planes->planes[i]);
-+      if (!planes)
-+      {
-+          av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR);
-+          break;
-+      }
-+
-+      if (!(plane->possible_crtcs & (1 << crtcidx))) {
-+         drmModeFreePlane(plane);
-+         continue;
-+      }
-+
-+      for (j = 0; j < plane->count_formats; ++j) {
-+         if (plane->formats[j] == format)
-+            break;
-+      }
-+
-+      if (j == plane->count_formats) {
-+         drmModeFreePlane(plane);
-+         continue;
-+      }
-+
-+      *pplane_id = plane->plane_id;
-+      drmModeFreePlane(plane);
-+      break;
-+   }
-+
-+   if (i == planes->count_planes)
-+      ret = -1;
-+
-+   drmModeFreePlaneResources(planes);
-+   return ret;
-+}
-+
-+static void da_uninit(drm_display_env_t * const de, drm_aux_t * da)
-+{
-+    if (da->fb_handle != 0) {
-+        drmModeRmFB(de->drm_fd, da->fb_handle);
-+        da->fb_handle = 0;
-+    }
-+
-+    for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) {
-+        if (da->bo_handles[i]) {
-+            struct drm_gem_close gem_close = {.handle = da->bo_handles[i]};
-+            drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
-+            da->bo_handles[i] = 0;
-+        }
-+    }
-+    av_frame_free(&da->frame);
-+}
-+
-+static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame)
-+{
-+    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
-+    drm_aux_t * da = de->aux + de->ano;
-+    const uint32_t format = desc->layers[0].format;
-+    int ret = 0;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd);
-+#endif
-+
-+    if (de->setup.out_fourcc != format) {
-+        if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) {
-+            av_frame_free(&frame);
-+            av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format);
-+            return -1;
-+        }
-+        de->setup.out_fourcc = format;
-+    }
-+
-+    {
-+        drmVBlank vbl = {
-+            .request = {
-+                .type = DRM_VBLANK_RELATIVE,
-+                .sequence = 0
-+            }
-+        };
-+
-+        while (drmWaitVBlank(de->drm_fd, &vbl)) {
-+            if (errno != EINTR) {
-+//                av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR);
-+                break;
-+            }
-+        }
-+    }
-+
-+    da_uninit(de, da);
-+
-+    {
-+        uint32_t pitches[4] = {0};
-+        uint32_t offsets[4] = {0};
-+        uint64_t modifiers[4] = {0};
-+        uint32_t bo_handles[4] = {0};
-+        int i, j, n;
-+
-+        da->frame = frame;
-+
-+        for (i = 0; i < desc->nb_objects; ++i) {
-+            if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) {
-+                av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR);
-+                return -1;
-+            }
-+        }
-+
-+        n = 0;
-+        for (i = 0; i < desc->nb_layers; ++i) {
-+            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
-+                const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
-+                const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
-+                pitches[n] = p->pitch;
-+                offsets[n] = p->offset;
-+                modifiers[n] = obj->format_modifier;
-+                bo_handles[n] = da->bo_handles[p->object_index];
-+                ++n;
-+            }
-+        }
-+
-+#if 1 && TRACE_ALL
-+        av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
-+               " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
-+               av_frame_cropped_width(frame),
-+               av_frame_cropped_height(frame),
-+               desc->layers[0].format,
-+               bo_handles[0],
-+               bo_handles[1],
-+               bo_handles[2],
-+               bo_handles[3],
-+               pitches[0],
-+               pitches[1],
-+               pitches[2],
-+               pitches[3],
-+               offsets[0],
-+               offsets[1],
-+               offsets[2],
-+               offsets[3],
-+               (long long)modifiers[0],
-+               (long long)modifiers[1],
-+               (long long)modifiers[2],
-+               (long long)modifiers[3]
-+               );
-+#endif
-+
-+        if (drmModeAddFB2WithModifiers(de->drm_fd,
-+                                         av_frame_cropped_width(frame),
-+                                         av_frame_cropped_height(frame),
-+                                         desc->layers[0].format, bo_handles,
-+                                         pitches, offsets, modifiers,
-+                                         &da->fb_handle, DRM_MODE_FB_MODIFIERS /** 0 if no mods */) != 0) {
-+            av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR);
-+            return -1;
-+        }
-+    }
-+
-+    ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId,
-+                              da->fb_handle, 0,
-+                de->setup.compose.x, de->setup.compose.y,
-+                de->setup.compose.width,
-+                de->setup.compose.height,
-+                0, 0,
-+                av_frame_cropped_width(frame) << 16,
-+                av_frame_cropped_height(frame) << 16);
-+
-+    if (ret != 0) {
-+        av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR);
-+    }
-+
-+    de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1;
-+
-+    return ret;
-+}
-+
-+static int do_sem_wait(sem_t * const sem, const int nowait)
-+{
-+    while (nowait ? sem_trywait(sem) : sem_wait(sem)) {
-+        if (errno != EINTR)
-+            return -errno;
-+    }
-+    return 0;
-+}
-+
-+static void * display_thread(void * v)
-+{
-+    AVFormatContext * const s = v;
-+    drm_display_env_t * const de = s->priv_data;
-+    int i;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
-+#endif
-+
-+    sem_post(&de->q_sem_out);
-+
-+    for (;;) {
-+        AVFrame * frame;
-+
-+        do_sem_wait(&de->q_sem_in, 0);
-+
-+        if (de->q_terminate)
-+            break;
-+
-+        frame = de->q_next;
-+        de->q_next = NULL;
-+        sem_post(&de->q_sem_out);
-+
-+        do_display(s, de, frame);
-+    }
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
-+#endif
-+
-+    for (i = 0; i != AUX_SIZE; ++i)
-+        da_uninit(de, de->aux + i);
-+
-+    av_frame_free(&de->q_next);
-+
-+    return NULL;
-+}
-+
-+static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
-+{
-+    const AVFrame * const src_frame = (AVFrame *)pkt->data;
-+    AVFrame * frame;
-+    drm_display_env_t * const de = s->priv_data;
-+    int ret;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
-+#endif
-+
-+    if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) {
-+        av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts);
-+        return 0;
-+    }
-+
-+    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
-+        frame = av_frame_alloc();
-+        av_frame_ref(frame, src_frame);
-+    }
-+    else if (src_frame->format == AV_PIX_FMT_VAAPI) {
-+        frame = av_frame_alloc();
-+        frame->format = AV_PIX_FMT_DRM_PRIME;
-+        if (av_hwframe_map(frame, src_frame, 0) != 0)
-+        {
-+            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
-+            av_frame_free(&frame);
-+            return AVERROR(EINVAL);
-+        }
-+    }
-+    else {
-+        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    ret = do_sem_wait(&de->q_sem_out, !de->show_all);
-+    if (ret) {
-+        av_frame_free(&frame);
-+    }
-+    else {
-+        de->q_next = frame;
-+        sem_post(&de->q_sem_in);
-+    }
-+
-+    return 0;
-+}
-+
-+static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
-+                          unsigned flags)
-+{
-+    av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
-+    return AVERROR_PATCHWELCOME;
-+}
-+
-+static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
-+{
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type);
-+#endif
-+    switch(type) {
-+    case AV_APP_TO_DEV_WINDOW_REPAINT:
-+        return 0;
-+    default:
-+        break;
-+    }
-+    return AVERROR(ENOSYS);
-+}
-+
-+static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId)
-+{
-+   int ret = -1;
-+   int i;
-+   drmModeRes *res = drmModeGetResources(drmfd);
-+   drmModeConnector *c;
-+
-+   if(!res)
-+   {
-+      printf( "drmModeGetResources failed: %s\n", ERRSTR);
-+      return -1;
-+   }
-+
-+   if (res->count_crtcs <= 0)
-+   {
-+      printf( "drm: no crts\n");
-+      goto fail_res;
-+   }
-+
-+   if (!s->conId) {
-+      fprintf(stderr,
-+         "No connector ID specified.  Choosing default from list:\n");
-+
-+      for (i = 0; i < res->count_connectors; i++) {
-+         drmModeConnector *con =
-+            drmModeGetConnector(drmfd, res->connectors[i]);
-+         drmModeEncoder *enc = NULL;
-+         drmModeCrtc *crtc = NULL;
-+
-+         if (con->encoder_id) {
-+            enc = drmModeGetEncoder(drmfd, con->encoder_id);
-+            if (enc->crtc_id) {
-+               crtc = drmModeGetCrtc(drmfd, enc->crtc_id);
-+            }
-+         }
-+
-+         if (!s->conId && crtc) {
-+            s->conId = con->connector_id;
-+            s->crtcId = crtc->crtc_id;
-+         }
-+
-+         av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n",
-+                con->connector_id,
-+                crtc ? crtc->crtc_id : 0,
-+                con->connector_type,
-+                crtc ? crtc->width : 0,
-+                crtc ? crtc->height : 0,
-+                (s->conId == (int)con->connector_id ?
-+            " (chosen)" : ""));
-+      }
-+
-+      if (!s->conId) {
-+         av_log(avctx, AV_LOG_ERROR,
-+            "No suitable enabled connector found.\n");
-+         return -1;;
-+      }
-+   }
-+
-+   s->crtcIdx = -1;
-+
-+   for (i = 0; i < res->count_crtcs; ++i) {
-+      if (s->crtcId == res->crtcs[i]) {
-+         s->crtcIdx = i;
-+         break;
-+      }
-+   }
-+
-+   if (s->crtcIdx == -1)
-+   {
-+       av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId);
-+       goto fail_res;
-+   }
-+
-+   if (res->count_connectors <= 0)
-+   {
-+       av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n");
-+       goto fail_res;
-+   }
-+
-+   c = drmModeGetConnector(drmfd, s->conId);
-+   if (!c)
-+   {
-+       av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR);
-+       goto fail_res;
-+   }
-+
-+   if (!c->count_modes)
-+   {
-+       av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n");
-+       goto fail_conn;
-+   }
-+
-+   {
-+      drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId);
-+      s->compose.x = crtc->x;
-+      s->compose.y = crtc->y;
-+      s->compose.width = crtc->width;
-+      s->compose.height = crtc->height;
-+      drmModeFreeCrtc(crtc);
-+   }
-+
-+   if (pConId)
-+      *pConId = c->connector_id;
-+   ret = 0;
-+
-+fail_conn:
-+   drmModeFreeConnector(c);
-+
-+fail_res:
-+   drmModeFreeResources(res);
-+
-+   return ret;
-+}
-+
-+// deinit is called if init fails so no need to clean up explicity here
-+static int drm_vout_init(struct AVFormatContext * s)
-+{
-+    drm_display_env_t * const de = s->priv_data;
-+    int rv;
-+    const char * drm_module = DRM_MODULE;
-+
-+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
-+
-+    de->drm_fd = -1;
-+    de->con_id = 0;
-+    de->setup = (struct drm_setup){0};
-+    de->q_terminate = 0;
-+
-+    if ((de->drm_fd = drmOpen(drm_module, NULL)) < 0)
-+    {
-+        rv = AVERROR(errno);
-+        av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", drm_module, av_err2str(rv));
-+        return rv;
-+    }
-+
-+    if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0)
-+    {
-+        av_log(s, AV_LOG_ERROR, "failed to find valid mode\n");
-+        rv = AVERROR(EINVAL);
-+        goto fail_close;
-+    }
-+
-+    sem_init(&de->q_sem_in, 0, 0);
-+    sem_init(&de->q_sem_out, 0, 0);
-+    if (pthread_create(&de->q_thread, NULL, display_thread, s)) {
-+        rv = AVERROR(errno);
-+        av_log(s, AV_LOG_ERROR, "Failed to creatye display thread: %s\n", av_err2str(rv));
-+        goto fail_close;
-+    }
-+
-+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
-+
-+    return 0;
-+
-+fail_close:
-+    close(de->drm_fd);
-+    de->drm_fd = -1;
-+    av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__);
-+
-+    return rv;
-+}
-+
-+static void drm_vout_deinit(struct AVFormatContext * s)
-+{
-+    drm_display_env_t * const de = s->priv_data;
-+
-+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
-+
-+    de->q_terminate = 1;
-+    sem_post(&de->q_sem_in);
-+    pthread_join(de->q_thread, NULL);
-+    sem_destroy(&de->q_sem_in);
-+    sem_destroy(&de->q_sem_out);
-+
-+    for (unsigned int i = 0; i != AUX_SIZE; ++i)
-+        da_uninit(de, de->aux + i);
-+
-+    av_frame_free(&de->q_next);
-+
-+    if (de->drm_fd >= 0) {
-+        close(de->drm_fd);
-+        de->drm_fd = -1;
-+    }
-+
-+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
-+}
-+
-+
-+#define OFFSET(x) offsetof(drm_display_env_t, x)
-+static const AVOption options[] = {
-+    { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-+    { NULL }
-+};
-+
-+static const AVClass drm_vout_class = {
-+    .class_name = "drm vid outdev",
-+    .item_name  = av_default_item_name,
-+    .option     = options,
-+    .version    = LIBAVUTIL_VERSION_INT,
-+    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
-+};
-+
-+FFOutputFormat ff_vout_drm_muxer = {
-+    .p = {
-+        .name           = "vout_drm",
-+        .long_name      = NULL_IF_CONFIG_SMALL("Drm video output device"),
-+        .audio_codec    = AV_CODEC_ID_NONE,
-+        .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,
-+        .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
-+        .priv_class     = &drm_vout_class,
-+    },
-+    .priv_data_size = sizeof(drm_display_env_t),
-+    .write_header   = drm_vout_write_header,
-+    .write_packet   = drm_vout_write_packet,
-+    .write_uncoded_frame = drm_vout_write_frame,
-+    .write_trailer  = drm_vout_write_trailer,
-+    .control_message = drm_vout_control_message,
-+    .init           = drm_vout_init,
-+    .deinit         = drm_vout_deinit,
-+};
-+
-
-From bfdfdd9ad94d8a9773c9069b930725ad44e9ddb3 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 28 Apr 2021 11:34:18 +0100
-Subject: [PATCH 015/186] Add vout_egl
-
----
- configure                |   6 +
- libavdevice/Makefile     |   1 +
- libavdevice/alldevices.c |   1 +
- libavdevice/egl_vout.c   | 811 +++++++++++++++++++++++++++++++++++++++
- 4 files changed, 819 insertions(+)
- create mode 100644 libavdevice/egl_vout.c
-
-diff --git a/configure b/configure
-index fb72aa89a60c..a4ffd8797690 100755
---- a/configure
-+++ b/configure
-@@ -347,6 +347,7 @@ External library support:
-   --enable-mmal            enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
-   --enable-sand            enable sand video formats [rpi]
-   --enable-vout-drm        enable the vout_drm module - for internal testing only [no]
-+  --enable-vout-egl        enable the vout_egl module - for internal testing only [no]
-   --disable-nvdec          disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
-   --disable-nvenc          disable Nvidia video encoding code [autodetect]
-   --enable-omx             enable OpenMAX IL code [no]
-@@ -1818,6 +1819,7 @@ EXTERNAL_LIBRARY_LIST="
-     libdav1d
-     libdc1394
-     libdrm
-+    epoxy
-     libflite
-     libfontconfig
-     libfreetype
-@@ -1942,6 +1944,7 @@ FEATURE_LIST="
-     static
-     swscale_alpha
-     vout_drm
-+    vout_egl
- "
- 
- # this list should be kept in linking order
-@@ -3565,6 +3568,8 @@ v4l2_outdev_deps="libdrm"
- v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
- v4l2_outdev_suggest="libv4l2"
- vout_drm_outdev_deps="libdrm vout_drm"
-+vout_egl_outdev_deps="xlib"
-+vout_egl_outdev_select="epoxy"
- vfwcap_indev_deps="vfw32 vfwcap_defines"
- xcbgrab_indev_deps="libxcb"
- xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
-@@ -6596,6 +6601,7 @@ enabled libdav1d          && require_pkg_config libdav1d "dav1d >= 0.5.0" "dav1d
- enabled libdavs2          && require_pkg_config libdavs2 "davs2 >= 1.6.0" davs2.h davs2_decoder_open
- enabled libdc1394         && require_pkg_config libdc1394 libdc1394-2 dc1394/dc1394.h dc1394_new
- enabled libdrm            && require_pkg_config libdrm libdrm xf86drm.h drmGetVersion
-+enabled epoxy             && require_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version
- enabled libfdk_aac        && { check_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
-                                { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
-                                  warn "using libfdk without pkg-config"; } }
-diff --git a/libavdevice/Makefile b/libavdevice/Makefile
-index 36aac301861a..0989cb895f9e 100644
---- a/libavdevice/Makefile
-+++ b/libavdevice/Makefile
-@@ -49,6 +49,7 @@ OBJS-$(CONFIG_V4L2_INDEV)                += v4l2.o v4l2-common.o timefilter.o
- OBJS-$(CONFIG_V4L2_OUTDEV)               += v4l2enc.o v4l2-common.o
- OBJS-$(CONFIG_VFWCAP_INDEV)              += vfwcap.o
- OBJS-$(CONFIG_VOUT_DRM_OUTDEV)           += drm_vout.o
-+OBJS-$(CONFIG_VOUT_EGL_OUTDEV)           += egl_vout.o
- OBJS-$(CONFIG_XCBGRAB_INDEV)             += xcbgrab.o
- OBJS-$(CONFIG_XV_OUTDEV)                 += xv.o
- 
-diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
-index e2a8669f2712..ffb410b92da8 100644
---- a/libavdevice/alldevices.c
-+++ b/libavdevice/alldevices.c
-@@ -53,6 +53,7 @@ extern const AVInputFormat  ff_v4l2_demuxer;
- extern const FFOutputFormat ff_v4l2_muxer;
- extern const AVInputFormat  ff_vfwcap_demuxer;
- extern const FFOutputFormat ff_vout_drm_muxer;
-+extern const FFOutputFormat ff_vout_egl_muxer;
- extern const AVInputFormat  ff_xcbgrab_demuxer;
- extern const FFOutputFormat ff_xv_muxer;
- 
-diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c
-new file mode 100644
-index 000000000000..7b9c610ace28
---- /dev/null
-+++ b/libavdevice/egl_vout.c
-@@ -0,0 +1,811 @@
-+/*
-+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+
-+// *** This module is a work in progress and its utility is strictly
-+//     limited to testing.
-+//     Amongst other issues it doesn't wait for the pic to be displayed before
-+//     returning the buffer so flikering does occur.
-+
-+#include <epoxy/gl.h>
-+#include <epoxy/egl.h>
-+
-+#include "libavutil/opt.h"
-+#include "libavutil/avassert.h"
-+#include "libavutil/pixdesc.h"
-+#include "libavutil/imgutils.h"
-+#include "libavutil/hwcontext_drm.h"
-+#include "libavformat/mux.h"
-+#include "avdevice.h"
-+
-+#include "pthread.h"
-+#include <semaphore.h>
-+#include <stdatomic.h>
-+#include <unistd.h>
-+
-+#include <X11/Xlib.h>
-+#include <X11/Xutil.h>
-+
-+#include "libavutil/rpi_sand_fns.h"
-+
-+#define TRACE_ALL 0
-+
-+struct egl_setup {
-+   int conId;
-+
-+   Display *dpy;
-+   EGLDisplay egl_dpy;
-+   EGLContext ctx;
-+   EGLSurface surf;
-+   Window win;
-+
-+   uint32_t crtcId;
-+   int crtcIdx;
-+   uint32_t planeId;
-+   struct {
-+       int x, y, width, height;
-+   } compose;
-+};
-+
-+typedef struct egl_aux_s {
-+    int fd;
-+    GLuint texture;
-+
-+} egl_aux_t;
-+
-+typedef struct egl_display_env_s
-+{
-+    AVClass *class;
-+
-+    struct egl_setup setup;
-+    enum AVPixelFormat avfmt;
-+
-+    int show_all;
-+    int window_width, window_height;
-+    int window_x, window_y;
-+    int fullscreen;
-+
-+    egl_aux_t aux[32];
-+
-+    pthread_t q_thread;
-+    pthread_mutex_t q_lock;
-+    sem_t display_start_sem;
-+    sem_t q_sem;
-+    int q_terminate;
-+    AVFrame * q_this;
-+    AVFrame * q_next;
-+
-+} egl_display_env_t;
-+
-+
-+/**
-+ * Remove window border/decorations.
-+ */
-+static void
-+no_border( Display *dpy, Window w)
-+{
-+   static const unsigned MWM_HINTS_DECORATIONS = (1 << 1);
-+   static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5;
-+
-+   typedef struct
-+   {
-+      unsigned long       flags;
-+      unsigned long       functions;
-+      unsigned long       decorations;
-+      long                inputMode;
-+      unsigned long       status;
-+   } PropMotifWmHints;
-+
-+   PropMotifWmHints motif_hints;
-+   Atom prop, proptype;
-+   unsigned long flags = 0;
-+
-+   /* setup the property */
-+   motif_hints.flags = MWM_HINTS_DECORATIONS;
-+   motif_hints.decorations = flags;
-+
-+   /* get the atom for the property */
-+   prop = XInternAtom( dpy, "_MOTIF_WM_HINTS", True );
-+   if (!prop) {
-+      /* something went wrong! */
-+      return;
-+   }
-+
-+   /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */
-+   proptype = prop;
-+
-+   XChangeProperty( dpy, w,                         /* display, window */
-+                    prop, proptype,                 /* property, type */
-+                    32,                             /* format: 32-bit datums */
-+                    PropModeReplace,                /* mode */
-+                    (unsigned char *) &motif_hints, /* data */
-+                    PROP_MOTIF_WM_HINTS_ELEMENTS    /* nelements */
-+                  );
-+}
-+
-+
-+/*
-+ * Create an RGB, double-buffered window.
-+ * Return the window and context handles.
-+ */
-+static int
-+make_window(struct AVFormatContext * const s,
-+            egl_display_env_t * const de,
-+            Display *dpy, EGLDisplay egl_dpy, const char *name,
-+            Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet)
-+{
-+   int scrnum = DefaultScreen( dpy );
-+   XSetWindowAttributes attr;
-+   unsigned long mask;
-+   Window root = RootWindow( dpy, scrnum );
-+   Window win;
-+   EGLContext ctx;
-+   const int fullscreen = de->fullscreen;
-+   EGLConfig config;
-+   int x = de->window_x;
-+   int y = de->window_y;
-+   int width = de->window_width ? de->window_width : 1280;
-+   int height = de->window_height ? de->window_height : 720;
-+
-+
-+   if (fullscreen) {
-+      int scrnum = DefaultScreen(dpy);
-+
-+      x = 0; y = 0;
-+      width = DisplayWidth(dpy, scrnum);
-+      height = DisplayHeight(dpy, scrnum);
-+   }
-+
-+   {
-+      EGLint num_configs;
-+      static const EGLint attribs[] = {
-+         EGL_RED_SIZE, 1,
-+         EGL_GREEN_SIZE, 1,
-+         EGL_BLUE_SIZE, 1,
-+         EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
-+         EGL_NONE
-+      };
-+
-+      if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) {
-+         av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n");
-+         return -1;
-+      }
-+   }
-+
-+   {
-+      EGLint vid;
-+      if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) {
-+         av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n");
-+         return -1;
-+      }
-+
-+      {
-+         XVisualInfo visTemplate = {
-+            .visualid = vid,
-+         };
-+         int num_visuals;
-+         XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask,
-+                                               &visTemplate, &num_visuals);
-+
-+         /* window attributes */
-+         attr.background_pixel = 0;
-+         attr.border_pixel = 0;
-+         attr.colormap = XCreateColormap( dpy, root, visinfo->visual, AllocNone);
-+         attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask;
-+         /* XXX this is a bad way to get a borderless window! */
-+         mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask;
-+
-+         win = XCreateWindow( dpy, root, x, y, width, height,
-+                              0, visinfo->depth, InputOutput,
-+                              visinfo->visual, mask, &attr );
-+         XFree(visinfo);
-+      }
-+   }
-+
-+   if (fullscreen)
-+      no_border(dpy, win);
-+
-+   /* set hints and properties */
-+   {
-+      XSizeHints sizehints;
-+      sizehints.x = x;
-+      sizehints.y = y;
-+      sizehints.width  = width;
-+      sizehints.height = height;
-+      sizehints.flags = USSize | USPosition;
-+      XSetNormalHints(dpy, win, &sizehints);
-+      XSetStandardProperties(dpy, win, name, name,
-+                              None, (char **)NULL, 0, &sizehints);
-+   }
-+
-+   eglBindAPI(EGL_OPENGL_ES_API);
-+
-+   {
-+      static const EGLint ctx_attribs[] = {
-+         EGL_CONTEXT_CLIENT_VERSION, 2,
-+         EGL_NONE
-+      };
-+      ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs );
-+      if (!ctx) {
-+         av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
-+         return -1;
-+      }
-+   }
-+
-+
-+   XMapWindow(dpy, win);
-+
-+   {
-+      EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL);
-+      if (!surf) {
-+         av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n");
-+         return -1;
-+      }
-+
-+      if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) {
-+         av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
-+         return -1;
-+      }
-+
-+      *winRet = win;
-+      *ctxRet = ctx;
-+      *surfRet = surf;
-+   }
-+
-+   return 0;
-+}
-+
-+static GLint
-+compile_shader(struct AVFormatContext * const avctx, GLenum target, const char *source)
-+{
-+   GLuint s = glCreateShader(target);
-+
-+   if (s == 0) {
-+      av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n");
-+      return 0;
-+   }
-+
-+   glShaderSource(s, 1, (const GLchar **) &source, NULL);
-+   glCompileShader(s);
-+
-+   {
-+      GLint ok;
-+      glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
-+
-+      if (!ok) {
-+         GLchar *info;
-+         GLint size;
-+
-+         glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size);
-+         info = malloc(size);
-+
-+         glGetShaderInfoLog(s, size, NULL, info);
-+         av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source);
-+
-+         return 0;
-+      }
-+   }
-+
-+   return s;
-+}
-+
-+static GLuint link_program(struct AVFormatContext * const s, GLint vs, GLint fs)
-+{
-+   GLuint prog = glCreateProgram();
-+
-+   if (prog == 0) {
-+      av_log(s, AV_LOG_ERROR, "Failed to create program\n");
-+      return 0;
-+   }
-+
-+   glAttachShader(prog, vs);
-+   glAttachShader(prog, fs);
-+   glLinkProgram(prog);
-+
-+   {
-+      GLint ok;
-+      glGetProgramiv(prog, GL_LINK_STATUS, &ok);
-+      if (!ok) {
-+         /* Some drivers return a size of 1 for an empty log.  This is the size
-+          * of a log that contains only a terminating NUL character.
-+          */
-+         GLint size;
-+         GLchar *info = NULL;
-+         glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size);
-+         if (size > 1) {
-+            info = malloc(size);
-+            glGetProgramInfoLog(prog, size, NULL, info);
-+         }
-+
-+         av_log(s, AV_LOG_ERROR, "Failed to link: %s\n",
-+                 (info != NULL) ? info : "<empty log>");
-+         return 0;
-+      }
-+   }
-+
-+   return prog;
-+}
-+
-+static int
-+gl_setup(struct AVFormatContext * const s)
-+{
-+   const char *vs =
-+      "attribute vec4 pos;\n"
-+      "varying vec2 texcoord;\n"
-+      "\n"
-+      "void main() {\n"
-+      "  gl_Position = pos;\n"
-+      "  texcoord.x = (pos.x + 1.0) / 2.0;\n"
-+      "  texcoord.y = (-pos.y + 1.0) / 2.0;\n"
-+      "}\n";
-+   const char *fs =
-+      "#extension GL_OES_EGL_image_external : enable\n"
-+      "precision mediump float;\n"
-+      "uniform samplerExternalOES s;\n"
-+      "varying vec2 texcoord;\n"
-+      "void main() {\n"
-+      "  gl_FragColor = texture2D(s, texcoord);\n"
-+      "}\n";
-+
-+   GLuint vs_s;
-+   GLuint fs_s;
-+   GLuint prog;
-+
-+   if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) ||
-+       !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) ||
-+       !(prog = link_program(s, vs_s, fs_s)))
-+      return -1;
-+
-+   glUseProgram(prog);
-+
-+   {
-+      static const float verts[] = {
-+         -1, -1,
-+         1, -1,
-+         1, 1,
-+         -1, 1,
-+      };
-+      glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts);
-+   }
-+
-+   glEnableVertexAttribArray(0);
-+   return 0;
-+}
-+
-+static int egl_vout_write_trailer(AVFormatContext *s)
-+{
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, "%s\n", __func__);
-+#endif
-+
-+    return 0;
-+}
-+
-+static int egl_vout_write_header(AVFormatContext *s)
-+{
-+    const AVCodecParameters * const par = s->streams[0]->codecpar;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, "%s\n", __func__);
-+#endif
-+    if (   s->nb_streams > 1
-+        || par->codec_type != AVMEDIA_TYPE_VIDEO
-+        || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
-+        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
-+        return AVERROR(EINVAL);
-+    }
-+
-+    return 0;
-+}
-+
-+
-+static int do_display(AVFormatContext * const s, egl_display_env_t * const de, AVFrame * const frame)
-+{
-+    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
-+    egl_aux_t * da = NULL;
-+    unsigned int i;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
-+#endif
-+
-+    for (i = 0; i != 32; ++i) {
-+        if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) {
-+            da = de->aux + i;
-+            break;
-+        }
-+    }
-+
-+    if (da == NULL) {
-+        av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    if (da->texture == 0) {
-+        EGLint attribs[50];
-+        EGLint * a = attribs;
-+        int i, j;
-+        static const EGLint anames[] = {
-+           EGL_DMA_BUF_PLANE0_FD_EXT,
-+           EGL_DMA_BUF_PLANE0_OFFSET_EXT,
-+           EGL_DMA_BUF_PLANE0_PITCH_EXT,
-+           EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
-+           EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
-+           EGL_DMA_BUF_PLANE1_FD_EXT,
-+           EGL_DMA_BUF_PLANE1_OFFSET_EXT,
-+           EGL_DMA_BUF_PLANE1_PITCH_EXT,
-+           EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT,
-+           EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
-+           EGL_DMA_BUF_PLANE2_FD_EXT,
-+           EGL_DMA_BUF_PLANE2_OFFSET_EXT,
-+           EGL_DMA_BUF_PLANE2_PITCH_EXT,
-+           EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT,
-+           EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT,
-+        };
-+        const EGLint * b = anames;
-+
-+        *a++ = EGL_WIDTH;
-+        *a++ = av_frame_cropped_width(frame);
-+        *a++ = EGL_HEIGHT;
-+        *a++ = av_frame_cropped_height(frame);
-+        *a++ = EGL_LINUX_DRM_FOURCC_EXT;
-+        *a++ = desc->layers[0].format;
-+
-+        for (i = 0; i < desc->nb_layers; ++i) {
-+            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
-+                const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
-+                const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
-+                *a++ = *b++;
-+                *a++ = obj->fd;
-+                *a++ = *b++;
-+                *a++ = p->offset;
-+                *a++ = *b++;
-+                *a++ = p->pitch;
-+                if (obj->format_modifier == 0) {
-+                   b += 2;
-+                }
-+                else {
-+                   *a++ = *b++;
-+                   *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF);
-+                   *a++ = *b++;
-+                   *a++ = (EGLint)(obj->format_modifier >> 32);
-+                }
-+            }
-+        }
-+
-+        *a = EGL_NONE;
-+
-+#if TRACE_ALL
-+        for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) {
-+           av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]);
-+        }
-+#endif
-+        {
-+           const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy,
-+                                              EGL_NO_CONTEXT,
-+                                              EGL_LINUX_DMA_BUF_EXT,
-+                                              NULL, attribs);
-+           if (!image) {
-+              av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd);
-+              return -1;
-+           }
-+
-+           glGenTextures(1, &da->texture);
-+           glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
-+           glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-+           glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-+           glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
-+
-+           eglDestroyImageKHR(de->setup.egl_dpy, image);
-+        }
-+
-+        da->fd = desc->objects[0].fd;
-+
-+#if 0
-+        av_log(s, AV_LOG_INFO, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
-+               " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
-+               av_frame_cropped_width(frame),
-+               av_frame_cropped_height(frame),
-+               desc->layers[0].format,
-+               bo_plane_handles[0],
-+               bo_plane_handles[1],
-+               bo_plane_handles[2],
-+               bo_plane_handles[3],
-+               pitches[0],
-+               pitches[1],
-+               pitches[2],
-+               pitches[3],
-+               offsets[0],
-+               offsets[1],
-+               offsets[2],
-+               offsets[3],
-+               (long long)modifiers[0],
-+               (long long)modifiers[1],
-+               (long long)modifiers[2],
-+               (long long)modifiers[3]
-+               );
-+#endif
-+    }
-+
-+    glClearColor(0.5, 0.5, 0.5, 0.5);
-+    glClear(GL_COLOR_BUFFER_BIT);
-+
-+    glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
-+    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
-+    eglSwapBuffers(de->setup.egl_dpy, de->setup.surf);
-+
-+    glDeleteTextures(1, &da->texture);
-+    da->texture = 0;
-+    da->fd = -1;
-+
-+    return 0;
-+}
-+
-+static void * display_thread(void * v)
-+{
-+    AVFormatContext * const s = v;
-+    egl_display_env_t * const de = s->priv_data;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
-+#endif
-+    {
-+       EGLint egl_major, egl_minor;
-+
-+       de->setup.dpy = XOpenDisplay(NULL);
-+       if (!de->setup.dpy) {
-+          av_log(s, AV_LOG_ERROR, "Couldn't open X display\n");
-+          goto fail;
-+       }
-+
-+       de->setup.egl_dpy = eglGetDisplay(de->setup.dpy);
-+       if (!de->setup.egl_dpy) {
-+          av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n");
-+          goto fail;
-+       }
-+
-+       if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) {
-+           av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n");
-+           goto fail;
-+       }
-+
-+       av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor);
-+
-+       if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) {
-+          av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n");
-+          goto fail;
-+       }
-+    }
-+
-+    if (!de->window_width || !de->window_height) {
-+       de->window_width = 1280;
-+       de->window_height = 720;
-+    }
-+    if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout",
-+                    &de->setup.win, &de->setup.ctx, &de->setup.surf)) {
-+       av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__);
-+       goto fail;
-+    }
-+
-+    if (gl_setup(s)) {
-+       av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__);
-+       goto fail;
-+    }
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__);
-+#endif
-+    sem_post(&de->display_start_sem);
-+
-+    for (;;) {
-+        AVFrame * frame;
-+
-+        while (sem_wait(&de->q_sem) != 0) {
-+            av_assert0(errno == EINTR);
-+        }
-+
-+        if (de->q_terminate)
-+            break;
-+
-+        pthread_mutex_lock(&de->q_lock);
-+        frame = de->q_next;
-+        de->q_next = NULL;
-+        pthread_mutex_unlock(&de->q_lock);
-+
-+        do_display(s, de, frame);
-+
-+        av_frame_free(&de->q_this);
-+        de->q_this = frame;
-+    }
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, ">>> %s\n", __func__);
-+#endif
-+
-+    return NULL;
-+
-+fail:
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__);
-+#endif
-+    de->q_terminate = 1;
-+    sem_post(&de->display_start_sem);
-+
-+    return NULL;
-+}
-+
-+static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
-+{
-+    const AVFrame * const src_frame = (AVFrame *)pkt->data;
-+    AVFrame * frame;
-+    egl_display_env_t * const de = s->priv_data;
-+
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, "%s\n", __func__);
-+#endif
-+
-+    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
-+        frame = av_frame_alloc();
-+        av_frame_ref(frame, src_frame);
-+    }
-+    else if (src_frame->format == AV_PIX_FMT_VAAPI) {
-+        frame = av_frame_alloc();
-+        frame->format = AV_PIX_FMT_DRM_PRIME;
-+        if (av_hwframe_map(frame, src_frame, 0) != 0)
-+        {
-+            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
-+            av_frame_free(&frame);
-+            return AVERROR(EINVAL);
-+        }
-+    }
-+    else {
-+        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    // Really hacky sync
-+    while (de->show_all && de->q_next) {
-+       usleep(3000);
-+    }
-+
-+    pthread_mutex_lock(&de->q_lock);
-+    {
-+        AVFrame * const t = de->q_next;
-+        de->q_next = frame;
-+        frame = t;
-+    }
-+    pthread_mutex_unlock(&de->q_lock);
-+
-+    if (frame == NULL)
-+        sem_post(&de->q_sem);
-+    else
-+        av_frame_free(&frame);
-+
-+    return 0;
-+}
-+
-+static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
-+                          unsigned flags)
-+{
-+    av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
-+    return AVERROR_PATCHWELCOME;
-+}
-+
-+static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
-+{
-+#if TRACE_ALL
-+    av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type);
-+#endif
-+    switch(type) {
-+    case AV_APP_TO_DEV_WINDOW_REPAINT:
-+        return 0;
-+    default:
-+        break;
-+    }
-+    return AVERROR(ENOSYS);
-+}
-+
-+// deinit is called if init fails so no need to clean up explicity here
-+static int egl_vout_init(struct AVFormatContext * s)
-+{
-+    egl_display_env_t * const de = s->priv_data;
-+    unsigned int i;
-+
-+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
-+
-+    de->setup = (struct egl_setup){0};
-+
-+    for (i = 0; i != 32; ++i) {
-+        de->aux[i].fd = -1;
-+    }
-+
-+    de->q_terminate = 0;
-+    pthread_mutex_init(&de->q_lock, NULL);
-+    sem_init(&de->q_sem, 0, 0);
-+    sem_init(&de->display_start_sem, 0, 0);
-+    av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0);
-+
-+    sem_wait(&de->display_start_sem);
-+    if (de->q_terminate) {
-+       av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__);
-+       return -1;
-+    }
-+
-+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
-+
-+    return 0;
-+}
-+
-+static void egl_vout_deinit(struct AVFormatContext * s)
-+{
-+    egl_display_env_t * const de = s->priv_data;
-+
-+    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
-+
-+    de->q_terminate = 1;
-+    sem_post(&de->q_sem);
-+    pthread_join(de->q_thread, NULL);
-+    sem_destroy(&de->q_sem);
-+    pthread_mutex_destroy(&de->q_lock);
-+
-+    av_frame_free(&de->q_next);
-+    av_frame_free(&de->q_this);
-+
-+    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
-+}
-+
-+#define OFFSET(x) offsetof(egl_display_env_t, x)
-+static const AVOption options[] = {
-+   { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-+   { "window_size",  "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
-+   { "window_x",     "set window x offset",    OFFSET(window_x),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
-+   { "window_y",     "set window y offset",    OFFSET(window_y),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
-+   { "fullscreen",   "set fullscreen display", OFFSET(fullscreen),   AV_OPT_TYPE_BOOL,   {.i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-+    { NULL }
-+
-+};
-+
-+static const AVClass egl_vout_class = {
-+    .class_name = "egl vid outdev",
-+    .item_name  = av_default_item_name,
-+    .option     = options,
-+    .version    = LIBAVUTIL_VERSION_INT,
-+    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
-+};
-+
-+FFOutputFormat ff_vout_egl_muxer = {
-+    .p = {
-+        .name           = "vout_egl",
-+        .long_name      = NULL_IF_CONFIG_SMALL("Egl video output device"),
-+        .audio_codec    = AV_CODEC_ID_NONE,
-+        .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,
-+        .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
-+        .priv_class     = &egl_vout_class,
-+    },
-+    .priv_data_size = sizeof(egl_display_env_t),
-+    .write_header   = egl_vout_write_header,
-+    .write_packet   = egl_vout_write_packet,
-+    .write_uncoded_frame = egl_vout_write_frame,
-+    .write_trailer  = egl_vout_write_trailer,
-+    .control_message = egl_vout_control_message,
-+    .init           = egl_vout_init,
-+    .deinit         = egl_vout_deinit,
-+};
-+
-
-From 55cc6b4be80730d1bbd67c483e97921f39b58965 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 28 Apr 2021 12:51:22 +0100
-Subject: [PATCH 016/186] V4L2 stateful rework
-
----
- libavcodec/Makefile       |   3 +-
- libavcodec/v4l2_buffers.c | 556 +++++++++++++++++++++++++++-----------
- libavcodec/v4l2_buffers.h |  28 +-
- libavcodec/v4l2_context.c | 536 +++++++++++++++++++++++++++---------
- libavcodec/v4l2_context.h |  20 +-
- libavcodec/v4l2_m2m.c     |  20 +-
- libavcodec/v4l2_m2m.h     |  31 +++
- libavcodec/v4l2_m2m_dec.c | 446 ++++++++++++++++++++++++++----
- 8 files changed, 1286 insertions(+), 354 deletions(-)
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 2d440b56486b..e1aa0ba014ed 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -169,7 +169,8 @@ OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
- OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
- OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
- OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
--OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
-+OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\
-+                                          weak_link.o
- OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\
- 					  v4l2_req_devscan.o weak_link.o
- OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 3f5471067a1a..a003934ca19e 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -21,6 +21,7 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
-+#include <drm_fourcc.h>
- #include <linux/videodev2.h>
- #include <sys/ioctl.h>
- #include <sys/mman.h>
-@@ -29,12 +30,14 @@
- #include <poll.h>
- #include "libavcodec/avcodec.h"
- #include "libavutil/pixdesc.h"
-+#include "libavutil/hwcontext.h"
- #include "v4l2_context.h"
- #include "v4l2_buffers.h"
- #include "v4l2_m2m.h"
-+#include "weak_link.h"
- 
- #define USEC_PER_SEC 1000000
--static AVRational v4l2_timebase = { 1, USEC_PER_SEC };
-+static const AVRational v4l2_timebase = { 1, USEC_PER_SEC };
- 
- static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf)
- {
-@@ -51,34 +54,44 @@ static inline AVCodecContext *logger(V4L2Buffer *buf)
- static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf)
- {
-     V4L2m2mContext *s = buf_to_m2mctx(avbuf);
--
--    if (s->avctx->pkt_timebase.num)
--        return s->avctx->pkt_timebase;
--    return s->avctx->time_base;
-+    const AVRational tb = s->avctx->pkt_timebase.num ?
-+        s->avctx->pkt_timebase :
-+        s->avctx->time_base;
-+    return tb.num && tb.den ? tb : v4l2_timebase;
- }
- 
--static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts)
-+static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts, int no_rescale)
- {
--    int64_t v4l2_pts;
--
--    if (pts == AV_NOPTS_VALUE)
--        pts = 0;
--
-     /* convert pts to v4l2 timebase */
--    v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
-+    const int64_t v4l2_pts =
-+        no_rescale ? pts :
-+        pts == AV_NOPTS_VALUE ? 0 :
-+            av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
-     out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
-     out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
- }
- 
--static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf)
-+static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf, int no_rescale)
- {
--    int64_t v4l2_pts;
--
-     /* convert pts back to encoder timebase */
--    v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
-+    const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
-                         avbuf->buf.timestamp.tv_usec;
- 
--    return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
-+    return
-+        no_rescale ? v4l2_pts :
-+        v4l2_pts == 0 ? AV_NOPTS_VALUE :
-+            av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
-+}
-+
-+static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length)
-+{
-+    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
-+        out->planes[plane].bytesused = bytesused;
-+        out->planes[plane].length = length;
-+    } else {
-+        out->buf.bytesused = bytesused;
-+        out->buf.length = length;
-+    }
- }
- 
- static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
-@@ -209,68 +222,143 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf)
-     return AVCOL_TRC_UNSPECIFIED;
- }
- 
--static void v4l2_free_buffer(void *opaque, uint8_t *unused)
-+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
- {
--    V4L2Buffer* avbuf = opaque;
--    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
-+    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
-+    AVDRMLayerDescriptor *layer;
- 
--    if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
--        atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
-+    /* fill the DRM frame descriptor */
-+    drm_desc->nb_objects = avbuf->num_planes;
-+    drm_desc->nb_layers = 1;
- 
--        if (s->reinit) {
--            if (!atomic_load(&s->refcount))
--                sem_post(&s->refsync);
--        } else {
--            if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) {
--                /* no need to queue more buffers to the driver */
--                avbuf->status = V4L2BUF_AVAILABLE;
--            }
--            else if (avbuf->context->streamon)
--                ff_v4l2_buffer_enqueue(avbuf);
--        }
-+    layer = &drm_desc->layers[0];
-+    layer->nb_planes = avbuf->num_planes;
-+
-+    for (int i = 0; i < avbuf->num_planes; i++) {
-+        layer->planes[i].object_index = i;
-+        layer->planes[i].offset = 0;
-+        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
-+    }
-+
-+    switch (avbuf->context->av_pix_fmt) {
-+    case AV_PIX_FMT_YUYV422:
-+
-+        layer->format = DRM_FORMAT_YUYV;
-+        layer->nb_planes = 1;
-+
-+        break;
-+
-+    case AV_PIX_FMT_NV12:
-+    case AV_PIX_FMT_NV21:
-+
-+        layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ?
-+            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
-+
-+        if (avbuf->num_planes > 1)
-+            break;
-+
-+        layer->nb_planes = 2;
-+
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
-+            avbuf->context->format.fmt.pix.height;
-+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
-+        break;
-+
-+    case AV_PIX_FMT_YUV420P:
-+
-+        layer->format = DRM_FORMAT_YUV420;
-+
-+        if (avbuf->num_planes > 1)
-+            break;
-+
-+        layer->nb_planes = 3;
-+
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
-+            avbuf->context->format.fmt.pix.height;
-+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
-+
-+        layer->planes[2].object_index = 0;
-+        layer->planes[2].offset = layer->planes[1].offset +
-+            ((avbuf->plane_info[0].bytesperline *
-+              avbuf->context->format.fmt.pix.height) >> 2);
-+        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
-+        break;
- 
--        av_buffer_unref(&avbuf->context_ref);
-+    default:
-+        drm_desc->nb_layers = 0;
-+        break;
-     }
-+
-+    return (uint8_t *) drm_desc;
- }
- 
--static int v4l2_buf_increase_ref(V4L2Buffer *in)
-+static void v4l2_free_bufref(void *opaque, uint8_t *data)
- {
--    V4L2m2mContext *s = buf_to_m2mctx(in);
-+    AVBufferRef * bufref = (AVBufferRef *)data;
-+    V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data;
-+    struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl);
- 
--    if (in->context_ref)
--        atomic_fetch_add(&in->context_refcount, 1);
--    else {
--        in->context_ref = av_buffer_ref(s->self_ref);
--        if (!in->context_ref)
--            return AVERROR(ENOMEM);
-+    if (ctx != NULL) {
-+        // Buffer still attached to context
-+        V4L2m2mContext *s = buf_to_m2mctx(avbuf);
- 
--        in->context_refcount = 1;
--    }
-+        ff_mutex_lock(&ctx->lock);
- 
--    in->status = V4L2BUF_RET_USER;
--    atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
-+        avbuf->status = V4L2BUF_AVAILABLE;
- 
--    return 0;
-+        if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) {
-+            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name);
-+            /* no need to queue more buffers to the driver */
-+        }
-+        else if (ctx->streamon) {
-+            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name);
-+            avbuf->buf.timestamp.tv_sec = 0;
-+            avbuf->buf.timestamp.tv_usec = 0;
-+            ff_v4l2_buffer_enqueue(avbuf);  // will set to IN_DRIVER
-+        }
-+        else {
-+            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name);
-+        }
-+
-+        ff_mutex_unlock(&ctx->lock);
-+    }
-+
-+    ff_weak_link_unlock(avbuf->context_wl);
-+    av_buffer_unref(&bufref);
- }
- 
--static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
-+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
- {
--    int ret;
-+    struct v4l2_exportbuffer expbuf;
-+    int i, ret;
- 
--    if (plane >= in->num_planes)
--        return AVERROR(EINVAL);
-+    for (i = 0; i < avbuf->num_planes; i++) {
-+        memset(&expbuf, 0, sizeof(expbuf));
- 
--    /* even though most encoders return 0 in data_offset encoding vp8 does require this value */
--    *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset,
--                            in->plane_info[plane].length, v4l2_free_buffer, in, 0);
--    if (!*buf)
--        return AVERROR(ENOMEM);
-+        expbuf.index = avbuf->buf.index;
-+        expbuf.type = avbuf->buf.type;
-+        expbuf.plane = i;
- 
--    ret = v4l2_buf_increase_ref(in);
--    if (ret)
--        av_buffer_unref(buf);
-+        ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_EXPBUF, &expbuf);
-+        if (ret < 0)
-+            return AVERROR(errno);
- 
--    return ret;
-+        if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) {
-+            /* drm frame */
-+            avbuf->drm_frame.objects[i].size = avbuf->buf.m.planes[i].length;
-+            avbuf->drm_frame.objects[i].fd = expbuf.fd;
-+            avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        } else {
-+            /* drm frame */
-+            avbuf->drm_frame.objects[0].size = avbuf->buf.length;
-+            avbuf->drm_frame.objects[0].fd = expbuf.fd;
-+            avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        }
-+    }
-+
-+    return 0;
- }
- 
- static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset)
-@@ -285,30 +373,50 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i
- 
-     memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset));
- 
--    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
--        out->planes[plane].bytesused = bytesused;
--        out->planes[plane].length = length;
--    } else {
--        out->buf.bytesused = bytesused;
--        out->buf.length = length;
--    }
-+    set_buf_length(out, plane, bytesused, length);
- 
-     return 0;
- }
- 
-+static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf)
-+{
-+    AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]);
-+    AVBufferRef * newbuf;
-+
-+    if (!bufref)
-+        return NULL;
-+
-+    newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0);
-+    if (newbuf == NULL)
-+        av_buffer_unref(&bufref);
-+
-+    avbuf->status = V4L2BUF_RET_USER;
-+    return newbuf;
-+}
-+
- static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
- {
--    int i, ret;
-+    int i;
- 
-     frame->format = avbuf->context->av_pix_fmt;
- 
--    for (i = 0; i < avbuf->num_planes; i++) {
--        ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
--        if (ret)
--            return ret;
-+    frame->buf[0] = wrap_avbuf(avbuf);
-+    if (frame->buf[0] == NULL)
-+        return AVERROR(ENOMEM);
-+
-+    if (buf_to_m2mctx(avbuf)->output_drm) {
-+        /* 1. get references to the actual data */
-+        frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
-+        frame->format = AV_PIX_FMT_DRM_PRIME;
-+        frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref);
-+        return 0;
-+    }
-+
- 
-+    /* 1. get references to the actual data */
-+    for (i = 0; i < avbuf->num_planes; i++) {
-+        frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset;
-         frame->linesize[i] = avbuf->plane_info[i].bytesperline;
--        frame->data[i] = frame->buf[i]->data;
-     }
- 
-     /* fixup special cases */
-@@ -337,68 +445,95 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
-     return 0;
- }
- 
-+static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h)
-+{
-+    if (dst_stride == src_stride && w + 32 >= dst_stride) {
-+        memcpy(dst, src, dst_stride * h);
-+    }
-+    else {
-+        while (--h >= 0) {
-+            memcpy(dst, src, w);
-+            dst += dst_stride;
-+            src += src_stride;
-+        }
-+    }
-+}
-+
-+static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes)
-+{
-+    return i != 0  && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA));
-+}
-+
- static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- {
--    int i, ret;
--    struct v4l2_format fmt = out->context->format;
--    int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
--                       fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat;
--    int height       = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
--                       fmt.fmt.pix_mp.height : fmt.fmt.pix.height;
--    int is_planar_format = 0;
--
--    switch (pixel_format) {
--    case V4L2_PIX_FMT_YUV420M:
--    case V4L2_PIX_FMT_YVU420M:
--#ifdef V4L2_PIX_FMT_YUV422M
--    case V4L2_PIX_FMT_YUV422M:
--#endif
--#ifdef V4L2_PIX_FMT_YVU422M
--    case V4L2_PIX_FMT_YVU422M:
--#endif
--#ifdef V4L2_PIX_FMT_YUV444M
--    case V4L2_PIX_FMT_YUV444M:
--#endif
--#ifdef V4L2_PIX_FMT_YVU444M
--    case V4L2_PIX_FMT_YVU444M:
--#endif
--    case V4L2_PIX_FMT_NV12M:
--    case V4L2_PIX_FMT_NV21M:
--    case V4L2_PIX_FMT_NV12MT_16X16:
--    case V4L2_PIX_FMT_NV12MT:
--    case V4L2_PIX_FMT_NV16M:
--    case V4L2_PIX_FMT_NV61M:
--        is_planar_format = 1;
--    }
--
--    if (!is_planar_format) {
--        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
--        int planes_nb = 0;
--        int offset = 0;
--
--        for (i = 0; i < desc->nb_components; i++)
--            planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
--
--        for (i = 0; i < planes_nb; i++) {
--            int size, h = height;
--            if (i == 1 || i == 2) {
-+    int i;
-+    int num_planes = 0;
-+    int pel_strides[4] = {0};
-+
-+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
-+
-+    if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) {
-+        av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__);
-+        return -1;
-+    }
-+
-+    for (i = 0; i != desc->nb_components; ++i) {
-+        if (desc->comp[i].plane >= num_planes)
-+            num_planes = desc->comp[i].plane + 1;
-+        pel_strides[desc->comp[i].plane] = desc->comp[i].step;
-+    }
-+
-+    if (out->num_planes > 1) {
-+        if (num_planes != out->num_planes) {
-+            av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes);
-+            return -1;
-+        }
-+        for (i = 0; i != num_planes; ++i) {
-+            int w = frame->width;
-+            int h = frame->height;
-+            if (is_chroma(desc, i, num_planes)) {
-+                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
-                 h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
-             }
--            size = frame->linesize[i] * h;
--            ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset);
--            if (ret)
--                return ret;
--            offset += size;
-+
-+            cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline,
-+                   frame->data[i], frame->linesize[i],
-+                   w * pel_strides[i], h);
-+            set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length);
-         }
--        return 0;
-     }
-+    else
-+    {
-+        unsigned int offset = 0;
-+
-+        for (i = 0; i != num_planes; ++i) {
-+            int w = frame->width;
-+            int h = frame->height;
-+            int dst_stride = out->plane_info[0].bytesperline;
-+            uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset;
-+
-+            if (is_chroma(desc, i, num_planes)) {
-+                // Is chroma
-+                dst_stride >>= desc->log2_chroma_w;
-+                offset += dst_stride * (out->context->height >> desc->log2_chroma_h);
-+                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
-+                h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
-+            }
-+            else {
-+                // Is luma or alpha
-+                offset += dst_stride * out->context->height;
-+            }
-+            if (offset > out->plane_info[0].length) {
-+                av_log(NULL, AV_LOG_ERROR, "%s: Plane total %d > buffer size %d\n", __func__, offset, out->plane_info[0].length);
-+                return -1;
-+            }
- 
--    for (i = 0; i < out->num_planes; i++) {
--        ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0);
--        if (ret)
--            return ret;
-+            cpy_2d(dst, dst_stride,
-+                   frame->data[i], frame->linesize[i],
-+                   w * pel_strides[i], h);
-+        }
-+        set_buf_length(out, 0, offset, out->plane_info[0].length);
-     }
--
-     return 0;
- }
- 
-@@ -410,14 +545,15 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- 
- int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- {
--    v4l2_set_pts(out, frame->pts);
-+    v4l2_set_pts(out, frame->pts, 0);
- 
-     return v4l2_buffer_swframe_to_buf(frame, out);
- }
- 
--int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
-+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_rescale_pts)
- {
-     int ret;
-+    V4L2Context * const ctx = avbuf->context;
- 
-     av_frame_unref(frame);
- 
-@@ -432,13 +568,22 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
-     frame->colorspace = v4l2_get_color_space(avbuf);
-     frame->color_range = v4l2_get_color_range(avbuf);
-     frame->color_trc = v4l2_get_color_trc(avbuf);
--    frame->pts = v4l2_get_pts(avbuf);
-+    frame->pts = v4l2_get_pts(avbuf, no_rescale_pts);
-     frame->pkt_dts = AV_NOPTS_VALUE;
- 
-     /* these values are updated also during re-init in v4l2_process_driver_event */
--    frame->height = avbuf->context->height;
--    frame->width = avbuf->context->width;
--    frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio;
-+    frame->height = ctx->height;
-+    frame->width = ctx->width;
-+    frame->sample_aspect_ratio = ctx->sample_aspect_ratio;
-+
-+    if (ctx->selection.height && ctx->selection.width) {
-+        frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0;
-+        frame->crop_top  = ctx->selection.top < frame->height ? ctx->selection.top  : 0;
-+        frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ?
-+            frame->width - (ctx->selection.left + ctx->selection.width) : 0;
-+        frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ?
-+            frame->height - (ctx->selection.top + ctx->selection.height) : 0;
-+    }
- 
-     /* 3. report errors upstream */
-     if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) {
-@@ -451,15 +596,14 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
- 
- int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
- {
--    int ret;
--
-     av_packet_unref(pkt);
--    ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf);
--    if (ret)
--        return ret;
-+
-+    pkt->buf = wrap_avbuf(avbuf);
-+    if (pkt->buf == NULL)
-+        return AVERROR(ENOMEM);
- 
-     pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused;
--    pkt->data = pkt->buf->data;
-+    pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset;
- 
-     if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
-         pkt->flags |= AV_PKT_FLAG_KEY;
-@@ -469,20 +613,27 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
-         pkt->flags |= AV_PKT_FLAG_CORRUPT;
-     }
- 
--    pkt->dts = pkt->pts = v4l2_get_pts(avbuf);
-+    pkt->dts = pkt->pts = v4l2_get_pts(avbuf, 0);
- 
-     return 0;
- }
- 
--int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
-+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-+                                    const void *extdata, size_t extlen, int no_rescale_pts)
- {
-     int ret;
- 
--    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0);
-+    if (extlen) {
-+        ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0);
-+        if (ret)
-+            return ret;
-+    }
-+
-+    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen);
-     if (ret)
-         return ret;
- 
--    v4l2_set_pts(out, pkt->pts);
-+    v4l2_set_pts(out, pkt->pts, no_rescale_pts);
- 
-     if (pkt->flags & AV_PKT_FLAG_KEY)
-         out->flags = V4L2_BUF_FLAG_KEYFRAME;
-@@ -490,15 +641,61 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
-     return 0;
- }
- 
--int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
-+int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
-+{
-+    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0);
-+}
-+
-+
-+static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data)
-+{
-+    V4L2Buffer * const avbuf = (V4L2Buffer *)data;
-+    int i;
-+
-+    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) {
-+        struct V4L2Plane_info *p = avbuf->plane_info + i;
-+        if (p->mm_addr != NULL)
-+            munmap(p->mm_addr, p->length);
-+    }
-+
-+    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
-+        if (avbuf->drm_frame.objects[i].fd != -1)
-+            close(avbuf->drm_frame.objects[i].fd);
-+    }
-+
-+    ff_weak_link_unref(&avbuf->context_wl);
-+
-+    av_free(avbuf);
-+}
-+
-+
-+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx)
- {
--    V4L2Context *ctx = avbuf->context;
-     int ret, i;
-+    V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
-+    AVBufferRef * bufref;
-+
-+    *pbufref = NULL;
-+    if (avbuf == NULL)
-+        return AVERROR(ENOMEM);
-+
-+    bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0);
-+    if (bufref == NULL) {
-+        av_free(avbuf);
-+        return AVERROR(ENOMEM);
-+    }
- 
-+    avbuf->context = ctx;
-     avbuf->buf.memory = V4L2_MEMORY_MMAP;
-     avbuf->buf.type = ctx->type;
-     avbuf->buf.index = index;
- 
-+    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
-+        avbuf->drm_frame.objects[i].fd = -1;
-+    }
-+
-+    avbuf->context_wl = ff_weak_link_ref(ctx->wl_master);
-+
-     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-         avbuf->buf.length = VIDEO_MAX_PLANES;
-         avbuf->buf.m.planes = avbuf->planes;
-@@ -506,7 +703,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
- 
-     ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf);
-     if (ret < 0)
--        return AVERROR(errno);
-+        goto fail;
- 
-     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-         avbuf->num_planes = 0;
-@@ -526,25 +723,33 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
- 
-         if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-             avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
--            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
--                                           PROT_READ | PROT_WRITE, MAP_SHARED,
--                                           buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
-+
-+            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
-+                !buf_to_m2mctx(avbuf)->output_drm) {
-+                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
-+                                               PROT_READ | PROT_WRITE, MAP_SHARED,
-+                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
-+            }
-         } else {
-             avbuf->plane_info[i].length = avbuf->buf.length;
--            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
--                                          PROT_READ | PROT_WRITE, MAP_SHARED,
--                                          buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
-+
-+            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
-+                !buf_to_m2mctx(avbuf)->output_drm) {
-+                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
-+                                               PROT_READ | PROT_WRITE, MAP_SHARED,
-+                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
-+            }
-         }
- 
--        if (avbuf->plane_info[i].mm_addr == MAP_FAILED)
--            return AVERROR(ENOMEM);
-+        if (avbuf->plane_info[i].mm_addr == MAP_FAILED) {
-+            avbuf->plane_info[i].mm_addr = NULL;
-+            ret = AVERROR(ENOMEM);
-+            goto fail;
-+        }
-     }
- 
-     avbuf->status = V4L2BUF_AVAILABLE;
- 
--    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
--        return 0;
--
-     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-         avbuf->buf.m.planes = avbuf->planes;
-         avbuf->buf.length   = avbuf->num_planes;
-@@ -554,7 +759,20 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
-         avbuf->buf.length    = avbuf->planes[0].length;
-     }
- 
--    return ff_v4l2_buffer_enqueue(avbuf);
-+    if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
-+        if (buf_to_m2mctx(avbuf)->output_drm) {
-+            ret = v4l2_buffer_export_drm(avbuf);
-+            if (ret)
-+                    goto fail;
-+        }
-+    }
-+
-+    *pbufref = bufref;
-+    return 0;
-+
-+fail:
-+    av_buffer_unref(&bufref);
-+    return ret;
- }
- 
- int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
-@@ -563,9 +781,27 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
- 
-     avbuf->buf.flags = avbuf->flags;
- 
-+    if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) {
-+        av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
-+               avbuf->context->name, avbuf->buf.index,
-+               avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec,
-+               avbuf->context->q_count);
-+    }
-+
-     ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf);
--    if (ret < 0)
--        return AVERROR(errno);
-+    if (ret < 0) {
-+        int err = errno;
-+        av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n",
-+               avbuf->context->name, avbuf->buf.index,
-+               err, strerror(err));
-+        return AVERROR(err);
-+    }
-+
-+    ++avbuf->context->q_count;
-+    av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
-+           avbuf->context->name, avbuf->buf.index,
-+           avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec,
-+           avbuf->context->q_count);
- 
-     avbuf->status = V4L2BUF_IN_DRIVER;
- 
-diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
-index 3d2ff1b9a5d7..111526aee315 100644
---- a/libavcodec/v4l2_buffers.h
-+++ b/libavcodec/v4l2_buffers.h
-@@ -28,27 +28,37 @@
- #include <stddef.h>
- #include <linux/videodev2.h>
- 
-+#include "avcodec.h"
- #include "libavutil/buffer.h"
- #include "libavutil/frame.h"
-+#include "libavutil/hwcontext_drm.h"
- #include "packet.h"
- 
- enum V4L2Buffer_status {
-     V4L2BUF_AVAILABLE,
-     V4L2BUF_IN_DRIVER,
-+    V4L2BUF_IN_USE,
-     V4L2BUF_RET_USER,
- };
- 
- /**
-  * V4L2Buffer (wrapper for v4l2_buffer management)
-  */
-+struct V4L2Context;
-+struct ff_weak_link_client;
-+
- typedef struct V4L2Buffer {
--    /* each buffer needs to have a reference to its context */
-+    /* each buffer needs to have a reference to its context
-+     * The pointer is good enough for most operation but once the buffer has
-+     * been passed to the user the buffer may become orphaned so for free ops
-+     * the weak link must be used to ensure that the context is actually
-+     * there
-+     */
-     struct V4L2Context *context;
-+    struct ff_weak_link_client *context_wl;
- 
--    /* This object is refcounted per-plane, so we need to keep track
--     * of how many context-refs we are holding. */
--    AVBufferRef *context_ref;
--    atomic_uint context_refcount;
-+    /* DRM descriptor */
-+    AVDRMFrameDescriptor drm_frame;
- 
-     /* keep track of the mmap address and mmap length */
-     struct V4L2Plane_info {
-@@ -73,11 +83,12 @@ typedef struct V4L2Buffer {
-  *
-  * @param[in] frame The AVFRame to push the information to
-  * @param[in] buf The V4L2Buffer to get the information from
-+ * @param[in] no_rescale_pts If non-zero do not rescale PTS
-  *
-  * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect,
-  * AVERROR(ENOMEM) if the AVBufferRef can't be created.
-  */
--int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf);
-+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf, int no_rescale_pts);
- 
- /**
-  * Extracts the data from a V4L2Buffer to an AVPacket
-@@ -101,6 +112,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf);
-  */
- int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
- 
-+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-+                                    const void *extdata, size_t extlen, int no_rescale_pts);
-+
- /**
-  * Extracts the data from an AVFrame to a V4L2Buffer
-  *
-@@ -119,7 +133,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
-  *
-  * @returns 0 in case of success, a negative AVERROR code otherwise
-  */
--int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
-+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx);
- 
- /**
-  * Enqueues a V4L2Buffer
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index a40be946904e..be76068af32d 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -27,11 +27,13 @@
- #include <unistd.h>
- #include <fcntl.h>
- #include <poll.h>
-+#include "libavutil/avassert.h"
- #include "libavcodec/avcodec.h"
- #include "decode.h"
- #include "v4l2_buffers.h"
- #include "v4l2_fmt.h"
- #include "v4l2_m2m.h"
-+#include "weak_link.h"
- 
- struct v4l2_format_update {
-     uint32_t v4l2_fmt;
-@@ -153,21 +155,99 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd
-     }
- }
- 
--static int v4l2_start_decode(V4L2Context *ctx)
-+static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r)
- {
--    struct v4l2_decoder_cmd cmd = {
--        .cmd = V4L2_DEC_CMD_START,
--        .flags = 0,
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-+    struct v4l2_selection selection = {
-+        .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
-+        .target = V4L2_SEL_TGT_COMPOSE
-     };
--    int ret;
- 
--    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd);
--    if (ret)
-+    memset(r, 0, sizeof(*r));
-+    if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection))
-         return AVERROR(errno);
- 
-+    *r = selection.r;
-     return 0;
- }
- 
-+static int do_source_change(V4L2m2mContext * const s)
-+{
-+    AVCodecContext *const avctx = s->avctx;
-+
-+    int ret;
-+    int reinit;
-+    int full_reinit;
-+    struct v4l2_format cap_fmt = s->capture.format;
-+
-+    s->resize_pending = 0;
-+    s->capture.done = 0;
-+
-+    ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
-+    if (ret) {
-+        av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name);
-+        return 0;
-+    }
-+
-+    s->output.sample_aspect_ratio = v4l2_get_sar(&s->output);
-+
-+    get_default_selection(&s->capture, &s->capture.selection);
-+
-+    reinit = v4l2_resolution_changed(&s->capture, &cap_fmt);
-+    if (reinit) {
-+        s->capture.height = v4l2_get_height(&cap_fmt);
-+        s->capture.width = v4l2_get_width(&cap_fmt);
-+    }
-+    s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
-+
-+    av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d\n",
-+           s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
-+           s->capture.selection.width, s->capture.selection.height,
-+           s->capture.selection.left, s->capture.selection.top);
-+
-+    s->reinit = 1;
-+
-+    if (reinit) {
-+        if (avctx)
-+            ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height);
-+        if (ret < 0)
-+            av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n");
-+
-+        ret = ff_v4l2_m2m_codec_reinit(s);
-+        if (ret) {
-+            av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n");
-+            return AVERROR(EINVAL);
-+        }
-+        goto reinit_run;
-+    }
-+
-+    /* Buffers are OK so just stream off to ack */
-+    av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only\n", __func__);
-+
-+    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
-+    if (ret)
-+        av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n");
-+    s->draining = 0;
-+
-+    /* reinit executed */
-+reinit_run:
-+    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON);
-+    return 1;
-+}
-+
-+static int ctx_done(V4L2Context * const ctx)
-+{
-+    int rv = 0;
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-+
-+    ctx->done = 1;
-+
-+    if (s->resize_pending && !V4L2_TYPE_IS_OUTPUT(ctx->type))
-+        rv = do_source_change(s);
-+
-+    return rv;
-+}
-+
- /**
-  * handle resolution change event and end of stream event
-  * returns 1 if reinit was successful, negative if it failed
-@@ -175,8 +255,7 @@ static int v4l2_start_decode(V4L2Context *ctx)
-  */
- static int v4l2_handle_event(V4L2Context *ctx)
- {
--    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
--    struct v4l2_format cap_fmt = s->capture.format;
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-     struct v4l2_event evt = { 0 };
-     int ret;
- 
-@@ -186,44 +265,22 @@ static int v4l2_handle_event(V4L2Context *ctx)
-         return 0;
-     }
- 
-+    av_log(logger(ctx), AV_LOG_INFO, "Dq event %d\n", evt.type);
-+
-     if (evt.type == V4L2_EVENT_EOS) {
--        ctx->done = 1;
-+//        ctx->done = 1;
-+        av_log(logger(ctx), AV_LOG_TRACE, "%s VIDIOC_EVENT_EOS\n", ctx->name);
-         return 0;
-     }
- 
-     if (evt.type != V4L2_EVENT_SOURCE_CHANGE)
-         return 0;
- 
--    ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
--    if (ret) {
--        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name);
--        return 0;
--    }
--
--    if (v4l2_resolution_changed(&s->capture, &cap_fmt)) {
--        s->capture.height = v4l2_get_height(&cap_fmt);
--        s->capture.width = v4l2_get_width(&cap_fmt);
--        s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
--    } else {
--        v4l2_start_decode(ctx);
-+    s->resize_pending = 1;
-+    if (!ctx->done)
-         return 0;
--    }
--
--    s->reinit = 1;
--
--    if (s->avctx)
--        ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height);
--    if (ret < 0)
--        av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n");
--
--    ret = ff_v4l2_m2m_codec_reinit(s);
--    if (ret) {
--        av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n");
--        return AVERROR(EINVAL);
--    }
- 
--    /* reinit executed */
--    return 1;
-+    return do_source_change(s);
- }
- 
- static int v4l2_stop_decode(V4L2Context *ctx)
-@@ -266,8 +323,26 @@ static int v4l2_stop_encode(V4L2Context *ctx)
-     return 0;
- }
- 
-+static int count_in_driver(const V4L2Context * const ctx)
-+{
-+    int i;
-+    int n = 0;
-+
-+    if (!ctx->bufrefs)
-+        return -1;
-+
-+    for (i = 0; i < ctx->num_buffers; ++i) {
-+        V4L2Buffer *const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-+        if (avbuf->status == V4L2BUF_IN_DRIVER)
-+            ++n;
-+    }
-+    return n;
-+}
-+
- static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
- {
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-+    const int is_capture = !V4L2_TYPE_IS_OUTPUT(ctx->type);
-     struct v4l2_plane planes[VIDEO_MAX_PLANES];
-     struct v4l2_buffer buf = { 0 };
-     V4L2Buffer *avbuf;
-@@ -276,50 +351,84 @@ static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
-         .fd = ctx_to_m2mctx(ctx)->fd,
-     };
-     int i, ret;
-+    int no_rx_means_done = 0;
- 
--    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) {
-+    if (is_capture && ctx->bufrefs) {
-         for (i = 0; i < ctx->num_buffers; i++) {
--            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
-+            avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-+            if (avbuf->status == V4L2BUF_IN_DRIVER)
-                 break;
-         }
-         if (i == ctx->num_buffers)
--            av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to "
-+            av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers (%d) returned to "
-                                                 "userspace. Increase num_capture_buffers "
-                                                 "to prevent device deadlock or dropped "
--                                                "packets/frames.\n");
-+                                                "packets/frames.\n", i);
-     }
- 
-+#if 0
-+    // I think this is true but pointless
-+    // we will get some other form of EOF signal
-+
-     /* if we are draining and there are no more capture buffers queued in the driver we are done */
--    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) {
-+    if (is_capture && ctx_to_m2mctx(ctx)->draining) {
-         for (i = 0; i < ctx->num_buffers; i++) {
-             /* capture buffer initialization happens during decode hence
-              * detection happens at runtime
-              */
--            if (!ctx->buffers)
-+            if (!ctx->bufrefs)
-                 break;
- 
--            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
-+            avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-+            if (avbuf->status == V4L2BUF_IN_DRIVER)
-                 goto start;
-         }
-         ctx->done = 1;
-         return NULL;
-     }
-+#endif
- 
- start:
--    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
--        pfd.events =  POLLOUT | POLLWRNORM;
--    else {
-+    if (is_capture) {
-         /* no need to listen to requests for more input while draining */
-         if (ctx_to_m2mctx(ctx)->draining)
-             pfd.events =  POLLIN | POLLRDNORM | POLLPRI;
-+    } else {
-+        pfd.events =  POLLOUT | POLLWRNORM;
-     }
-+    no_rx_means_done = s->resize_pending && is_capture;
- 
-     for (;;) {
--        ret = poll(&pfd, 1, timeout);
-+        // If we have a resize pending then all buffers should be Qed
-+        // With a resize pending we should be in drain but evidence suggests
-+        // that not all decoders do this so poll to clear
-+        int t2 = no_rx_means_done ? 0 : timeout < 0 ? 3000 : timeout;
-+        const int e = pfd.events;
-+
-+        ret = poll(&pfd, 1, t2);
-+
-         if (ret > 0)
-             break;
--        if (errno == EINTR)
--            continue;
-+
-+        if (ret < 0) {
-+            int err = errno;
-+            if (err == EINTR)
-+                continue;
-+            av_log(logger(ctx), AV_LOG_ERROR, "=== poll error %d (%s): events=%#x, cap buffers=%d\n",
-+                   err, strerror(err),
-+                   e, count_in_driver(ctx));
-+            return NULL;
-+        }
-+
-+        // ret == 0 (timeout)
-+        if (no_rx_means_done) {
-+            av_log(logger(ctx), AV_LOG_DEBUG, "Ctx done on timeout\n");
-+            ret = ctx_done(ctx);
-+            if (ret > 0)
-+                goto start;
-+        }
-+        if (timeout == -1)
-+            av_log(logger(ctx), AV_LOG_ERROR, "=== poll unexpected TIMEOUT: events=%#x, cap buffers=%d\n", e, count_in_driver(ctx));;
-         return NULL;
-     }
- 
-@@ -329,7 +438,8 @@ start:
-            no need to raise a warning */
-         if (timeout == 0) {
-             for (i = 0; i < ctx->num_buffers; i++) {
--                if (ctx->buffers[i].status != V4L2BUF_AVAILABLE)
-+                avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-+                if (avbuf->status != V4L2BUF_AVAILABLE)
-                     av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
-             }
-         }
-@@ -347,22 +457,25 @@ start:
-             ctx->done = 1;
-             return NULL;
-         }
--        if (ret) {
--            /* if re-init was successful drop the buffer (if there was one)
--             * since we had to reconfigure capture (unmap all buffers)
--             */
--            return NULL;
--        }
-+        if (ret > 0)
-+            goto start;
-     }
- 
-     /* 2. dequeue the buffer */
-     if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) {
- 
--        if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
-+        if (is_capture) {
-             /* there is a capture buffer ready */
-             if (pfd.revents & (POLLIN | POLLRDNORM))
-                 goto dequeue;
- 
-+            // CAPTURE Q drained
-+            if (no_rx_means_done) {
-+                if (ctx_done(ctx) > 0)
-+                    goto start;
-+                return NULL;
-+            }
-+
-             /* the driver is ready to accept more input; instead of waiting for the capture
-              * buffer to complete we return NULL so input can proceed (we are single threaded)
-              */
-@@ -380,37 +493,58 @@ dequeue:
-             buf.m.planes = planes;
-         }
- 
--        ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf);
--        if (ret) {
--            if (errno != EAGAIN) {
--                ctx->done = 1;
--                if (errno != EPIPE)
-+        while ((ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf)) == -1) {
-+            const int err = errno;
-+            if (err == EINTR)
-+                continue;
-+            if (err != EAGAIN) {
-+                // EPIPE on CAPTURE can be used instead of BUF_FLAG_LAST
-+                if (err != EPIPE || !is_capture)
-                     av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
--                        ctx->name, av_err2str(AVERROR(errno)));
-+                        ctx->name, av_err2str(AVERROR(err)));
-+                if (ctx_done(ctx) > 0)
-+                    goto start;
-             }
-             return NULL;
-         }
-+        --ctx->q_count;
-+        av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d\n",
-+               ctx->name, buf.index,
-+               buf.timestamp.tv_sec, buf.timestamp.tv_usec,
-+               ctx->q_count, ++ctx->dq_count);
- 
--        if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) {
-+        avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
-+        avbuf->status = V4L2BUF_AVAILABLE;
-+        avbuf->buf = buf;
-+        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-+            memcpy(avbuf->planes, planes, sizeof(planes));
-+            avbuf->buf.m.planes = avbuf->planes;
-+        }
-+
-+        if (ctx_to_m2mctx(ctx)->draining && is_capture) {
-             int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ?
-                             buf.m.planes[0].bytesused : buf.bytesused;
-             if (bytesused == 0) {
--                ctx->done = 1;
-+                av_log(logger(ctx), AV_LOG_DEBUG, "Buffer empty - reQ\n");
-+
-+                // Must reQ so we don't leak
-+                // May not matter if the next thing we do is release all the
-+                // buffers but better to be tidy.
-+                ff_v4l2_buffer_enqueue(avbuf);
-+
-+                if (ctx_done(ctx) > 0)
-+                    goto start;
-                 return NULL;
-             }
- #ifdef V4L2_BUF_FLAG_LAST
--            if (buf.flags & V4L2_BUF_FLAG_LAST)
--                ctx->done = 1;
-+            if (buf.flags & V4L2_BUF_FLAG_LAST) {
-+                av_log(logger(ctx), AV_LOG_TRACE, "FLAG_LAST set\n");
-+                avbuf->status = V4L2BUF_IN_USE;  // Avoid flushing this buffer
-+                ctx_done(ctx);
-+            }
- #endif
-         }
- 
--        avbuf = &ctx->buffers[buf.index];
--        avbuf->status = V4L2BUF_AVAILABLE;
--        avbuf->buf = buf;
--        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
--            memcpy(avbuf->planes, planes, sizeof(planes));
--            avbuf->buf.m.planes = avbuf->planes;
--        }
-         return avbuf;
-     }
- 
-@@ -429,8 +563,9 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
-     }
- 
-     for (i = 0; i < ctx->num_buffers; i++) {
--        if (ctx->buffers[i].status == V4L2BUF_AVAILABLE)
--            return &ctx->buffers[i];
-+        V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-+        if (avbuf->status == V4L2BUF_AVAILABLE)
-+            return avbuf;
-     }
- 
-     return NULL;
-@@ -438,25 +573,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
- 
- static int v4l2_release_buffers(V4L2Context* ctx)
- {
--    struct v4l2_requestbuffers req = {
--        .memory = V4L2_MEMORY_MMAP,
--        .type = ctx->type,
--        .count = 0, /* 0 -> unmaps buffers from the driver */
--    };
--    int i, j;
-+    int i;
-+    int ret = 0;
-+    const int fd = ctx_to_m2mctx(ctx)->fd;
- 
--    for (i = 0; i < ctx->num_buffers; i++) {
--        V4L2Buffer *buffer = &ctx->buffers[i];
-+    // Orphan any buffers in the wild
-+    ff_weak_link_break(&ctx->wl_master);
-+
-+    if (ctx->bufrefs) {
-+        for (i = 0; i < ctx->num_buffers; i++)
-+            av_buffer_unref(ctx->bufrefs + i);
-+    }
-+
-+    if (fd != -1) {
-+        struct v4l2_requestbuffers req = {
-+            .memory = V4L2_MEMORY_MMAP,
-+            .type = ctx->type,
-+            .count = 0, /* 0 -> unmap all buffers from the driver */
-+        };
-+
-+        while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) {
-+            if (errno == EINTR)
-+                continue;
-+
-+            ret = AVERROR(errno);
- 
--        for (j = 0; j < buffer->num_planes; j++) {
--            struct V4L2Plane_info *p = &buffer->plane_info[j];
--            if (p->mm_addr && p->length)
--                if (munmap(p->mm_addr, p->length) < 0)
--                    av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno)));
-+            av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n",
-+                ctx->name, av_err2str(AVERROR(errno)));
-+
-+            if (ctx_to_m2mctx(ctx)->output_drm)
-+                av_log(logger(ctx), AV_LOG_ERROR,
-+                    "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n"
-+                    "for all buffers: \n"
-+                    "  1. drmModeRmFB(..)\n"
-+                    "  2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n");
-         }
-     }
-+    ctx->q_count = 0;
- 
--    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req);
-+    return ret;
- }
- 
- static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt)
-@@ -485,6 +640,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm
- 
- static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
- {
-+    V4L2m2mContext* s = ctx_to_m2mctx(ctx);
-+    V4L2m2mPriv *priv = s->avctx->priv_data;
-     enum AVPixelFormat pixfmt = ctx->av_pix_fmt;
-     struct v4l2_fmtdesc fdesc;
-     int ret;
-@@ -503,6 +660,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
-         if (ret)
-             return AVERROR(EINVAL);
- 
-+        if (priv->pix_fmt != AV_PIX_FMT_NONE) {
-+            if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) {
-+                fdesc.index++;
-+                continue;
-+            }
-+        }
-+
-         pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO);
-         ret = v4l2_try_raw_format(ctx, pixfmt);
-         if (ret){
-@@ -555,18 +719,73 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p)
-   *
-   *****************************************************************************/
- 
-+
-+static void flush_all_buffers_status(V4L2Context* const ctx)
-+{
-+    int i;
-+    for (i = 0; i < ctx->num_buffers; ++i) {
-+        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
-+        if (buf->status == V4L2BUF_IN_DRIVER)
-+            buf->status = V4L2BUF_AVAILABLE;
-+    }
-+    ctx->q_count = 0;
-+}
-+
-+static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx)
-+{
-+    int i;
-+    int rv;
-+
-+    if (!ctx->bufrefs) {
-+        rv = ff_v4l2_context_init(ctx);
-+        if (rv) {
-+            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
-+            return rv;
-+        }
-+    }
-+
-+    for (i = 0; i < ctx->num_buffers; ++i) {
-+        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
-+        if (buf->status == V4L2BUF_AVAILABLE) {
-+            rv = ff_v4l2_buffer_enqueue(buf);
-+            if (rv < 0)
-+                return rv;
-+        }
-+    }
-+    return 0;
-+}
-+
- int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
- {
-     int type = ctx->type;
-     int ret;
-+    AVCodecContext * const avctx = logger(ctx);
-+
-+    ff_mutex_lock(&ctx->lock);
-+
-+    if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type))
-+        stuff_all_buffers(avctx, ctx);
- 
-     ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type);
--    if (ret < 0)
--        return AVERROR(errno);
-+    if (ret < 0) {
-+        const int err = errno;
-+        av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name,
-+               cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF", err);
-+        ret = AVERROR(err);
-+    }
-+    else
-+    {
-+        if (cmd == VIDIOC_STREAMOFF)
-+            flush_all_buffers_status(ctx);
- 
--    ctx->streamon = (cmd == VIDIOC_STREAMON);
-+        ctx->streamon = (cmd == VIDIOC_STREAMON);
-+        av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name,
-+               cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF");
-+    }
- 
--    return 0;
-+    ff_mutex_unlock(&ctx->lock);
-+
-+    return ret;
- }
- 
- int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
-@@ -594,7 +813,8 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
-     return ff_v4l2_buffer_enqueue(avbuf);
- }
- 
--int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
-+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
-+                                   const void * extdata, size_t extlen, int no_rescale_pts)
- {
-     V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-     V4L2Buffer* avbuf;
-@@ -602,8 +822,9 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
- 
-     if (!pkt->size) {
-         ret = v4l2_stop_decode(ctx);
-+        // Log but otherwise ignore stop failure
-         if (ret)
--            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name);
-+            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
-         s->draining = 1;
-         return 0;
-     }
-@@ -612,14 +833,14 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
-     if (!avbuf)
-         return AVERROR(EAGAIN);
- 
--    ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf);
-+    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts);
-     if (ret)
-         return ret;
- 
-     return ff_v4l2_buffer_enqueue(avbuf);
- }
- 
--int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
-+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout, int no_rescale_pts)
- {
-     V4L2Buffer *avbuf;
- 
-@@ -636,7 +857,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
-         return AVERROR(EAGAIN);
-     }
- 
--    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
-+    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf, no_rescale_pts);
- }
- 
- int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
-@@ -695,54 +916,57 @@ void ff_v4l2_context_release(V4L2Context* ctx)
- {
-     int ret;
- 
--    if (!ctx->buffers)
-+    if (!ctx->bufrefs)
-         return;
- 
-     ret = v4l2_release_buffers(ctx);
-     if (ret)
-         av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name);
- 
--    av_freep(&ctx->buffers);
-+    av_freep(&ctx->bufrefs);
-+    av_buffer_unref(&ctx->frames_ref);
-+
-+    ff_mutex_destroy(&ctx->lock);
- }
- 
--int ff_v4l2_context_init(V4L2Context* ctx)
-+
-+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers)
- {
--    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-     struct v4l2_requestbuffers req;
--    int ret, i;
--
--    if (!v4l2_type_supported(ctx)) {
--        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
--        return AVERROR_PATCHWELCOME;
--    }
--
--    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
--    if (ret)
--        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name);
-+    int ret;
-+    int i;
- 
-     memset(&req, 0, sizeof(req));
--    req.count = ctx->num_buffers;
-+    req.count = req_buffers;
-     req.memory = V4L2_MEMORY_MMAP;
-     req.type = ctx->type;
--    ret = ioctl(s->fd, VIDIOC_REQBUFS, &req);
--    if (ret < 0) {
--        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno));
--        return AVERROR(errno);
-+    while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) {
-+        if (errno != EINTR) {
-+            ret = AVERROR(errno);
-+            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret));
-+            return ret;
-+        }
-     }
- 
-     ctx->num_buffers = req.count;
--    ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer));
--    if (!ctx->buffers) {
-+    ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs));
-+    if (!ctx->bufrefs) {
-         av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name);
--        return AVERROR(ENOMEM);
-+        goto fail_release;
-     }
- 
--    for (i = 0; i < req.count; i++) {
--        ctx->buffers[i].context = ctx;
--        ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i);
--        if (ret < 0) {
-+    ctx->wl_master = ff_weak_link_new(ctx);
-+    if (!ctx->wl_master) {
-+        ret = AVERROR(ENOMEM);
-+        goto fail_release;
-+    }
-+
-+    for (i = 0; i < ctx->num_buffers; i++) {
-+        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx);
-+        if (ret) {
-             av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret));
--            goto error;
-+            goto fail_release;
-         }
-     }
- 
-@@ -756,10 +980,62 @@ int ff_v4l2_context_init(V4L2Context* ctx)
- 
-     return 0;
- 
--error:
-+fail_release:
-     v4l2_release_buffers(ctx);
-+    av_freep(&ctx->bufrefs);
-+    return ret;
-+}
-+
-+int ff_v4l2_context_init(V4L2Context* ctx)
-+{
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-+    int ret;
-+
-+    // It is not valid to reinit a context without a previous release
-+    av_assert0(ctx->bufrefs == NULL);
-+
-+    if (!v4l2_type_supported(ctx)) {
-+        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
-+        return AVERROR_PATCHWELCOME;
-+    }
-+
-+    ff_mutex_init(&ctx->lock, NULL);
- 
--    av_freep(&ctx->buffers);
-+    if (s->output_drm) {
-+        AVHWFramesContext *hwframes;
-+
-+        ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref);
-+        if (!ctx->frames_ref) {
-+            ret = AVERROR(ENOMEM);
-+            goto fail_unlock;
-+        }
-+
-+        hwframes = (AVHWFramesContext*)ctx->frames_ref->data;
-+        hwframes->format = AV_PIX_FMT_DRM_PRIME;
-+        hwframes->sw_format = ctx->av_pix_fmt;
-+        hwframes->width = ctx->width;
-+        hwframes->height = ctx->height;
-+        ret = av_hwframe_ctx_init(ctx->frames_ref);
-+        if (ret < 0)
-+            goto fail_unref_hwframes;
-+    }
-+
-+    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
-+    if (ret) {
-+        ret = AVERROR(errno);
-+        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret));
-+        goto fail_unref_hwframes;
-+    }
-+
-+    ret = create_buffers(ctx, ctx->num_buffers);
-+    if (ret < 0)
-+        goto fail_unref_hwframes;
-+
-+    return 0;
- 
-+fail_unref_hwframes:
-+    av_buffer_unref(&ctx->frames_ref);
-+fail_unlock:
-+    ff_mutex_destroy(&ctx->lock);
-     return ret;
- }
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 6f7460c89a9d..59009d11d1e7 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -32,6 +32,8 @@
- #include "libavutil/rational.h"
- #include "codec_id.h"
- #include "packet.h"
-+#include "libavutil/buffer.h"
-+#include "libavutil/thread.h"
- #include "v4l2_buffers.h"
- 
- typedef struct V4L2Context {
-@@ -71,11 +73,12 @@ typedef struct V4L2Context {
-      */
-     int width, height;
-     AVRational sample_aspect_ratio;
-+    struct v4l2_rect selection;
- 
-     /**
--     * Indexed array of V4L2Buffers
-+     * Indexed array of pointers to V4L2Buffers
-      */
--    V4L2Buffer *buffers;
-+    AVBufferRef **bufrefs;
- 
-     /**
-      * Readonly after init.
-@@ -93,6 +96,12 @@ typedef struct V4L2Context {
-      */
-     int done;
- 
-+    AVBufferRef *frames_ref;
-+    int q_count;
-+    int dq_count;
-+    struct ff_weak_link_master *wl_master;
-+
-+    AVMutex lock;
- } V4L2Context;
- 
- /**
-@@ -157,9 +166,12 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
-  * @param[in] ctx The V4L2Context to dequeue from.
-  * @param[inout] f The AVFrame to dequeue to.
-  * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
-+ * @param[in] no_rescale_pts (0 rescale pts, 1 use pts as
-+ *       timestamp directly)
-+ *
-  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
-  */
--int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
-+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int no_rescale_pts);
- 
- /**
-  * Enqueues a buffer to a V4L2Context from an AVPacket
-@@ -171,7 +183,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
-  * @param[in] pkt A pointer to an AVPacket.
-  * @return 0 in case of success, a negative error otherwise.
-  */
--int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt);
-+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size, int no_rescale_pts);
- 
- /**
-  * Enqueues a buffer to a V4L2Context from an AVFrame
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index 602efb7a1605..516e6d98583d 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -216,13 +216,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
-         av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
- 
-     /* 2. unmap the capture buffers (v4l2 and ffmpeg):
--     *    we must wait for all references to be released before being allowed
--     *    to queue new buffers.
-      */
--    av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n");
--    if (atomic_load(&s->refcount))
--        while(sem_wait(&s->refsync) == -1 && errno == EINTR);
--
-     ff_v4l2_context_release(&s->capture);
- 
-     /* 3. get the new capture format */
-@@ -259,6 +253,8 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
-     av_frame_free(&s->frame);
-     av_packet_unref(&s->buf_pkt);
- 
-+    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n");
-+
-     av_free(s);
- }
- 
-@@ -270,6 +266,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
-     if (!s)
-         return 0;
- 
-+    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n");
-+
-+    if (av_codec_is_decoder(s->avctx->codec))
-+        av_packet_unref(&s->buf_pkt);
-+
-     if (s->fd >= 0) {
-         ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
-         if (ret)
-@@ -282,7 +283,14 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
- 
-     ff_v4l2_context_release(&s->output);
- 
-+    close(s->fd);
-+    s->fd = -1;
-+
-     s->self_ref = NULL;
-+    // This is only called on avctx close so after this point we don't have that
-+    // Crash sooner if we find we are using it (can still log with avctx = NULL)
-+    s->avctx = NULL;
-+    priv->context = NULL;
-     av_buffer_unref(&priv->context_ref);
- 
-     return 0;
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 04d86d7b9222..24a9c9486468 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -30,6 +30,7 @@
- #include <linux/videodev2.h>
- 
- #include "libavcodec/avcodec.h"
-+#include "libavutil/pixfmt.h"
- #include "v4l2_context.h"
- 
- #define container_of(ptr, type, member) ({ \
-@@ -40,6 +41,17 @@
-     { "num_output_buffers", "Number of buffers in the output context",\
-         OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS }
- 
-+#define FF_V4L2_M2M_TRACK_SIZE 128
-+typedef struct V4L2m2mTrackEl {
-+    int     discard;   // If we see this buffer its been flushed, so discard
-+    int     pkt_size;
-+    int64_t pts;
-+    int64_t reordered_opaque;
-+    int64_t pkt_pos;
-+    int64_t pkt_duration;
-+    int64_t track_pts;
-+} V4L2m2mTrackEl;
-+
- typedef struct V4L2m2mContext {
-     char devname[PATH_MAX];
-     int fd;
-@@ -53,6 +65,7 @@ typedef struct V4L2m2mContext {
-     sem_t refsync;
-     atomic_uint refcount;
-     int reinit;
-+    int resize_pending;
- 
-     /* null frame/packet received */
-     int draining;
-@@ -66,6 +79,23 @@ typedef struct V4L2m2mContext {
- 
-     /* reference back to V4L2m2mPriv */
-     void *priv;
-+
-+    AVBufferRef *device_ref;
-+
-+    /* generate DRM frames */
-+    int output_drm;
-+
-+    /* Frame tracking */
-+    int64_t last_pkt_dts;
-+    int64_t last_opaque;
-+    unsigned int track_no;
-+    V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
-+
-+    /* req pkt */
-+    int req_pkt;
-+
-+    /* Ext data sent */
-+    int extdata_sent;
- } V4L2m2mContext;
- 
- typedef struct V4L2m2mPriv {
-@@ -76,6 +106,7 @@ typedef struct V4L2m2mPriv {
- 
-     int num_output_buffers;
-     int num_capture_buffers;
-+    enum AVPixelFormat pix_fmt;
- } V4L2m2mPriv;
- 
- /**
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 4944d0851198..7f6033ac2c41 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -23,6 +23,10 @@
- 
- #include <linux/videodev2.h>
- #include <sys/ioctl.h>
-+
-+#include "libavutil/avassert.h"
-+#include "libavutil/hwcontext.h"
-+#include "libavutil/hwcontext_drm.h"
- #include "libavutil/pixfmt.h"
- #include "libavutil/pixdesc.h"
- #include "libavutil/opt.h"
-@@ -30,26 +34,51 @@
- #include "codec_internal.h"
- #include "libavcodec/decode.h"
- 
-+#include "libavcodec/hwaccels.h"
-+#include "libavcodec/internal.h"
-+#include "libavcodec/hwconfig.h"
-+
- #include "v4l2_context.h"
- #include "v4l2_m2m.h"
- #include "v4l2_fmt.h"
- 
-+static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)
-+{
-+    int ret;
-+    struct v4l2_decoder_cmd cmd = {
-+        .cmd = V4L2_DEC_CMD_START,
-+        .flags = 0,
-+    };
-+
-+    if (s->output.streamon)
-+        return 0;
-+
-+    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON);
-+    if (ret < 0)
-+        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context\n");
-+
-+    if (!s->capture.streamon || ret < 0)
-+        return ret;
-+
-+    ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd);
-+    if (ret < 0)
-+        av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno);
-+    else
-+        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_DECODER_CMD start OK\n");
-+
-+    return ret;
-+}
-+
- static int v4l2_try_start(AVCodecContext *avctx)
- {
-     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-     V4L2Context *const capture = &s->capture;
--    V4L2Context *const output = &s->output;
-     struct v4l2_selection selection = { 0 };
-     int ret;
- 
-     /* 1. start the output process */
--    if (!output->streamon) {
--        ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON);
--        if (ret < 0) {
--            av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n");
--            return ret;
--        }
--    }
-+    if ((ret = check_output_streamon(avctx, s)) != 0)
-+        return ret;
- 
-     if (capture->streamon)
-         return 0;
-@@ -63,15 +92,29 @@ static int v4l2_try_start(AVCodecContext *avctx)
-     }
- 
-     /* 2.1 update the AVCodecContext */
--    avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
--    capture->av_pix_fmt = avctx->pix_fmt;
-+    capture->av_pix_fmt =
-+        ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
-+    if (s->output_drm) {
-+        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-+        avctx->sw_pix_fmt = capture->av_pix_fmt;
-+    }
-+    else
-+        avctx->pix_fmt = capture->av_pix_fmt;
- 
-     /* 3. set the crop parameters */
-+#if 1
-+    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-+    selection.target = V4L2_SEL_TGT_CROP_DEFAULT;
-+    ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
-+    av_log(avctx, AV_LOG_INFO, "Post G selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height);
-+#else
-     selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-     selection.r.height = avctx->coded_height;
-     selection.r.width = avctx->coded_width;
-+    av_log(avctx, AV_LOG_INFO, "Try selection %dx%d\n", avctx->coded_width, avctx->coded_height);
-     ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
--    if (!ret) {
-+    av_log(avctx, AV_LOG_INFO, "Post S selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height);
-+    if (1) {
-         ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
-         if (ret) {
-             av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
-@@ -82,15 +125,7 @@ static int v4l2_try_start(AVCodecContext *avctx)
-             capture->width  = selection.r.width;
-         }
-     }
--
--    /* 4. init the capture context now that we have the capture format */
--    if (!capture->buffers) {
--        ret = ff_v4l2_context_init(capture);
--        if (ret) {
--            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
--            return AVERROR(ENOMEM);
--        }
--    }
-+#endif
- 
-     /* 5. start the capture process */
-     ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
-@@ -133,50 +168,287 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s)
-     return 0;
- }
- 
--static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
-+{
-+    return (int64_t)n;
-+}
-+
-+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
-+{
-+    return (unsigned int)pts;
-+}
-+
-+// FFmpeg requires us to propagate a number of vars from the coded pkt into
-+// the decoded frame. The only thing that tracks like that in V4L2 stateful
-+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
-+// guarantees about PTS being unique or specified for every frame so replace
-+// the supplied PTS with a simple incrementing number and keep a circular
-+// buffer of all the things we want preserved (including the original PTS)
-+// indexed by the tracking no.
-+static void
-+xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *const avpkt)
-+{
-+    int64_t track_pts;
-+
-+    // Avoid 0
-+    if (++s->track_no == 0)
-+        s->track_no = 1;
-+
-+    track_pts = track_to_pts(avctx, s->track_no);
-+
-+    av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, s->track_no);
-+    s->last_pkt_dts = avpkt->dts;
-+    s->track_els[s->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-+        .discard          = 0,
-+        .pkt_size         = avpkt->size,
-+        .pts              = avpkt->pts,
-+        .reordered_opaque = avctx->reordered_opaque,
-+        .pkt_pos          = avpkt->pos,
-+        .pkt_duration     = avpkt->duration,
-+        .track_pts        = track_pts
-+    };
-+    avpkt->pts = track_pts;
-+}
-+
-+// Returns -1 if we should discard the frame
-+static int
-+xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *const frame)
-+{
-+    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
-+    const V4L2m2mTrackEl *const t = s->track_els + n;
-+    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
-+    {
-+        av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-+        frame->pts              = AV_NOPTS_VALUE;
-+        frame->pkt_dts          = s->last_pkt_dts;
-+        frame->reordered_opaque = s->last_opaque;
-+        frame->pkt_pos          = -1;
-+        frame->pkt_duration     = 0;
-+        frame->pkt_size         = -1;
-+    }
-+    else if (!t->discard)
-+    {
-+        frame->pts              = t->pts;
-+        frame->pkt_dts          = s->last_pkt_dts;
-+        frame->reordered_opaque = t->reordered_opaque;
-+        frame->pkt_pos          = t->pkt_pos;
-+        frame->pkt_duration     = t->pkt_duration;
-+        frame->pkt_size         = t->pkt_size;
-+
-+        s->last_opaque = s->track_els[n].reordered_opaque;
-+        s->track_els[n].pts = AV_NOPTS_VALUE;  // If we hit this again deny accurate knowledge of PTS
-+    }
-+    else
-+    {
-+        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-+        return -1;
-+    }
-+
-+    frame->best_effort_timestamp = frame->pts;
-+    frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
-+    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 ", DTS=%" PRId64 "\n", frame->pts, frame->pkt_dts);
-+    return 0;
-+}
-+
-+static inline int stream_started(const V4L2m2mContext * const s) {
-+    return s->capture.streamon && s->output.streamon;
-+}
-+
-+#define NQ_OK        0
-+#define NQ_Q_FULL    1
-+#define NQ_SRC_EMPTY 2
-+#define NQ_DRAINING  3
-+#define NQ_DEAD      4
-+
-+#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING)
-+
-+// AVERROR_EOF     Flushing an already flushed stream
-+// -ve             Error (all errors except EOF are unexpected)
-+// NQ_OK (0)       OK
-+// NQ_Q_FULL       Dst full (retry if we think V4L2 Q has space now)
-+// NQ_SRC_EMPTY    Src empty (do not retry)
-+// NQ_DRAINING     At EOS, dQ dest until EOS there too
-+// NQ_DEAD         Not running (do not retry, do not attempt capture dQ)
-+
-+static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s)
- {
--    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
--    V4L2Context *const capture = &s->capture;
--    V4L2Context *const output = &s->output;
-     int ret;
- 
-+    // If we don't already have a coded packet - get a new one
-+    // We will already have a coded pkt if the output Q was full last time we
-+    // tried to Q it
-     if (!s->buf_pkt.size) {
-         ret = ff_decode_get_packet(avctx, &s->buf_pkt);
-+
-+        if (ret == AVERROR(EAGAIN)) {
-+            if (!stream_started(s)) {
-+                av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__);
-+                return NQ_DEAD;
-+            }
-+            return NQ_SRC_EMPTY;
-+        }
-+
-+        if (ret == AVERROR_EOF) {
-+            // EOF - enter drain mode
-+            av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n",
-+                   ret, s->buf_pkt.size, stream_started(s), s->draining);
-+            if (!stream_started(s)) {
-+                av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n");
-+                s->draining = 1;
-+                s->capture.done = 1;
-+                return AVERROR_EOF;
-+            }
-+
-+            if (!s->draining) {
-+                // Calling enqueue with an empty pkt starts drain
-+                av_assert0(s->buf_pkt.size == 0);
-+                ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0, 1);
-+                if (ret) {
-+                    av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret);
-+                    return ret;
-+                }
-+            }
-+            return NQ_DRAINING;
-+        }
-+
-         if (ret < 0) {
--            if (ret == AVERROR(EAGAIN))
--                return ff_v4l2_context_dequeue_frame(capture, frame, 0);
--            else if (ret != AVERROR_EOF)
--                return ret;
-+            av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret);
-+            return ret;
-         }
-+
-+        xlat_pts_in(avctx, s, &s->buf_pkt);
-     }
- 
--    if (s->draining)
--        goto dequeue;
-+    if ((ret = check_output_streamon(avctx, s)) != 0)
-+        return ret;
- 
--    ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt);
--    if (ret < 0 && ret != AVERROR(EAGAIN))
--        goto fail;
-+    ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt,
-+                                         avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size,
-+                                         1);
- 
--    /* if EAGAIN don't unref packet and try to enqueue in the next iteration */
--    if (ret != AVERROR(EAGAIN))
-+    if (ret == AVERROR(EAGAIN)) {
-+        // Out of input buffers - keep packet
-+        ret = NQ_Q_FULL;
-+    }
-+    else {
-+        // In all other cases we are done with this packet
-         av_packet_unref(&s->buf_pkt);
-+        s->extdata_sent = 1;
- 
--    if (!s->draining) {
--        ret = v4l2_try_start(avctx);
-         if (ret) {
--            /* cant recover */
--            if (ret != AVERROR(ENOMEM))
--                ret = 0;
--            goto fail;
-+            av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret);
-+            return ret;
-+        }
-+    }
-+
-+    // Start if we haven't
-+    {
-+        const int ret2 = v4l2_try_start(avctx);
-+        if (ret2) {
-+            av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2);
-+            ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD;
-+        }
-+    }
-+
-+    return ret;
-+}
-+
-+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-+{
-+    V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-+    int src_rv;
-+    int dst_rv = 1;  // Non-zero (done), non-negative (error) number
-+
-+    do {
-+        src_rv = try_enqueue_src(avctx, s);
-+
-+        // If we got a frame last time and we have nothing to enqueue then
-+        // return now. rv will be AVERROR(EAGAIN) indicating that we want more input
-+        // This should mean that once decode starts we enter a stable state where
-+        // we alternately ask for input and produce output
-+        if (s->req_pkt && src_rv == NQ_SRC_EMPTY)
-+            break;
-+
-+        if (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) {
-+            av_log(avctx, AV_LOG_WARNING, "Poll says src Q has space but enqueue fail");
-+            src_rv = NQ_SRC_EMPTY;  // If we can't enqueue pretend that there is nothing to enqueue
-+        }
-+
-+        // Try to get a new frame if
-+        // (a) we haven't already got one AND
-+        // (b) enqueue returned a status indicating that decode should be attempted
-+        if (dst_rv != 0 && TRY_DQ(src_rv)) {
-+            do {
-+                // Dequeue frame will unref any previous contents of frame
-+                // if it returns success so we don't need an explicit unref
-+                // when discarding
-+                // This returns AVERROR(EAGAIN) if there isn't a frame ready yet
-+                // but there is room in the input Q
-+                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, -1, 1);
-+
-+                if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-+                    av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
-+                           s->draining, s->capture.done);
-+                else if (dst_rv && dst_rv != AVERROR(EAGAIN))
-+                    av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
-+                           s->draining, s->capture.done, dst_rv);
-+
-+                // Go again if we got a frame that we need to discard
-+            } while (dst_rv == 0 && xlat_pts_out(avctx, s, frame));
-+        }
-+
-+        // Continue trying to enqueue packets if either
-+        // (a) we succeeded last time OR
-+        // (b) enqueue failed due to input Q full AND there is now room
-+    } while (src_rv == NQ_OK || (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) );
-+
-+    // Ensure that the frame contains nothing if we aren't returning a frame
-+    // (might happen when discarding)
-+    if (dst_rv)
-+        av_frame_unref(frame);
-+
-+    // If we got a frame this time ask for a pkt next time
-+    s->req_pkt = (dst_rv == 0);
-+
-+#if 0
-+    if (dst_rv == 0)
-+    {
-+        static int z = 0;
-+        if (++z > 50) {
-+            av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n");
-+            ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
-+            return -1;
-         }
-     }
-+#endif
-+
-+    return dst_rv == 0 ? 0 :
-+        src_rv < 0 ? src_rv :
-+        dst_rv < 0 ? dst_rv :
-+            AVERROR(EAGAIN);
-+}
-+
-+#if 0
-+#include <time.h>
-+static int64_t us_time(void)
-+{
-+    struct timespec ts;
-+    clock_gettime(CLOCK_MONOTONIC, &ts);
-+    return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
-+}
- 
--dequeue:
--    return ff_v4l2_context_dequeue_frame(capture, frame, -1);
--fail:
--    av_packet_unref(&s->buf_pkt);
-+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-+{
-+    int ret;
-+    const int64_t now = us_time();
-+    int64_t done;
-+    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
-+    ret = v4l2_receive_frame2(avctx, frame);
-+    done = us_time();
-+    av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret);
-     return ret;
- }
-+#endif
- 
- static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- {
-@@ -185,6 +457,9 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     V4L2m2mPriv *priv = avctx->priv_data;
-     int ret;
- 
-+    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
-+    avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-+
-     ret = ff_v4l2_m2m_create_context(priv, &s);
-     if (ret < 0)
-         return ret;
-@@ -205,6 +480,28 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
-     capture->av_pix_fmt = avctx->pix_fmt;
- 
-+    /* the client requests the codec to generate DRM frames:
-+     *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
-+     *       check the ff_v4l2_buffer_to_avframe conversion function.
-+     *   - the DRM frame format is passed in the DRM frame descriptor layer.
-+     *       check the v4l2_get_drm_frame function.
-+     */
-+    switch (ff_get_format(avctx, avctx->codec->pix_fmts)) {
-+    default:
-+        s->output_drm = 1;
-+        break;
-+    }
-+
-+    s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
-+    if (!s->device_ref) {
-+        ret = AVERROR(ENOMEM);
-+        return ret;
-+    }
-+
-+    ret = av_hwdevice_ctx_init(s->device_ref);
-+    if (ret < 0)
-+        return ret;
-+
-     s->avctx = avctx;
-     ret = ff_v4l2_m2m_codec_init(priv);
-     if (ret) {
-@@ -217,7 +514,53 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- 
- static av_cold int v4l2_decode_close(AVCodecContext *avctx)
- {
--    return ff_v4l2_m2m_codec_end(avctx->priv_data);
-+    int rv;
-+    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
-+    rv = ff_v4l2_m2m_codec_end(avctx->priv_data);
-+    av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv);
-+    return rv;
-+}
-+
-+static void v4l2_decode_flush(AVCodecContext *avctx)
-+{
-+    // An alternatve and more drastic form of flush is to simply do this:
-+    //    v4l2_decode_close(avctx);
-+    //    v4l2_decode_init(avctx);
-+    // The downside is that this keeps a decoder open until all the frames
-+    // associated with it have been returned.  This is a bit wasteful on
-+    // possibly limited h/w resources and fails on a Pi for this reason unless
-+    // more GPU mem is allocated than is the default.
-+
-+    V4L2m2mPriv * const priv = avctx->priv_data;
-+    V4L2m2mContext * const s = priv->context;
-+    V4L2Context * const output = &s->output;
-+    V4L2Context * const capture = &s->capture;
-+    int ret, i;
-+
-+    av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon);
-+
-+    // Reflushing everything is benign, quick and avoids having to worry about
-+    // states like EOS processing so don't try to optimize out (having got it
-+    // wrong once)
-+
-+    ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF);
-+    if (ret < 0)
-+        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", output->name, ret);
-+
-+    // V4L2 makes no guarantees about whether decoded frames are flushed or not
-+    // so mark all frames we are tracking to be discarded if they appear
-+    for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i)
-+        s->track_els[i].discard = 1;
-+
-+    // resend extradata
-+    s->extdata_sent = 0;
-+    // clear EOS status vars
-+    s->draining = 0;
-+    output->done = 0;
-+    capture->done = 0;
-+
-+    // Stream on will occur when we actually submit a new frame
-+    av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__);
- }
- 
- #define OFFSET(x) offsetof(V4L2m2mPriv, x)
-@@ -227,9 +570,15 @@ static const AVOption options[] = {
-     V4L_M2M_DEFAULT_OPTS,
-     { "num_capture_buffers", "Number of buffers in the capture context",
-         OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS },
-+    { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS },
-     { NULL},
- };
- 
-+static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
-+    HW_CONFIG_INTERNAL(DRM_PRIME),
-+    NULL
-+};
-+
- #define M2MDEC_CLASS(NAME) \
-     static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
-         .class_name = #NAME "_v4l2m2m_decoder", \
-@@ -250,11 +599,16 @@ static const AVOption options[] = {
-         .init           = v4l2_decode_init, \
-         FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \
-         .close          = v4l2_decode_close, \
-+        .flush          = v4l2_decode_flush, \
-         .bsfs           = bsf_name, \
-         .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
-         .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \
-                           FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \
-         .p.wrapper_name = "v4l2m2m", \
-+        .p.pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
-+                                                         AV_PIX_FMT_NV12, \
-+                                                         AV_PIX_FMT_NONE}, \
-+        .hw_configs     = v4l2_m2m_hw_configs, \
-     }
- 
- M2MDEC(h264,  "H.264", AV_CODEC_ID_H264,       "h264_mp4toannexb");
-
-From 8a5fcd215d3ac07df4807d207e8a337edccffaeb Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Jun 2021 18:46:21 +0100
-Subject: [PATCH 017/186] Fix crash in hw_device_default_name if type not found
- (NONE)
-
----
- fftools/ffmpeg_hw.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c
-index 88fa7824701e..740a5e7153cf 100644
---- a/fftools/ffmpeg_hw.c
-+++ b/fftools/ffmpeg_hw.c
-@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum AVHWDeviceType type)
-     char *name;
-     size_t index_pos;
-     int index, index_limit = 1000;
-+    if (!type_name)
-+        return NULL;
-     index_pos = strlen(type_name);
-     name = av_malloc(index_pos + 4);
-     if (!name)
-
-From 35c7187e199e4042d30165c015f525bfcb377796 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Jun 2021 18:59:18 +0100
-Subject: [PATCH 018/186] Allow v4l2m2m to select non-drm_prime output formats
-
----
- libavcodec/v4l2_buffers.c |  2 +-
- libavcodec/v4l2_m2m_dec.c | 14 ++++++++++----
- 2 files changed, 11 insertions(+), 5 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index a003934ca19e..1ca1128db6f4 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -524,7 +524,7 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
-                 offset += dst_stride * out->context->height;
-             }
-             if (offset > out->plane_info[0].length) {
--                av_log(NULL, AV_LOG_ERROR, "%s: Plane total %d > buffer size %d\n", __func__, offset, out->plane_info[0].length);
-+                av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length);
-                 return -1;
-             }
- 
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 7f6033ac2c41..a4b5a4e7e991 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -455,10 +455,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     V4L2Context *capture, *output;
-     V4L2m2mContext *s;
-     V4L2m2mPriv *priv = avctx->priv_data;
-+    int gf_pix_fmt;
-     int ret;
- 
-     av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
--    avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
- 
-     ret = ff_v4l2_m2m_create_context(priv, &s);
-     if (ret < 0)
-@@ -486,10 +486,15 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-      *   - the DRM frame format is passed in the DRM frame descriptor layer.
-      *       check the v4l2_get_drm_frame function.
-      */
--    switch (ff_get_format(avctx, avctx->codec->pix_fmts)) {
--    default:
-+
-+    gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
-+    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s); get_format requested=%d (%s)\n",
-+           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
-+
-+    s->output_drm = 0;
-+    if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
-+        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-         s->output_drm = 1;
--        break;
-     }
- 
-     s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
-@@ -607,6 +612,7 @@ static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
-         .p.wrapper_name = "v4l2m2m", \
-         .p.pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
-                                                          AV_PIX_FMT_NV12, \
-+                                                         AV_PIX_FMT_YUV420P, \
-                                                          AV_PIX_FMT_NONE}, \
-         .hw_configs     = v4l2_m2m_hw_configs, \
-     }
-
-From d5bfb5014aa4692820903ef7287bc0319ebc139f Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Jun 2021 18:59:38 +0100
-Subject: [PATCH 019/186] Fix YUV420P output from v4l2m2m
-
-Also put get_width get_height inlines in header as they are generally
-useful.
----
- libavcodec/v4l2_buffers.c | 12 ++++++------
- libavcodec/v4l2_context.c | 22 ++++++----------------
- libavcodec/v4l2_m2m.h     | 12 ++++++++++++
- 3 files changed, 24 insertions(+), 22 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 1ca1128db6f4..f4c11ca8d06d 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -425,17 +425,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
-     case AV_PIX_FMT_NV21:
-         if (avbuf->num_planes > 1)
-             break;
--        frame->linesize[1] = avbuf->plane_info[0].bytesperline;
--        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
-+        frame->linesize[1] = frame->linesize[0];
-+        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
-         break;
- 
-     case AV_PIX_FMT_YUV420P:
-         if (avbuf->num_planes > 1)
-             break;
--        frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1;
--        frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1;
--        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
--        frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2);
-+        frame->linesize[1] = frame->linesize[0] / 2;
-+        frame->linesize[2] = frame->linesize[1];
-+        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
-+        frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2;
-         break;
- 
-     default:
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index be76068af32d..6fe258662786 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -55,16 +55,6 @@ static inline AVCodecContext *logger(V4L2Context *ctx)
-     return ctx_to_m2mctx(ctx)->avctx;
- }
- 
--static inline unsigned int v4l2_get_width(struct v4l2_format *fmt)
--{
--    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
--}
--
--static inline unsigned int v4l2_get_height(struct v4l2_format *fmt)
--{
--    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
--}
--
- static AVRational v4l2_get_sar(V4L2Context *ctx)
- {
-     struct AVRational sar = { 0, 1 };
-@@ -96,8 +86,8 @@ static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2
-     if (ret)
-         av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n",
-             ctx->name,
--            v4l2_get_width(fmt1), v4l2_get_height(fmt1),
--            v4l2_get_width(fmt2), v4l2_get_height(fmt2));
-+            ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1),
-+            ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2));
- 
-     return ret;
- }
-@@ -195,8 +185,8 @@ static int do_source_change(V4L2m2mContext * const s)
- 
-     reinit = v4l2_resolution_changed(&s->capture, &cap_fmt);
-     if (reinit) {
--        s->capture.height = v4l2_get_height(&cap_fmt);
--        s->capture.width = v4l2_get_width(&cap_fmt);
-+        s->capture.height = ff_v4l2_get_format_height(&cap_fmt);
-+        s->capture.width = ff_v4l2_get_format_width(&cap_fmt);
-     }
-     s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
- 
-@@ -973,8 +963,8 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers
-     av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name,
-         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat),
-         req.count,
--        v4l2_get_width(&ctx->format),
--        v4l2_get_height(&ctx->format),
-+        ff_v4l2_get_format_width(&ctx->format),
-+        ff_v4l2_get_format_height(&ctx->format),
-         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage,
-         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline);
- 
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 24a9c9486468..8f054f2f50f9 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -160,4 +160,16 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx);
-  */
- int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx);
- 
-+
-+static inline unsigned int ff_v4l2_get_format_width(struct v4l2_format *fmt)
-+{
-+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
-+}
-+
-+static inline unsigned int ff_v4l2_get_format_height(struct v4l2_format *fmt)
-+{
-+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
-+}
-+
-+
- #endif /* AVCODEC_V4L2_M2M_H */
-
-From 43b65c3e3d1c8e2c35694764b7ee93e7dbf75a1a Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Jun 2021 19:23:44 +0100
-Subject: [PATCH 020/186] Report buffer overflows in v4l2m2m
-
----
- libavcodec/v4l2_buffers.c | 14 ++++++++++----
- libavcodec/v4l2_context.c |  5 ++++-
- 2 files changed, 14 insertions(+), 5 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index f4c11ca8d06d..de31f7ced93c 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -364,6 +364,7 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
- static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset)
- {
-     unsigned int bytesused, length;
-+    int rv = 0;
- 
-     if (plane >= out->num_planes)
-         return AVERROR(EINVAL);
-@@ -371,11 +372,16 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i
-     length = out->plane_info[plane].length;
-     bytesused = FFMIN(size+offset, length);
- 
--    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset));
-+    if (size > length - offset) {
-+        size = length - offset;
-+        rv = AVERROR(ENOMEM);
-+    }
-+
-+    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size);
- 
-     set_buf_length(out, plane, bytesused, length);
- 
--    return 0;
-+    return rv;
- }
- 
- static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf)
-@@ -630,7 +636,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-     }
- 
-     ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen);
--    if (ret)
-+    if (ret && ret != AVERROR(ENOMEM))
-         return ret;
- 
-     v4l2_set_pts(out, pkt->pts, no_rescale_pts);
-@@ -638,7 +644,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-     if (pkt->flags & AV_PKT_FLAG_KEY)
-         out->flags = V4L2_BUF_FLAG_KEYFRAME;
- 
--    return 0;
-+    return ret;
- }
- 
- int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 6fe258662786..81aced0c2b5d 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -824,7 +824,10 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
-         return AVERROR(EAGAIN);
- 
-     ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts);
--    if (ret)
-+    if (ret == AVERROR(ENOMEM))
-+        av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
-+               __func__, pkt->size, avbuf->planes[0].length);
-+    else if (ret)
-         return ret;
- 
-     return ff_v4l2_buffer_enqueue(avbuf);
-
-From b02c14a2e1f9890370eb9d459feccacb7e652e82 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 14 Jun 2021 11:55:16 +0100
-Subject: [PATCH 021/186] Increase V4L2 H264 stateful coded buffer size
-
-Try to set a min size of frame size / 2 for bitbuffers passed to V4l2.
-This fixes a few streams that have large I-frames.  You would hope
-Annex-A gave useful minCR so an appropriate size could be calculated
-but it doesn't really.  It gives good guidance for bits required over
-time but the instantaneous limits are very weak so it is possible
-that even this won't be enough.  The correct long term solution would
-be to have resizable dmabufs but that is a greter rewrite than seems
-sensible now.
----
- libavcodec/v4l2_context.c | 24 +++++++++++++++++++++++-
- libavcodec/v4l2_context.h |  6 ++++++
- libavcodec/v4l2_m2m_dec.c | 24 ++++++++++++++++++++++++
- 3 files changed, 53 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 81aced0c2b5d..a17ae027a666 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -902,7 +902,29 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
- 
- int ff_v4l2_context_set_format(V4L2Context* ctx)
- {
--    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
-+    int ret;
-+
-+    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
-+    if (ret != 0)
-+        return ret;
-+
-+    // Check returned size against min size and if smaller have another go
-+    // Only worry about plane[0] as this is meant to enforce limits for
-+    // encoded streams where we might know a bit more about the shape
-+    // than the driver
-+    if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) {
-+        if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage)
-+            return 0;
-+        ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size;
-+    }
-+    else {
-+        if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage)
-+            return 0;
-+        ctx->format.fmt.pix.sizeimage = ctx->min_buf_size;
-+    }
-+
-+    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
-+    return ret;
- }
- 
- void ff_v4l2_context_release(V4L2Context* ctx)
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 59009d11d1e7..37b0431400d8 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -75,6 +75,12 @@ typedef struct V4L2Context {
-     AVRational sample_aspect_ratio;
-     struct v4l2_rect selection;
- 
-+    /**
-+     * If the default size of buffer is less than this then try to
-+     * set to this.
-+     */
-+    uint32_t min_buf_size;
-+
-     /**
-      * Indexed array of pointers to V4L2Buffers
-      */
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index a4b5a4e7e991..1851acbc93fe 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -450,6 +450,27 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- }
- #endif
- 
-+static uint32_t max_coded_size(const AVCodecContext * const avctx)
-+{
-+    uint32_t wxh = avctx->coded_width * avctx->coded_height;
-+    uint32_t size;
-+
-+    // Currently the only thing we try to set our own limits for is H264
-+    if (avctx->codec_id != AV_CODEC_ID_H264)
-+        return 0;
-+
-+    size = wxh * 3 / 2;
-+    // H.264 Annex A table A-1 gives minCR which is either 2 or 4
-+    // unfortunately that doesn't yield an actually useful limit
-+    // and it should be noted that frame 0 is special cased to allow
-+    // a bigger number which really isn't helpful for us. So just pick
-+    // frame_size / 2
-+    size /= 2;
-+    // Add 64k to allow for any overheads and/or encoder hopefulness
-+    // with small WxH
-+    return size + (1 << 16);
-+}
-+
- static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- {
-     V4L2Context *capture, *output;
-@@ -460,6 +481,7 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- 
-     av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
- 
-+    av_log(avctx, AV_LOG_INFO, "level=%d\n", avctx->level);
-     ret = ff_v4l2_m2m_create_context(priv, &s);
-     if (ret < 0)
-         return ret;
-@@ -476,9 +498,11 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- 
-     output->av_codec_id = avctx->codec_id;
-     output->av_pix_fmt  = AV_PIX_FMT_NONE;
-+    output->min_buf_size = max_coded_size(avctx);
- 
-     capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
-     capture->av_pix_fmt = avctx->pix_fmt;
-+    capture->min_buf_size = 0;
- 
-     /* the client requests the codec to generate DRM frames:
-      *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
-
-From 1de32953e7f28a262ecb4727eaac0cfd3588379e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 28 Jun 2021 12:13:35 +0100
-Subject: [PATCH 022/186] Fix raw video s.t. it respects any remaining cropping
-
-This fixes the long standing CONFWIN_A conformance test failure for drm.
----
- libavcodec/rawenc.c       |  32 ++++++++---
- libavutil/hwcontext_drm.c | 112 ++++++++++++++++++++++++++++++++++++--
- 2 files changed, 130 insertions(+), 14 deletions(-)
-
-diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c
-index 594a77c42a64..8ca0379e1219 100644
---- a/libavcodec/rawenc.c
-+++ b/libavcodec/rawenc.c
-@@ -124,32 +124,41 @@ static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
- 
- 
- static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
--                      const AVFrame *frame, int *got_packet)
-+                      const AVFrame *src_frame, int *got_packet)
- {
-     int ret;
-+    AVFrame * frame = NULL;
- 
- #if CONFIG_SAND
--    if (av_rpi_is_sand_frame(frame)) {
--        ret = av_rpi_is_sand8_frame(frame) ? raw_sand8_as_yuv420(avctx, pkt, frame) :
--            av_rpi_is_sand16_frame(frame) ? raw_sand16_as_yuv420(avctx, pkt, frame) :
--            av_rpi_is_sand30_frame(frame) ? raw_sand30_as_yuv420(avctx, pkt, frame) : -1;
-+    if (av_rpi_is_sand_frame(src_frame)) {
-+        ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) :
-+            av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) :
-+            av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1;
-         *got_packet = (ret == 0);
-         return ret;
-     }
- #endif
- 
-+    if ((frame = av_frame_clone(src_frame)) == NULL) {
-+        ret = AVERROR(ENOMEM);
-+        goto fail;
-+    }
-+
-+    if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0)
-+        goto fail;
-+
-     ret = av_image_get_buffer_size(frame->format,
-                                        frame->width, frame->height, 1);
-     if (ret < 0)
--        return ret;
-+        goto fail;
- 
-     if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0)
--        return ret;
-+        goto fail;
-     if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size,
-                                        (const uint8_t **)frame->data, frame->linesize,
-                                        frame->format,
-                                        frame->width, frame->height, 1)) < 0)
--        return ret;
-+        goto fail;
- 
-     if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 &&
-        frame->format   == AV_PIX_FMT_YUYV422) {
-@@ -165,8 +174,15 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
-             AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16);
-         }
-     }
-+    pkt->flags |= AV_PKT_FLAG_KEY;
-+    av_frame_free(&frame);
-     *got_packet = 1;
-     return 0;
-+
-+fail:
-+    av_frame_free(&frame);
-+    *got_packet = 0;
-+    return ret;
- }
- 
- const FFCodec ff_rawvideo_encoder = {
-diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c
-index 7a9fdbd263d4..baf18920fa14 100644
---- a/libavutil/hwcontext_drm.c
-+++ b/libavutil/hwcontext_drm.c
-@@ -21,6 +21,7 @@
- #include <fcntl.h>
- #include <sys/mman.h>
- #include <unistd.h>
-+#include <sys/ioctl.h>
- 
- /* This was introduced in version 4.6. And may not exist all without an
-  * optional package. So to prevent a hard dependency on needing the Linux
-@@ -31,6 +32,7 @@
- #endif
- 
- #include <drm.h>
-+#include <libdrm/drm_fourcc.h>
- #include <xf86drm.h>
- 
- #include "avassert.h"
-@@ -38,7 +40,9 @@
- #include "hwcontext_drm.h"
- #include "hwcontext_internal.h"
- #include "imgutils.h"
--
-+#if CONFIG_SAND
-+#include "libavutil/rpi_sand_fns.h"
-+#endif
- 
- static void drm_device_free(AVHWDeviceContext *hwdev)
- {
-@@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceContext *hwdev, const char *device,
-     AVDRMDeviceContext *hwctx = hwdev->hwctx;
-     drmVersionPtr version;
- 
-+    if (device == NULL) {
-+        hwctx->fd = -1;
-+        return 0;
-+    }
-+
-     hwctx->fd = open(device, O_RDWR);
-     if (hwctx->fd < 0)
-         return AVERROR(errno);
-@@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesContext *hwfc,
-     if (flags & AV_HWFRAME_MAP_WRITE)
-         mmap_prot |= PROT_WRITE;
- 
-+    if (dst->format == AV_PIX_FMT_NONE)
-+        dst->format = hwfc->sw_format;
- #if HAVE_LINUX_DMA_BUF_H
-     if (flags & AV_HWFRAME_MAP_READ)
-         map->sync_flags |= DMA_BUF_SYNC_READ;
-@@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesContext *hwfc,
- 
-     dst->width  = src->width;
-     dst->height = src->height;
-+    dst->crop_top    = src->crop_top;
-+    dst->crop_bottom = src->crop_bottom;
-+    dst->crop_left   = src->crop_left;
-+    dst->crop_right  = src->crop_right;
-+
-+#if CONFIG_SAND
-+    // Rework for sand frames
-+    if (av_rpi_is_sand_frame(dst)) {
-+        // As it stands the sand formats hold stride2 in linesize[3]
-+        // linesize[0] & [1] contain stride1 which is always 128 for everything we do
-+        // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1]
-+        dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier);
-+        dst->linesize[0] = 128;
-+        dst->linesize[1] = 128;
-+        // *** Are we sure src->height is actually what we want ???
-+    }
-+#endif
- 
-     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
-                                 &drm_unmap_frame, map);
-@@ -212,7 +240,15 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx,
-     if (!pix_fmts)
-         return AVERROR(ENOMEM);
- 
--    pix_fmts[0] = ctx->sw_format;
-+    // **** Offer native sand too ????
-+    pix_fmts[0] =
-+#if CONFIG_SAND
-+        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ?
-+            AV_PIX_FMT_YUV420P :
-+        ctx->sw_format == AV_PIX_FMT_RPI4_10 ?
-+            AV_PIX_FMT_YUV420P10LE :
-+#endif
-+            ctx->sw_format;
-     pix_fmts[1] = AV_PIX_FMT_NONE;
- 
-     *formats = pix_fmts;
-@@ -231,18 +267,79 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc,
-     map = av_frame_alloc();
-     if (!map)
-         return AVERROR(ENOMEM);
--    map->format = dst->format;
- 
-+    // Map to default
-+    map->format = AV_PIX_FMT_NONE;
-     err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
-     if (err)
-         goto fail;
- 
--    map->width  = dst->width;
--    map->height = dst->height;
-+#if 0
-+    av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__,
-+           hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE,
-+           map->width, map->height,
-+           map->linesize[0],
-+           map->linesize[1],
-+           map->linesize[2],
-+           map->linesize[3],
-+           dst->width, dst->height,
-+           dst->linesize[0],
-+           dst->linesize[1],
-+           dst->linesize[2]);
-+#endif
-+#if CONFIG_SAND
-+    if (av_rpi_is_sand_frame(map)) {
-+        // Preserve crop - later ffmpeg code assumes that we have in that it
-+        // overwrites any crop that we create with the old values
-+        const unsigned int w = FFMIN(dst->width, map->width);
-+        const unsigned int h = FFMIN(dst->height, map->height);
-+
-+        if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) {
-+            av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
-+                                     map->data[0],
-+                                     128, stride2,
-+                                     0, 0, w, h);
-+            av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
-+                                     dst->data[2], dst->linesize[2],
-+                                     map->data[1],
-+                                     128, stride2,
-+                                     0, 0, w / 2, h / 2);
-+        }
-+        else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) {
-+            av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
-+                                     map->data[0],
-+                                     128, stride2,
-+                                     0, 0, w, h);
-+            av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
-+                                     dst->data[2], dst->linesize[2],
-+                                     map->data[1],
-+                                     128, stride2,
-+                                     0, 0, w / 2, h / 2);
-+        }
-+        else
-+        {
-+            av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__);
-+            err = AVERROR(EINVAL);
-+            goto fail;
-+        }
-+
-+        dst->width = w;
-+        dst->height = h;
-+    }
-+    else
-+#endif
-+    {
-+        // Kludge mapped h/w s.t. frame_copy works
-+        map->width  = dst->width;
-+        map->height = dst->height;
-+        err = av_frame_copy(dst, map);
-+    }
- 
--    err = av_frame_copy(dst, map);
-     if (err)
-+    {
-+        av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__);
-         goto fail;
-+    }
- 
-     err = 0;
- fail:
-@@ -257,7 +354,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc,
-     int err;
- 
-     if (src->width > hwfc->width || src->height > hwfc->height)
-+    {
-+        av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height);
-         return AVERROR(EINVAL);
-+    }
- 
-     map = av_frame_alloc();
-     if (!map)
-
-From 2214c119c420bb213917f1c6f85cb82d905772dc Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 13 Aug 2021 15:38:28 +0100
-Subject: [PATCH 023/186] Set frame interlace from V4L2 buffer field
-
----
- libavcodec/v4l2_buffers.c | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index de31f7ced93c..97b8eb1db362 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -222,6 +222,16 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf)
-     return AVCOL_TRC_UNSPECIFIED;
- }
- 
-+static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf)
-+{
-+    return V4L2_FIELD_IS_INTERLACED(buf->buf.field);
-+}
-+
-+static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
-+{
-+    return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
-+}
-+
- static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
- {
-     AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
-@@ -576,6 +586,8 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_resc
-     frame->color_trc = v4l2_get_color_trc(avbuf);
-     frame->pts = v4l2_get_pts(avbuf, no_rescale_pts);
-     frame->pkt_dts = AV_NOPTS_VALUE;
-+    frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf);
-+    frame->top_field_first = v4l2_buf_is_top_first(avbuf);
- 
-     /* these values are updated also during re-init in v4l2_process_driver_event */
-     frame->height = ctx->height;
-
-From b81ad61c52f3b35cd1b1b4f0ea715758c524522b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 13 Aug 2021 16:11:53 +0100
-Subject: [PATCH 024/186] Fix V4L2 stateful to avoid crash if flush before
- start
-
----
- libavcodec/v4l2_context.c | 4 ++++
- 1 file changed, 4 insertions(+)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index a17ae027a666..eb901e8fabf6 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -713,6 +713,10 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p)
- static void flush_all_buffers_status(V4L2Context* const ctx)
- {
-     int i;
-+
-+    if (!ctx->bufrefs)
-+        return;
-+
-     for (i = 0; i < ctx->num_buffers; ++i) {
-         struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
-         if (buf->status == V4L2BUF_IN_DRIVER)
-
-From 2d975c0fbcb97b930b1e7164f439830ab2594d1d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 9 Sep 2021 17:44:13 +0100
-Subject: [PATCH 025/186] Copy properties from frame to v4l2 buffer
-
-Now copies all the properties in ff_v4l2_buffer_avframe_to_buf that
-ff_v4l2_buffer_buf_to_avframe copies
----
- libavcodec/v4l2_buffers.c | 126 ++++++++++++++++++++++++++++++++++++++
- 1 file changed, 126 insertions(+)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 97b8eb1db362..126d2a17f4fe 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -128,6 +128,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
-     return AVCOL_PRI_UNSPECIFIED;
- }
- 
-+static void v4l2_set_color(V4L2Buffer *buf,
-+                           const enum AVColorPrimaries avcp,
-+                           const enum AVColorSpace avcs,
-+                           const enum AVColorTransferCharacteristic avxc)
-+{
-+    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
-+    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
-+    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
-+
-+    switch (avcp) {
-+    case AVCOL_PRI_BT709:
-+        cs = V4L2_COLORSPACE_REC709;
-+        ycbcr = V4L2_YCBCR_ENC_709;
-+        break;
-+    case AVCOL_PRI_BT470M:
-+        cs = V4L2_COLORSPACE_470_SYSTEM_M;
-+        ycbcr = V4L2_YCBCR_ENC_601;
-+        break;
-+    case AVCOL_PRI_BT470BG:
-+        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
-+        break;
-+    case AVCOL_PRI_SMPTE170M:
-+        cs = V4L2_COLORSPACE_SMPTE170M;
-+        break;
-+    case AVCOL_PRI_SMPTE240M:
-+        cs = V4L2_COLORSPACE_SMPTE240M;
-+        break;
-+    case AVCOL_PRI_BT2020:
-+        cs = V4L2_COLORSPACE_BT2020;
-+        break;
-+    case AVCOL_PRI_SMPTE428:
-+    case AVCOL_PRI_SMPTE431:
-+    case AVCOL_PRI_SMPTE432:
-+    case AVCOL_PRI_EBU3213:
-+    case AVCOL_PRI_RESERVED:
-+    case AVCOL_PRI_FILM:
-+    case AVCOL_PRI_UNSPECIFIED:
-+    default:
-+        break;
-+    }
-+
-+    switch (avcs) {
-+    case AVCOL_SPC_RGB:
-+        cs = V4L2_COLORSPACE_SRGB;
-+        break;
-+    case AVCOL_SPC_BT709:
-+        cs = V4L2_COLORSPACE_REC709;
-+        break;
-+    case AVCOL_SPC_FCC:
-+        cs = V4L2_COLORSPACE_470_SYSTEM_M;
-+        break;
-+    case AVCOL_SPC_BT470BG:
-+        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
-+        break;
-+    case AVCOL_SPC_SMPTE170M:
-+        cs = V4L2_COLORSPACE_SMPTE170M;
-+        break;
-+    case AVCOL_SPC_SMPTE240M:
-+        cs = V4L2_COLORSPACE_SMPTE240M;
-+        break;
-+    case AVCOL_SPC_BT2020_CL:
-+        cs = V4L2_COLORSPACE_BT2020;
-+        ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
-+        break;
-+    case AVCOL_SPC_BT2020_NCL:
-+        cs = V4L2_COLORSPACE_BT2020;
-+        break;
-+    default:
-+        break;
-+    }
-+
-+    switch (xfer) {
-+    case AVCOL_TRC_BT709:
-+        xfer = V4L2_XFER_FUNC_709;
-+        break;
-+    case AVCOL_TRC_IEC61966_2_1:
-+        xfer = V4L2_XFER_FUNC_SRGB;
-+        break;
-+    case AVCOL_TRC_SMPTE240M:
-+        xfer = V4L2_XFER_FUNC_SMPTE240M;
-+        break;
-+    case AVCOL_TRC_SMPTE2084:
-+        xfer = V4L2_XFER_FUNC_SMPTE2084;
-+        break;
-+    default:
-+        break;
-+    }
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
-+        buf->context->format.fmt.pix_mp.colorspace = cs;
-+        buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr;
-+        buf->context->format.fmt.pix_mp.xfer_func = xfer;
-+    } else {
-+        buf->context->format.fmt.pix.colorspace = cs;
-+        buf->context->format.fmt.pix.ycbcr_enc = ycbcr;
-+        buf->context->format.fmt.pix.xfer_func = xfer;
-+    }
-+}
-+
- static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
- {
-     enum v4l2_quantization qt;
-@@ -146,6 +245,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
-      return AVCOL_RANGE_UNSPECIFIED;
- }
- 
-+static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr)
-+{
-+    const enum v4l2_quantization q =
-+        avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE :
-+        avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE :
-+            V4L2_QUANTIZATION_DEFAULT;
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
-+        buf->context->format.fmt.pix_mp.quantization = q;
-+    } else {
-+        buf->context->format.fmt.pix.quantization = q;
-+    }
-+}
-+
- static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
- {
-     enum v4l2_ycbcr_encoding ycbcr;
-@@ -232,6 +345,12 @@ static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
-     return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
- }
- 
-+static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff)
-+{
-+    buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE :
-+        is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT;
-+}
-+
- static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
- {
-     AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
-@@ -561,7 +680,14 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- 
- int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- {
-+    out->buf.flags = frame->key_frame ? (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME) : (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME);
-+    // Beware that colour info is held in format rather than the actual
-+    // v4l2 buffer struct so this may not be as useful as you might hope
-+    v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
-+    v4l2_set_color_range(out, frame->color_range);
-+    // PTS & interlace are buffer vars
-     v4l2_set_pts(out, frame->pts, 0);
-+    v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first);
- 
-     return v4l2_buffer_swframe_to_buf(frame, out);
- }
-
-From 29e4140983c9922f7375153d5ba515bb70b047be Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 17 Nov 2021 16:49:01 +0000
-Subject: [PATCH 026/186] ffmpeg: Do not inc DTS on no decode output
-
-V4L2 H264 decode has long latency and sometimes spits out a long stream
-of output without input. In this case incrementing DTS is wrong. There
-may be cases where the condition as written is correct so only "fix" in
-the cases which cause problems
----
- fftools/ffmpeg.c | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
-index 719463016216..04bea4ef4fe9 100644
---- a/fftools/ffmpeg.c
-+++ b/fftools/ffmpeg.c
-@@ -2612,7 +2612,12 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
-         case AVMEDIA_TYPE_VIDEO:
-             ret = decode_video    (ist, repeating ? NULL : avpkt, &got_output, &duration_pts, !pkt,
-                                    &decode_failed);
--            if (!repeating || !pkt || got_output) {
-+            // Pi: Do not inc dts if no_cvt_hw set
-+            // V4L2 H264 decode has long latency and sometimes spits out a long
-+            // stream of output without input. In this case incrementing DTS is wrong.
-+            // There may be cases where the condition as written is correct so only
-+            // "fix" in the cases which cause problems
-+            if (!repeating || !pkt || (got_output && !no_cvt_hw)) {
-                 if (pkt && pkt->duration) {
-                     duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
-                 } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
-
-From eebda1bffbbf81eb486665c73ace4a86303c1e69 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 17 Nov 2021 17:32:59 +0000
-Subject: [PATCH 027/186] v4l2_m2m_dec: Adjust timebase if H264
-
-Adjust AVCodecContext time_base if H264 in the same way that the
-software decoder does.
----
- libavcodec/v4l2_m2m_dec.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 1851acbc93fe..aa1e5c159720 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -481,6 +481,16 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- 
-     av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
- 
-+    if (avctx->codec_id == AV_CODEC_ID_H264) {
-+        if (avctx->ticks_per_frame == 1) {
-+            if(avctx->time_base.den < INT_MAX/2) {
-+                avctx->time_base.den *= 2;
-+            } else
-+                avctx->time_base.num /= 2;
-+        }
-+        avctx->ticks_per_frame = 2;
-+    }
-+
-     av_log(avctx, AV_LOG_INFO, "level=%d\n", avctx->level);
-     ret = ff_v4l2_m2m_create_context(priv, &s);
-     if (ret < 0)
-
-From 4e12f09479a88b648f17fad0e475fdb60ac93541 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 17 Nov 2021 17:38:27 +0000
-Subject: [PATCH 028/186] v4l2_m2m_dec: Produce best guess PTSs if none
- supplied
-
-Filter scheduling gets confused by missing PTSs and makes poor guesses
-more often than not.  Try to generate plausible timestamps where we are
-missing them.
----
- libavcodec/v4l2_m2m.h     | 12 ++++++++
- libavcodec/v4l2_m2m_dec.c | 64 +++++++++++++++++++++++++++++++++++++--
- 2 files changed, 74 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 8f054f2f50f9..82feb0afdbe3 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -52,6 +52,16 @@ typedef struct V4L2m2mTrackEl {
-     int64_t track_pts;
- } V4L2m2mTrackEl;
- 
-+typedef struct pts_stats_s
-+{
-+    void * logctx;
-+    const char * name;  // For debug
-+    unsigned int last_count;
-+    unsigned int last_interval;
-+    int64_t last_pts;
-+    int64_t guess;
-+} pts_stats_t;
-+
- typedef struct V4L2m2mContext {
-     char devname[PATH_MAX];
-     int fd;
-@@ -91,6 +101,8 @@ typedef struct V4L2m2mContext {
-     unsigned int track_no;
-     V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
- 
-+    pts_stats_t pts_stat;
-+
-     /* req pkt */
-     int req_pkt;
- 
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index aa1e5c159720..a5a2afbd273c 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -42,6 +42,62 @@
- #include "v4l2_m2m.h"
- #include "v4l2_fmt.h"
- 
-+// Pick 64 for max last count - that is >1sec at 60fps
-+#define STATS_LAST_COUNT_MAX 64
-+#define STATS_INTERVAL_MAX (1 << 30)
-+
-+static int64_t pts_stats_guess(const pts_stats_t * const stats)
-+{
-+    if (stats->last_pts == AV_NOPTS_VALUE ||
-+            stats->last_interval == 0 ||
-+            stats->last_count >= STATS_LAST_COUNT_MAX)
-+        return AV_NOPTS_VALUE;
-+    return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval;
-+}
-+
-+static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
-+{
-+    if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {
-+        if (stats->last_count < STATS_LAST_COUNT_MAX)
-+            ++stats->last_count;
-+        return;
-+    }
-+
-+    if (stats->last_pts != AV_NOPTS_VALUE) {
-+        const int64_t interval = pts - stats->last_pts;
-+
-+        if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
-+            stats->last_count >= STATS_LAST_COUNT_MAX) {
-+            if (stats->last_interval != 0)
-+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n",
-+                       __func__, stats->name, interval, stats->last_count);
-+            stats->last_interval = 0;
-+        }
-+        else {
-+            const int64_t frame_time = interval / (int64_t)stats->last_count;
-+
-+            if (frame_time != stats->last_interval)
-+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n",
-+                       __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
-+            stats->last_interval = frame_time;
-+        }
-+    }
-+
-+    stats->last_pts = pts;
-+    stats->last_count = 1;
-+}
-+
-+static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)
-+{
-+    *stats = (pts_stats_t){
-+        .logctx = logctx,
-+        .name = name,
-+        .last_count = 1,
-+        .last_interval = 0,
-+        .last_pts = AV_NOPTS_VALUE
-+    };
-+}
-+
- static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)
- {
-     int ret;
-@@ -244,9 +300,11 @@ xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *cons
-         return -1;
-     }
- 
--    frame->best_effort_timestamp = frame->pts;
-+    pts_stats_add(&s->pts_stat, frame->pts);
-+
-+    frame->best_effort_timestamp = pts_stats_guess(&s->pts_stat);
-     frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
--    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 ", DTS=%" PRId64 "\n", frame->pts, frame->pkt_dts);
-+    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
-     return 0;
- }
- 
-@@ -496,6 +554,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     if (ret < 0)
-         return ret;
- 
-+    pts_stats_init(&s->pts_stat, avctx, "decoder");
-+
-     capture = &s->capture;
-     output = &s->output;
- 
-
-From 3a0fa83da24d5ec8739acf9f4cc713b7b1e49038 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 17 Nov 2021 17:59:27 +0000
-Subject: [PATCH 029/186] v4l2_m2m_dec: Try harder to get an initial frame
-
-If the input Q is full then wait on a short timeout for a capture frame
-rather than stuffing yet still another frame into the input if we could
-do that first. This attempts to restrict the sometimes daft initial
-buffering that ends up confusing the rest of the system.
----
- libavcodec/v4l2_context.c | 2 +-
- libavcodec/v4l2_m2m_dec.c | 2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index eb901e8fabf6..ee5dc7b8d41d 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -381,7 +381,7 @@ static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
- start:
-     if (is_capture) {
-         /* no need to listen to requests for more input while draining */
--        if (ctx_to_m2mctx(ctx)->draining)
-+        if (ctx_to_m2mctx(ctx)->draining || timeout > 0)
-             pfd.events =  POLLIN | POLLRDNORM | POLLPRI;
-     } else {
-         pfd.events =  POLLOUT | POLLWRNORM;
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index a5a2afbd273c..b49f470c0a1e 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -442,7 +442,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                 // when discarding
-                 // This returns AVERROR(EAGAIN) if there isn't a frame ready yet
-                 // but there is room in the input Q
--                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, -1, 1);
-+                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 100 : -1, 1);
- 
-                 if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-                     av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
-
-From 33aa90c53d570527c8a8da70d6c805a5431d2f86 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 17 Nov 2021 18:04:56 +0000
-Subject: [PATCH 030/186] Add a V4L2 M2M deinterlace filter
-
-Add a V4L2 deinterlace filter that will accept DRMPRIME frames.
-
-Multiple people have contributed to this:
-Jernej Skrabec <jernej.skrabec@siol.net>
-Alex Bee <knaerzche@gmail.com>
-popcornmix <popcornmix@gmail.com>
-John Cox <jc@kynesim.co.uk>
-
-There is an unknown delay through the filter of typically one or three
-fields which translates to 1 or 2 frames. Frames that are delayed are
-lost at end of stream as the V4L2 filter has no flush control.
----
- libavcodec/v4l2_context.c            |    4 +-
- libavfilter/Makefile                 |    1 +
- libavfilter/allfilters.c             |    1 +
- libavfilter/vf_deinterlace_v4l2m2m.c | 1269 ++++++++++++++++++++++++++
- 4 files changed, 1273 insertions(+), 2 deletions(-)
- create mode 100644 libavfilter/vf_deinterlace_v4l2m2m.c
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index ee5dc7b8d41d..440dfaaba551 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -498,10 +498,10 @@ dequeue:
-             return NULL;
-         }
-         --ctx->q_count;
--        av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d\n",
-+        av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d field=%d\n",
-                ctx->name, buf.index,
-                buf.timestamp.tv_sec, buf.timestamp.tv_usec,
--               ctx->q_count, ++ctx->dq_count);
-+               ctx->q_count, ++ctx->dq_count, buf.field);
- 
-         avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
-         avbuf->status = V4L2BUF_AVAILABLE;
-diff --git a/libavfilter/Makefile b/libavfilter/Makefile
-index c14fc995a0b5..0e7b5856bdd4 100644
---- a/libavfilter/Makefile
-+++ b/libavfilter/Makefile
-@@ -262,6 +262,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER)                += vf_neighbor.o
- OBJS-$(CONFIG_DEFLICKER_FILTER)              += vf_deflicker.o
- OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER)        += vf_vpp_qsv.o
- OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER)      += vf_deinterlace_vaapi.o vaapi_vpp.o
-+OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER)    += vf_deinterlace_v4l2m2m.o
- OBJS-$(CONFIG_DEJUDDER_FILTER)               += vf_dejudder.o
- OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
- OBJS-$(CONFIG_DENOISE_VAAPI_FILTER)          += vf_misc_vaapi.o vaapi_vpp.o
-diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
-index b990a001529b..357ff61ca803 100644
---- a/libavfilter/allfilters.c
-+++ b/libavfilter/allfilters.c
-@@ -248,6 +248,7 @@ extern const AVFilter ff_vf_derain;
- extern const AVFilter ff_vf_deshake;
- extern const AVFilter ff_vf_deshake_opencl;
- extern const AVFilter ff_vf_despill;
-+extern const AVFilter ff_vf_deinterlace_v4l2m2m;
- extern const AVFilter ff_vf_detelecine;
- extern const AVFilter ff_vf_dilation;
- extern const AVFilter ff_vf_dilation_opencl;
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-new file mode 100644
-index 000000000000..1a933b7e0a5f
---- /dev/null
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -0,0 +1,1269 @@
-+/*
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+/**
-+ * @file
-+ * deinterlace video filter - V4L2 M2M
-+ */
-+
-+#include <drm_fourcc.h>
-+
-+#include <linux/videodev2.h>
-+
-+#include <dirent.h>
-+#include <fcntl.h>
-+#include <poll.h>
-+#include <stdatomic.h>
-+#include <stdio.h>
-+#include <string.h>
-+#include <sys/ioctl.h>
-+#include <sys/mman.h>
-+#include <unistd.h>
-+
-+#include "libavutil/avassert.h"
-+#include "libavutil/avstring.h"
-+#include "libavutil/common.h"
-+#include "libavutil/hwcontext.h"
-+#include "libavutil/hwcontext_drm.h"
-+#include "libavutil/internal.h"
-+#include "libavutil/mathematics.h"
-+#include "libavutil/opt.h"
-+#include "libavutil/pixdesc.h"
-+#include "libavutil/time.h"
-+
-+#define FF_INTERNAL_FIELDS 1
-+#include "framequeue.h"
-+#include "filters.h"
-+#include "avfilter.h"
-+#include "formats.h"
-+#include "internal.h"
-+#include "video.h"
-+
-+typedef struct V4L2Queue V4L2Queue;
-+typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared;
-+
-+typedef struct V4L2PlaneInfo {
-+    int bytesperline;
-+    size_t length;
-+} V4L2PlaneInfo;
-+
-+typedef struct V4L2Buffer {
-+    int enqueued;
-+    int reenqueue;
-+    int fd;
-+    struct v4l2_buffer buffer;
-+    AVFrame frame;
-+    struct v4l2_plane planes[VIDEO_MAX_PLANES];
-+    int num_planes;
-+    V4L2PlaneInfo plane_info[VIDEO_MAX_PLANES];
-+    AVDRMFrameDescriptor drm_frame;
-+    V4L2Queue *q;
-+} V4L2Buffer;
-+
-+typedef struct V4L2Queue {
-+    struct v4l2_format format;
-+    int num_buffers;
-+    V4L2Buffer *buffers;
-+    DeintV4L2M2MContextShared *ctx;
-+} V4L2Queue;
-+
-+typedef struct pts_stats_s
-+{
-+    void * logctx;
-+    const char * name;  // For debug
-+    unsigned int last_count;
-+    unsigned int last_interval;
-+    int64_t last_pts;
-+} pts_stats_t;
-+
-+#define PTS_TRACK_SIZE 32
-+typedef struct pts_track_el_s
-+{
-+    uint32_t n;
-+    unsigned int interval;
-+    AVFrame * props;
-+} pts_track_el_t;
-+
-+typedef struct pts_track_s
-+{
-+    uint32_t n;
-+    uint32_t last_n;
-+    int got_2;
-+    void * logctx;
-+    pts_stats_t stats;
-+    pts_track_el_t a[PTS_TRACK_SIZE];
-+} pts_track_t;
-+
-+typedef struct DeintV4L2M2MContextShared {
-+    void * logctx;  // For logging - will be NULL when done
-+
-+    int fd;
-+    int done;
-+    int width;
-+    int height;
-+    int orig_width;
-+    int orig_height;
-+    atomic_uint refcount;
-+
-+    AVBufferRef *hw_frames_ctx;
-+
-+    unsigned int field_order;
-+
-+    pts_track_t track;
-+
-+    V4L2Queue output;
-+    V4L2Queue capture;
-+} DeintV4L2M2MContextShared;
-+
-+typedef struct DeintV4L2M2MContext {
-+    const AVClass *class;
-+
-+    DeintV4L2M2MContextShared *shared;
-+} DeintV4L2M2MContext;
-+
-+static unsigned int pts_stats_interval(const pts_stats_t * const stats)
-+{
-+    return stats->last_interval;
-+}
-+
-+// Pick 64 for max last count - that is >1sec at 60fps
-+#define STATS_LAST_COUNT_MAX 64
-+#define STATS_INTERVAL_MAX (1 << 30)
-+static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
-+{
-+    if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {
-+        if (stats->last_count < STATS_LAST_COUNT_MAX)
-+            ++stats->last_count;
-+        return;
-+    }
-+
-+    if (stats->last_pts != AV_NOPTS_VALUE) {
-+        const int64_t interval = pts - stats->last_pts;
-+
-+        if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
-+            stats->last_count >= STATS_LAST_COUNT_MAX) {
-+            if (stats->last_interval != 0)
-+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n",
-+                       __func__, stats->name, interval, stats->last_count);
-+            stats->last_interval = 0;
-+        }
-+        else {
-+            const int64_t frame_time = interval / (int64_t)stats->last_count;
-+
-+            if (frame_time != stats->last_interval)
-+                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n",
-+                       __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
-+            stats->last_interval = frame_time;
-+        }
-+    }
-+
-+    stats->last_pts = pts;
-+    stats->last_count = 1;
-+}
-+
-+static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)
-+{
-+    *stats = (pts_stats_t){
-+        .logctx = logctx,
-+        .name = name,
-+        .last_count = 1,
-+        .last_interval = 0,
-+        .last_pts = AV_NOPTS_VALUE
-+    };
-+}
-+
-+static inline uint32_t pts_track_next_n(pts_track_t * const trk)
-+{
-+    if (++trk->n == 0)
-+        trk->n = 1;
-+    return trk->n;
-+}
-+
-+static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst)
-+{
-+    uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000);
-+    pts_track_el_t * t;
-+
-+    // As a first guess assume that n==0 means last frame
-+    if (n == 0) {
-+        n = trk->last_n;
-+        if (n == 0)
-+            goto fail;
-+    }
-+
-+    t = trk->a + (n & (PTS_TRACK_SIZE - 1));
-+
-+    if (t->n != n) {
-+        av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n);
-+        goto fail;
-+    }
-+
-+    // 1st frame is simple - just believe it
-+    if (n != trk->last_n) {
-+        trk->last_n = n;
-+        trk->got_2 = 0;
-+        return av_frame_copy_props(dst, t->props);
-+    }
-+
-+    // Only believe in a single interpolated frame
-+    if (trk->got_2)
-+        goto fail;
-+    trk->got_2 = 1;
-+
-+    av_frame_copy_props(dst, t->props);
-+
-+
-+    // If we can't guess - don't
-+    if (t->interval == 0) {
-+        dst->best_effort_timestamp = AV_NOPTS_VALUE;
-+        dst->pts = AV_NOPTS_VALUE;
-+        dst->pkt_dts = AV_NOPTS_VALUE;
-+    }
-+    else {
-+        if (dst->best_effort_timestamp != AV_NOPTS_VALUE)
-+            dst->best_effort_timestamp += t->interval / 2;
-+        if (dst->pts != AV_NOPTS_VALUE)
-+            dst->pts += t->interval / 2;
-+        if (dst->pkt_dts != AV_NOPTS_VALUE)
-+            dst->pkt_dts += t->interval / 2;
-+    }
-+
-+    return 0;
-+
-+fail:
-+    trk->last_n = 0;
-+    trk->got_2 = 0;
-+    dst->pts = AV_NOPTS_VALUE;
-+    dst->pkt_dts = AV_NOPTS_VALUE;
-+    return 0;
-+}
-+
-+static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src)
-+{
-+    const uint32_t n = pts_track_next_n(trk);
-+    pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1));
-+
-+    pts_stats_add(&trk->stats, src->pts);
-+
-+    t->n = n;
-+    t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last
-+    av_frame_unref(t->props);
-+    av_frame_copy_props(t->props, src);
-+
-+    // We now know what the previous interval was, rather than having to guess,
-+    // so set it.  There is a better than decent chance that this is before
-+    // we use it.
-+    if (t->interval != 0) {
-+        pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1));
-+        prev_t->interval = t->interval;
-+    }
-+
-+    // In case deinterlace interpolates frames use every other usec
-+    return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2};
-+}
-+
-+static void pts_track_uninit(pts_track_t * const trk)
-+{
-+    unsigned int i;
-+    for (i = 0; i != PTS_TRACK_SIZE; ++i) {
-+        trk->a[i].n = 0;
-+        av_frame_free(&trk->a[i].props);
-+    }
-+}
-+
-+static int pts_track_init(pts_track_t * const trk, void *logctx)
-+{
-+    unsigned int i;
-+    trk->n = 1;
-+    pts_stats_init(&trk->stats, logctx, "track");
-+    for (i = 0; i != PTS_TRACK_SIZE; ++i) {
-+        trk->a[i].n = 0;
-+        if ((trk->a[i].props = av_frame_alloc()) == NULL) {
-+            pts_track_uninit(trk);
-+            return AVERROR(ENOMEM);
-+        }
-+    }
-+    return 0;
-+}
-+
-+static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx)
-+{
-+    struct v4l2_capability cap;
-+    int ret;
-+
-+    memset(&cap, 0, sizeof(cap));
-+    ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap);
-+    if (ret < 0)
-+        return ret;
-+
-+    if (!(cap.capabilities & V4L2_CAP_STREAMING))
-+        return AVERROR(EINVAL);
-+
-+    if (cap.capabilities & V4L2_CAP_VIDEO_M2M) {
-+        ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-+        ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-+
-+        return 0;
-+    }
-+
-+    if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) {
-+        ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
-+        ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
-+
-+        return 0;
-+    }
-+
-+    return AVERROR(EINVAL);
-+}
-+
-+static int deint_v4l2m2m_try_format(V4L2Queue *queue)
-+{
-+    struct v4l2_format *fmt        = &queue->format;
-+    DeintV4L2M2MContextShared *ctx = queue->ctx;
-+    int ret, field;
-+
-+    ret = ioctl(ctx->fd, VIDIOC_G_FMT, fmt);
-+    if (ret)
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret);
-+
-+    if (V4L2_TYPE_IS_OUTPUT(fmt->type))
-+        field = V4L2_FIELD_INTERLACED_TB;
-+    else
-+        field = V4L2_FIELD_NONE;
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        fmt->fmt.pix_mp.pixelformat = V4L2_PIX_FMT_YUV420;
-+        fmt->fmt.pix_mp.field = field;
-+        fmt->fmt.pix_mp.width = ctx->width;
-+        fmt->fmt.pix_mp.height = ctx->height;
-+    } else {
-+        fmt->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420;
-+        fmt->fmt.pix.field = field;
-+        fmt->fmt.pix.width = ctx->width;
-+        fmt->fmt.pix.height = ctx->height;
-+    }
-+
-+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__,
-+		 fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height,
-+		 fmt->fmt.pix_mp.pixelformat,
-+		 fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline);
-+
-+    ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, fmt);
-+    if (ret)
-+        return AVERROR(EINVAL);
-+
-+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__,
-+		 fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height,
-+		 fmt->fmt.pix_mp.pixelformat,
-+		 fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline);
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        if (fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 ||
-+            fmt->fmt.pix_mp.field != field) {
-+            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
-+
-+            return AVERROR(EINVAL);
-+        }
-+    } else {
-+        if (fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 ||
-+            fmt->fmt.pix.field != field) {
-+            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
-+
-+            return AVERROR(EINVAL);
-+        }
-+    }
-+
-+    return 0;
-+}
-+
-+static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, int height, int pitch, int ysize)
-+{
-+    struct v4l2_format *fmt        = &queue->format;
-+    DeintV4L2M2MContextShared *ctx = queue->ctx;
-+    int ret;
-+
-+    struct v4l2_selection sel = {
-+        .type = fmt->type,
-+        .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS,
-+    };
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        fmt->fmt.pix_mp.field = field;
-+        fmt->fmt.pix_mp.width = width;
-+        fmt->fmt.pix_mp.height = ysize / pitch;
-+        fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch;
-+        fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1);
-+    } else {
-+        fmt->fmt.pix.field = field;
-+        fmt->fmt.pix.width = width;
-+        fmt->fmt.pix.height = height;
-+        fmt->fmt.pix.sizeimage = 0;
-+        fmt->fmt.pix.bytesperline = 0;
-+    }
-+
-+    ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt);
-+    if (ret)
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret);
-+
-+    ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel);
-+    if (ret)
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_SELECTION failed: %d\n", ret);
-+
-+    sel.r.width = width;
-+    sel.r.height = height;
-+    sel.r.left = 0;
-+    sel.r.top = 0;
-+    sel.target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE,
-+    sel.flags = V4L2_SEL_FLAG_LE;
-+
-+    ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel);
-+    if (ret)
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_SELECTION failed: %d\n", ret);
-+
-+    return ret;
-+}
-+
-+static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node)
-+{
-+    int ret;
-+
-+    ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0);
-+    if (ctx->fd < 0)
-+        return AVERROR(errno);
-+
-+    ret = deint_v4l2m2m_prepare_context(ctx);
-+    if (ret)
-+        goto fail;
-+
-+    ret = deint_v4l2m2m_try_format(&ctx->capture);
-+    if (ret)
-+        goto fail;
-+
-+    ret = deint_v4l2m2m_try_format(&ctx->output);
-+    if (ret)
-+        goto fail;
-+
-+    return 0;
-+
-+fail:
-+    close(ctx->fd);
-+    ctx->fd = -1;
-+
-+    return ret;
-+}
-+
-+static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx)
-+{
-+    int ret = AVERROR(EINVAL);
-+    struct dirent *entry;
-+    char node[PATH_MAX];
-+    DIR *dirp;
-+
-+    dirp = opendir("/dev");
-+    if (!dirp)
-+        return AVERROR(errno);
-+
-+    for (entry = readdir(dirp); entry; entry = readdir(dirp)) {
-+
-+        if (strncmp(entry->d_name, "video", 5))
-+            continue;
-+
-+        snprintf(node, sizeof(node), "/dev/%s", entry->d_name);
-+        av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node);
-+        ret = deint_v4l2m2m_probe_device(ctx, node);
-+        if (!ret)
-+            break;
-+    }
-+
-+    closedir(dirp);
-+
-+    if (ret) {
-+        av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n");
-+        ctx->fd = -1;
-+
-+        return ret;
-+    }
-+
-+    av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node);
-+
-+    return 0;
-+}
-+
-+static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf)
-+{
-+    int ret;
-+
-+    ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer);
-+    if (ret < 0)
-+        return AVERROR(errno);
-+
-+    buf->enqueued = 1;
-+
-+    return 0;
-+}
-+
-+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
-+{
-+    struct v4l2_exportbuffer expbuf;
-+    int i, ret;
-+
-+    for (i = 0; i < avbuf->num_planes; i++) {
-+        memset(&expbuf, 0, sizeof(expbuf));
-+
-+        expbuf.index = avbuf->buffer.index;
-+        expbuf.type = avbuf->buffer.type;
-+        expbuf.plane = i;
-+
-+        ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf);
-+        if (ret < 0)
-+            return AVERROR(errno);
-+
-+        avbuf->fd = expbuf.fd;
-+
-+        if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type)) {
-+            /* drm frame */
-+            avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length;
-+            avbuf->drm_frame.objects[i].fd = expbuf.fd;
-+            avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        } else {
-+            /* drm frame */
-+            avbuf->drm_frame.objects[0].size = avbuf->buffer.length;
-+            avbuf->drm_frame.objects[0].fd = expbuf.fd;
-+            avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+        }
-+    }
-+
-+    return 0;
-+}
-+
-+static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
-+{
-+    struct v4l2_format *fmt = &queue->format;
-+    DeintV4L2M2MContextShared *ctx = queue->ctx;
-+    struct v4l2_requestbuffers req;
-+    int ret, i, j, multiplanar;
-+    uint32_t memory;
-+
-+    memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ?
-+        V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
-+
-+    multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
-+
-+    memset(&req, 0, sizeof(req));
-+    req.count = queue->num_buffers;
-+    req.memory = memory;
-+    req.type = fmt->type;
-+
-+    ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req);
-+    if (ret < 0) {
-+        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno));
-+
-+        return AVERROR(errno);
-+    }
-+
-+    queue->num_buffers = req.count;
-+    queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer));
-+    if (!queue->buffers) {
-+        av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n");
-+
-+        return AVERROR(ENOMEM);
-+    }
-+
-+    for (i = 0; i < queue->num_buffers; i++) {
-+        V4L2Buffer *buf = &queue->buffers[i];
-+
-+        buf->enqueued = 0;
-+        buf->fd = -1;
-+        buf->q = queue;
-+
-+        buf->buffer.type = fmt->type;
-+        buf->buffer.memory = memory;
-+        buf->buffer.index = i;
-+
-+        if (multiplanar) {
-+            buf->buffer.length = VIDEO_MAX_PLANES;
-+            buf->buffer.m.planes = buf->planes;
-+        }
-+
-+        ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer);
-+        if (ret < 0) {
-+            ret = AVERROR(errno);
-+
-+            goto fail;
-+        }
-+
-+        if (multiplanar)
-+            buf->num_planes = buf->buffer.length;
-+        else
-+            buf->num_planes = 1;
-+
-+        for (j = 0; j < buf->num_planes; j++) {
-+            V4L2PlaneInfo *info = &buf->plane_info[j];
-+
-+            if (multiplanar) {
-+                info->bytesperline = fmt->fmt.pix_mp.plane_fmt[j].bytesperline;
-+                info->length = buf->buffer.m.planes[j].length;
-+            } else {
-+                info->bytesperline = fmt->fmt.pix.bytesperline;
-+                info->length = buf->buffer.length;
-+            }
-+        }
-+
-+        if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) {
-+            ret = deint_v4l2m2m_enqueue_buffer(buf);
-+            if (ret)
-+                goto fail;
-+
-+            ret = v4l2_buffer_export_drm(buf);
-+            if (ret)
-+                goto fail;
-+        }
-+    }
-+
-+    return 0;
-+
-+fail:
-+    for (i = 0; i < queue->num_buffers; i++)
-+        if (queue->buffers[i].fd >= 0)
-+            close(queue->buffers[i].fd);
-+    av_free(queue->buffers);
-+    queue->buffers = NULL;
-+
-+    return ret;
-+}
-+
-+static int deint_v4l2m2m_streamon(V4L2Queue *queue)
-+{
-+    DeintV4L2M2MContextShared * const ctx = queue->ctx;
-+    int type = queue->format.type;
-+    int ret;
-+
-+    ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type);
-+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno));
-+    if (ret < 0)
-+        return AVERROR(errno);
-+
-+    return 0;
-+}
-+
-+static int deint_v4l2m2m_streamoff(V4L2Queue *queue)
-+{
-+    DeintV4L2M2MContextShared * const ctx = queue->ctx;
-+    int type = queue->format.type;
-+    int ret;
-+
-+    ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type);
-+    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno));
-+    if (ret < 0)
-+        return AVERROR(errno);
-+
-+    return 0;
-+}
-+
-+// timeout in ms
-+static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout)
-+{
-+    struct v4l2_plane planes[VIDEO_MAX_PLANES];
-+    DeintV4L2M2MContextShared *ctx = queue->ctx;
-+    struct v4l2_buffer buf = { 0 };
-+    V4L2Buffer* avbuf = NULL;
-+    struct pollfd pfd;
-+    short events;
-+    int ret;
-+
-+    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
-+        events =  POLLOUT | POLLWRNORM;
-+    else
-+        events = POLLIN | POLLRDNORM;
-+
-+    pfd.events = events;
-+    pfd.fd = ctx->fd;
-+
-+    for (;;) {
-+        ret = poll(&pfd, 1, timeout);
-+        if (ret > 0)
-+            break;
-+        if (errno == EINTR)
-+            continue;
-+        return NULL;
-+    }
-+
-+    if (pfd.revents & POLLERR)
-+        return NULL;
-+
-+    if (pfd.revents & events) {
-+        memset(&buf, 0, sizeof(buf));
-+        buf.memory = V4L2_MEMORY_MMAP;
-+        buf.type = queue->format.type;
-+        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
-+            memset(planes, 0, sizeof(planes));
-+            buf.length = VIDEO_MAX_PLANES;
-+            buf.m.planes = planes;
-+        }
-+
-+        ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf);
-+        if (ret) {
-+            if (errno != EAGAIN)
-+                av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n",
-+                       av_err2str(AVERROR(errno)));
-+            return NULL;
-+        }
-+
-+        avbuf = &queue->buffers[buf.index];
-+        avbuf->enqueued = 0;
-+        avbuf->buffer = buf;
-+        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
-+            memcpy(avbuf->planes, planes, sizeof(planes));
-+            avbuf->buffer.m.planes = avbuf->planes;
-+        }
-+        return avbuf;
-+    }
-+
-+    return NULL;
-+}
-+
-+static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue)
-+{
-+    int i;
-+    V4L2Buffer *buf = NULL;
-+
-+    for (i = 0; i < queue->num_buffers; i++)
-+        if (!queue->buffers[i].enqueued) {
-+            buf = &queue->buffers[i];
-+            break;
-+        }
-+    return buf;
-+}
-+
-+static void deint_v4l2m2m_unref_queued(V4L2Queue *queue)
-+{
-+    int i;
-+    V4L2Buffer *buf = NULL;
-+
-+    if (!queue || !queue->buffers)
-+        return;
-+    for (i = 0; i < queue->num_buffers; i++) {
-+        buf = &queue->buffers[i];
-+        if (queue->buffers[i].enqueued)
-+            av_frame_unref(&buf->frame);
-+    }
-+}
-+
-+static void recycle_q(V4L2Queue * const queue)
-+{
-+    V4L2Buffer* avbuf;
-+    while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) {
-+        av_frame_unref(&avbuf->frame);
-+    }
-+}
-+
-+static int count_enqueued(V4L2Queue *queue)
-+{
-+    int i;
-+    int n = 0;
-+
-+    if (queue->buffers == NULL)
-+        return 0;
-+
-+    for (i = 0; i < queue->num_buffers; i++)
-+        if (queue->buffers[i].enqueued)
-+            ++n;
-+    return n;
-+}
-+
-+static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame)
-+{
-+    DeintV4L2M2MContextShared *const ctx = queue->ctx;
-+    AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0];
-+    V4L2Buffer *buf;
-+    int i;
-+
-+    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
-+        recycle_q(queue);
-+
-+    buf = deint_v4l2m2m_find_free_buf(queue);
-+    if (!buf) {
-+        av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0);
-+        return AVERROR(EAGAIN);
-+    }
-+    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type))
-+        for (i = 0; i < drm_desc->nb_objects; i++)
-+            buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd;
-+    else
-+        buf->buffer.m.fd = drm_desc->objects[0].fd;
-+
-+    buf->buffer.field = !frame->interlaced_frame ? V4L2_FIELD_NONE :
-+        frame->top_field_first ? V4L2_FIELD_INTERLACED_TB :
-+            V4L2_FIELD_INTERLACED_BT;
-+
-+    if (ctx->field_order != buf->buffer.field) {
-+        av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field);
-+        ctx->field_order = buf->buffer.field;
-+    }
-+
-+    buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame);
-+
-+    buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd;
-+
-+    av_frame_move_ref(&buf->frame, frame);
-+
-+    return deint_v4l2m2m_enqueue_buffer(buf);
-+}
-+
-+static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx)
-+{
-+    if (atomic_fetch_sub(&ctx->refcount, 1) == 1) {
-+        V4L2Queue *capture = &ctx->capture;
-+        V4L2Queue *output  = &ctx->output;
-+        int i;
-+
-+        av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__);
-+
-+        if (ctx->fd >= 0) {
-+            deint_v4l2m2m_streamoff(capture);
-+            deint_v4l2m2m_streamoff(output);
-+        }
-+
-+        if (capture->buffers)
-+            for (i = 0; i < capture->num_buffers; i++) {
-+                capture->buffers[i].q = NULL;
-+                if (capture->buffers[i].fd >= 0)
-+                    close(capture->buffers[i].fd);
-+            }
-+
-+        deint_v4l2m2m_unref_queued(output);
-+
-+        av_buffer_unref(&ctx->hw_frames_ctx);
-+
-+        if (capture->buffers)
-+            av_free(capture->buffers);
-+
-+        if (output->buffers)
-+            av_free(output->buffers);
-+
-+        if (ctx->fd >= 0) {
-+            close(ctx->fd);
-+            ctx->fd = -1;
-+        }
-+
-+        av_free(ctx);
-+    }
-+}
-+
-+static void v4l2_free_buffer(void *opaque, uint8_t *unused)
-+{
-+    V4L2Buffer *buf                = opaque;
-+    DeintV4L2M2MContextShared *ctx = buf->q->ctx;
-+
-+    if (!ctx->done)
-+        deint_v4l2m2m_enqueue_buffer(buf);
-+
-+    deint_v4l2m2m_destroy_context(ctx);
-+}
-+
-+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height)
-+{
-+    int av_pix_fmt = AV_PIX_FMT_YUV420P;
-+    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
-+    AVDRMLayerDescriptor *layer;
-+
-+    /* fill the DRM frame descriptor */
-+    drm_desc->nb_objects = avbuf->num_planes;
-+    drm_desc->nb_layers = 1;
-+
-+    layer = &drm_desc->layers[0];
-+    layer->nb_planes = avbuf->num_planes;
-+
-+    for (int i = 0; i < avbuf->num_planes; i++) {
-+        layer->planes[i].object_index = i;
-+        layer->planes[i].offset = 0;
-+        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
-+    }
-+
-+    switch (av_pix_fmt) {
-+    case AV_PIX_FMT_YUYV422:
-+
-+        layer->format = DRM_FORMAT_YUYV;
-+        layer->nb_planes = 1;
-+
-+        break;
-+
-+    case AV_PIX_FMT_NV12:
-+    case AV_PIX_FMT_NV21:
-+
-+        layer->format = av_pix_fmt == AV_PIX_FMT_NV12 ?
-+            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
-+
-+        if (avbuf->num_planes > 1)
-+            break;
-+
-+        layer->nb_planes = 2;
-+
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
-+            height;
-+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
-+        break;
-+
-+    case AV_PIX_FMT_YUV420P:
-+
-+        layer->format = DRM_FORMAT_YUV420;
-+
-+        if (avbuf->num_planes > 1)
-+            break;
-+
-+        layer->nb_planes = 3;
-+
-+        layer->planes[1].object_index = 0;
-+        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
-+            height;
-+        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
-+
-+        layer->planes[2].object_index = 0;
-+        layer->planes[2].offset = layer->planes[1].offset +
-+            ((avbuf->plane_info[0].bytesperline *
-+              height) >> 2);
-+        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
-+        break;
-+
-+    default:
-+        drm_desc->nb_layers = 0;
-+        break;
-+    }
-+
-+    return (uint8_t *) drm_desc;
-+}
-+
-+// timeout in ms
-+static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout)
-+{
-+    DeintV4L2M2MContextShared *ctx = queue->ctx;
-+    V4L2Buffer* avbuf;
-+
-+    av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
-+
-+    avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout);
-+    if (!avbuf) {
-+        av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout);
-+        return AVERROR(EAGAIN);
-+    }
-+
-+    // Fill in PTS and anciliary info from src frame
-+    // we will want to overwrite some fields as only the pts/dts
-+    // fields are updated with new timing in this fn
-+    pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame);
-+
-+    frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame,
-+                            sizeof(avbuf->drm_frame), v4l2_free_buffer,
-+                            avbuf, AV_BUFFER_FLAG_READONLY);
-+    if (!frame->buf[0]) {
-+        av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0);
-+        return AVERROR(ENOMEM);
-+    }
-+
-+    atomic_fetch_add(&ctx->refcount, 1);
-+
-+    frame->data[0] = (uint8_t *)v4l2_get_drm_frame(avbuf, ctx->orig_height);
-+    frame->format = AV_PIX_FMT_DRM_PRIME;
-+    if (ctx->hw_frames_ctx)
-+        frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
-+    frame->height = ctx->height;
-+    frame->width = ctx->width;
-+
-+    // Not interlaced now
-+    frame->interlaced_frame = 0;
-+    frame->top_field_first = 0;
-+    // Pkt duration halved
-+    frame->pkt_duration /= 2;
-+
-+    if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) {
-+        av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n");
-+        frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM;
-+    }
-+
-+    av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts);
-+    return 0;
-+}
-+
-+static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
-+{
-+    AVFilterLink *inlink           = outlink->src->inputs[0];
-+    AVFilterContext *avctx         = outlink->src;
-+    DeintV4L2M2MContext *priv      = avctx->priv;
-+    DeintV4L2M2MContextShared *ctx = priv->shared;
-+    int ret;
-+
-+    ctx->height = avctx->inputs[0]->h;
-+    ctx->width = avctx->inputs[0]->w;
-+
-+    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d\n", __func__, ctx->width, ctx->height);
-+
-+    outlink->time_base           = inlink->time_base;
-+    outlink->w                   = inlink->w;
-+    outlink->h                   = inlink->h;
-+    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
-+    outlink->format              = inlink->format;
-+    outlink->frame_rate = (AVRational) {1, 0};  // Deny knowledge of frame rate
-+
-+    ret = deint_v4l2m2m_find_device(ctx);
-+    if (ret)
-+        return ret;
-+
-+    if (inlink->hw_frames_ctx) {
-+        ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);
-+        if (!ctx->hw_frames_ctx)
-+            return AVERROR(ENOMEM);
-+    }
-+    return 0;
-+}
-+
-+static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
-+{
-+    AVFilterContext *avctx         = link->dst;
-+    DeintV4L2M2MContext *priv      = avctx->priv;
-+    DeintV4L2M2MContextShared *ctx = priv->shared;
-+    V4L2Queue *capture             = &ctx->capture;
-+    V4L2Queue *output              = &ctx->output;
-+    int ret;
-+
-+    av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" (%"PRId64") field :%d interlaced: %d aspect:%d/%d\n",
-+          __func__, in->pts, AV_NOPTS_VALUE, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den);
-+    av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__,
-+           avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out);
-+
-+    if (ctx->field_order == V4L2_FIELD_ANY) {
-+        AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)in->data[0];
-+        ctx->orig_width = drm_desc->layers[0].planes[0].pitch;
-+        ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width;
-+
-+        av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height,
-+           drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset);
-+
-+        if (in->top_field_first)
-+            ctx->field_order = V4L2_FIELD_INTERLACED_TB;
-+        else
-+            ctx->field_order = V4L2_FIELD_INTERLACED_BT;
-+
-+        ret = deint_v4l2m2m_set_format(output, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_set_format(capture, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_allocate_buffers(capture);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_streamon(capture);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_allocate_buffers(output);
-+        if (ret)
-+            return ret;
-+
-+        ret = deint_v4l2m2m_streamon(output);
-+        if (ret)
-+            return ret;
-+    }
-+
-+    ret = deint_v4l2m2m_enqueue_frame(output, in);
-+
-+    av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret));
-+    return ret;
-+}
-+
-+static int deint_v4l2m2m_activate(AVFilterContext *avctx)
-+{
-+    DeintV4L2M2MContext * const priv = avctx->priv;
-+    DeintV4L2M2MContextShared *const s = priv->shared;
-+    AVFilterLink * const outlink = avctx->outputs[0];
-+    AVFilterLink * const inlink = avctx->inputs[0];
-+    int n = 0;
-+    int cn = 99;
-+    int instatus = 0;
-+    int64_t inpts = 0;
-+    int did_something = 0;
-+
-+    av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__);
-+
-+    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx);
-+
-+    ff_inlink_acknowledge_status(inlink, &instatus, &inpts);
-+
-+    if (!ff_outlink_frame_wanted(outlink)) {
-+        av_log(priv, AV_LOG_TRACE, "%s: Not wanted out\n", __func__);
-+    }
-+    else if (s->field_order != V4L2_FIELD_ANY)  // Can't DQ if no setup!
-+    {
-+        AVFrame * frame = av_frame_alloc();
-+        int rv;
-+
-+again:
-+        recycle_q(&s->output);
-+        n = count_enqueued(&s->output);
-+
-+        if (frame == NULL) {
-+            av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__);
-+            return AVERROR(ENOMEM);
-+        }
-+
-+        rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, n > 4 ? 300 : 0);
-+        if (rv != 0) {
-+            av_frame_free(&frame);
-+            if (rv != AVERROR(EAGAIN)) {
-+                av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv));
-+                return rv;
-+            }
-+        }
-+        else {
-+            frame->interlaced_frame = 0;
-+            // frame is always consumed by filter_frame - even on error despite
-+            // a somewhat confusing comment in the header
-+            rv = ff_filter_frame(outlink, frame);
-+
-+            if (instatus != 0) {
-+                av_log(priv, AV_LOG_TRACE, "%s: eof loop\n", __func__);
-+                goto again;
-+            }
-+
-+            av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv));
-+            did_something = 1;
-+        }
-+
-+        cn = count_enqueued(&s->capture);
-+    }
-+
-+    if (instatus != 0) {
-+        ff_outlink_set_status(outlink, instatus, inpts);
-+        av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(instatus));
-+        return 0;
-+    }
-+
-+    {
-+        AVFrame * frame;
-+        int rv;
-+
-+        recycle_q(&s->output);
-+        n = count_enqueued(&s->output);
-+
-+        while (n < 6) {
-+            if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
-+                av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
-+                return rv;
-+            }
-+
-+            if (frame == NULL) {
-+                av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
-+                break;
-+            }
-+
-+            deint_v4l2m2m_filter_frame(inlink, frame);
-+            av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
-+            ++n;
-+        }
-+    }
-+
-+    if (n < 6) {
-+        ff_inlink_request_frame(inlink);
-+        did_something = 1;
-+        av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__);
-+    }
-+
-+    if (n > 4 && ff_outlink_frame_wanted(outlink)) {
-+        ff_filter_set_ready(avctx, 1);
-+        did_something = 1;
-+        av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__);
-+    }
-+
-+    av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn);
-+    return did_something ? 0 : FFERROR_NOT_READY;
-+}
-+
-+static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
-+{
-+    DeintV4L2M2MContext * const priv = avctx->priv;
-+    DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared));
-+
-+    if (!ctx) {
-+        av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0);
-+        return AVERROR(ENOMEM);
-+    }
-+    priv->shared = ctx;
-+    ctx->logctx = priv;
-+    ctx->fd = -1;
-+    ctx->output.ctx = ctx;
-+    ctx->output.num_buffers = 8;
-+    ctx->capture.ctx = ctx;
-+    ctx->capture.num_buffers = 12;
-+    ctx->done = 0;
-+    ctx->field_order = V4L2_FIELD_ANY;
-+
-+    pts_track_init(&ctx->track, priv);
-+
-+    atomic_init(&ctx->refcount, 1);
-+
-+    return 0;
-+}
-+
-+static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
-+{
-+    DeintV4L2M2MContext *priv = avctx->priv;
-+    DeintV4L2M2MContextShared *ctx = priv->shared;
-+
-+    ctx->done = 1;
-+    ctx->logctx = NULL;  // Log to NULL works, log to missing crashes
-+    pts_track_uninit(&ctx->track);
-+    deint_v4l2m2m_destroy_context(ctx);
-+}
-+
-+static const AVOption deinterlace_v4l2m2m_options[] = {
-+    { NULL },
-+};
-+
-+AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m);
-+
-+static const AVFilterPad deint_v4l2m2m_inputs[] = {
-+    {
-+        .name         = "default",
-+        .type         = AVMEDIA_TYPE_VIDEO,
-+    },
-+};
-+
-+static const AVFilterPad deint_v4l2m2m_outputs[] = {
-+    {
-+        .name          = "default",
-+        .type          = AVMEDIA_TYPE_VIDEO,
-+        .config_props  = deint_v4l2m2m_config_props,
-+    },
-+};
-+
-+AVFilter ff_vf_deinterlace_v4l2m2m = {
-+    .name           = "deinterlace_v4l2m2m",
-+    .description    = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"),
-+    .priv_size      = sizeof(DeintV4L2M2MContext),
-+    .init           = &deint_v4l2m2m_init,
-+    .uninit         = &deint_v4l2m2m_uninit,
-+    FILTER_INPUTS(deint_v4l2m2m_inputs),
-+    FILTER_OUTPUTS(deint_v4l2m2m_outputs),
-+    FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME),
-+    .priv_class     = &deinterlace_v4l2m2m_class,
-+    .activate       = deint_v4l2m2m_activate,
-+};
-
-From 1956533e4c9b3f45f9fcb83da6e04beec0e0b517 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 2 Dec 2021 17:49:55 +0000
-Subject: [PATCH 031/186] Put no_pts_rescale in context which makes more sense
- than an arg
-
----
- libavcodec/v4l2_buffers.c | 28 ++++++++++++++--------------
- libavcodec/v4l2_buffers.h |  5 ++---
- libavcodec/v4l2_context.c |  8 ++++----
- libavcodec/v4l2_context.h | 13 +++++++++----
- libavcodec/v4l2_m2m_dec.c |  9 +++++----
- 5 files changed, 34 insertions(+), 29 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 126d2a17f4fe..22da6bd72234 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -39,7 +39,7 @@
- #define USEC_PER_SEC 1000000
- static const AVRational v4l2_timebase = { 1, USEC_PER_SEC };
- 
--static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf)
-+static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf)
- {
-     return V4L2_TYPE_IS_OUTPUT(buf->context->type) ?
-         container_of(buf->context, V4L2m2mContext, output) :
-@@ -51,34 +51,34 @@ static inline AVCodecContext *logger(V4L2Buffer *buf)
-     return buf_to_m2mctx(buf)->avctx;
- }
- 
--static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf)
-+static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf)
- {
--    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
-+    const V4L2m2mContext *s = buf_to_m2mctx(avbuf);
-     const AVRational tb = s->avctx->pkt_timebase.num ?
-         s->avctx->pkt_timebase :
-         s->avctx->time_base;
-     return tb.num && tb.den ? tb : v4l2_timebase;
- }
- 
--static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts, int no_rescale)
-+static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts)
- {
-     /* convert pts to v4l2 timebase */
-     const int64_t v4l2_pts =
--        no_rescale ? pts :
-+        out->context->no_pts_rescale ? pts :
-         pts == AV_NOPTS_VALUE ? 0 :
-             av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
-     out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
-     out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
- }
- 
--static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf, int no_rescale)
-+static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf)
- {
-     /* convert pts back to encoder timebase */
-     const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
-                         avbuf->buf.timestamp.tv_usec;
- 
-     return
--        no_rescale ? v4l2_pts :
-+        avbuf->context->no_pts_rescale ? v4l2_pts :
-         v4l2_pts == 0 ? AV_NOPTS_VALUE :
-             av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
- }
-@@ -686,13 +686,13 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
-     v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
-     v4l2_set_color_range(out, frame->color_range);
-     // PTS & interlace are buffer vars
--    v4l2_set_pts(out, frame->pts, 0);
-+    v4l2_set_pts(out, frame->pts);
-     v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first);
- 
-     return v4l2_buffer_swframe_to_buf(frame, out);
- }
- 
--int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_rescale_pts)
-+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
- {
-     int ret;
-     V4L2Context * const ctx = avbuf->context;
-@@ -710,7 +710,7 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_resc
-     frame->colorspace = v4l2_get_color_space(avbuf);
-     frame->color_range = v4l2_get_color_range(avbuf);
-     frame->color_trc = v4l2_get_color_trc(avbuf);
--    frame->pts = v4l2_get_pts(avbuf, no_rescale_pts);
-+    frame->pts = v4l2_get_pts(avbuf);
-     frame->pkt_dts = AV_NOPTS_VALUE;
-     frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf);
-     frame->top_field_first = v4l2_buf_is_top_first(avbuf);
-@@ -757,13 +757,13 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
-         pkt->flags |= AV_PKT_FLAG_CORRUPT;
-     }
- 
--    pkt->dts = pkt->pts = v4l2_get_pts(avbuf, 0);
-+    pkt->dts = pkt->pts = v4l2_get_pts(avbuf);
- 
-     return 0;
- }
- 
- int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
--                                    const void *extdata, size_t extlen, int no_rescale_pts)
-+                                    const void *extdata, size_t extlen)
- {
-     int ret;
- 
-@@ -777,7 +777,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-     if (ret && ret != AVERROR(ENOMEM))
-         return ret;
- 
--    v4l2_set_pts(out, pkt->pts, no_rescale_pts);
-+    v4l2_set_pts(out, pkt->pts);
- 
-     if (pkt->flags & AV_PKT_FLAG_KEY)
-         out->flags = V4L2_BUF_FLAG_KEYFRAME;
-@@ -787,7 +787,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
- 
- int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
- {
--    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0);
-+    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0);
- }
- 
- 
-diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
-index 111526aee315..641e0e147b19 100644
---- a/libavcodec/v4l2_buffers.h
-+++ b/libavcodec/v4l2_buffers.h
-@@ -83,12 +83,11 @@ typedef struct V4L2Buffer {
-  *
-  * @param[in] frame The AVFRame to push the information to
-  * @param[in] buf The V4L2Buffer to get the information from
-- * @param[in] no_rescale_pts If non-zero do not rescale PTS
-  *
-  * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect,
-  * AVERROR(ENOMEM) if the AVBufferRef can't be created.
-  */
--int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf, int no_rescale_pts);
-+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf);
- 
- /**
-  * Extracts the data from a V4L2Buffer to an AVPacket
-@@ -113,7 +112,7 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf);
- int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
- 
- int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
--                                    const void *extdata, size_t extlen, int no_rescale_pts);
-+                                    const void *extdata, size_t extlen);
- 
- /**
-  * Extracts the data from an AVFrame to a V4L2Buffer
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 440dfaaba551..64540a37b32e 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -808,7 +808,7 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
- }
- 
- int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
--                                   const void * extdata, size_t extlen, int no_rescale_pts)
-+                                   const void * extdata, size_t extlen)
- {
-     V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-     V4L2Buffer* avbuf;
-@@ -827,7 +827,7 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
-     if (!avbuf)
-         return AVERROR(EAGAIN);
- 
--    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts);
-+    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen);
-     if (ret == AVERROR(ENOMEM))
-         av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
-                __func__, pkt->size, avbuf->planes[0].length);
-@@ -837,7 +837,7 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
-     return ff_v4l2_buffer_enqueue(avbuf);
- }
- 
--int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout, int no_rescale_pts)
-+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
- {
-     V4L2Buffer *avbuf;
- 
-@@ -854,7 +854,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout,
-         return AVERROR(EAGAIN);
-     }
- 
--    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf, no_rescale_pts);
-+    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
- }
- 
- int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 37b0431400d8..4cc164886c3b 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -102,6 +102,13 @@ typedef struct V4L2Context {
-      */
-     int done;
- 
-+    /**
-+     * PTS rescale not wanted
-+     * If the PTS is just a dummy frame count then rescale is
-+     * actively harmful
-+     */
-+    int no_pts_rescale;
-+
-     AVBufferRef *frames_ref;
-     int q_count;
-     int dq_count;
-@@ -172,12 +179,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
-  * @param[in] ctx The V4L2Context to dequeue from.
-  * @param[inout] f The AVFrame to dequeue to.
-  * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
-- * @param[in] no_rescale_pts (0 rescale pts, 1 use pts as
-- *       timestamp directly)
-  *
-  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
-  */
--int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int no_rescale_pts);
-+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
- 
- /**
-  * Enqueues a buffer to a V4L2Context from an AVPacket
-@@ -189,7 +194,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int
-  * @param[in] pkt A pointer to an AVPacket.
-  * @return 0 in case of success, a negative error otherwise.
-  */
--int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size, int no_rescale_pts);
-+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size);
- 
- /**
-  * Enqueues a buffer to a V4L2Context from an AVFrame
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index b49f470c0a1e..36754b314a4b 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -360,7 +360,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-             if (!s->draining) {
-                 // Calling enqueue with an empty pkt starts drain
-                 av_assert0(s->buf_pkt.size == 0);
--                ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0, 1);
-+                ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
-                 if (ret) {
-                     av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret);
-                     return ret;
-@@ -381,8 +381,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-         return ret;
- 
-     ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt,
--                                         avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size,
--                                         1);
-+                                         avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size);
- 
-     if (ret == AVERROR(EAGAIN)) {
-         // Out of input buffers - keep packet
-@@ -442,7 +441,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                 // when discarding
-                 // This returns AVERROR(EAGAIN) if there isn't a frame ready yet
-                 // but there is room in the input Q
--                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 100 : -1, 1);
-+                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 100 : -1);
- 
-                 if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-                     av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
-@@ -569,10 +568,12 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     output->av_codec_id = avctx->codec_id;
-     output->av_pix_fmt  = AV_PIX_FMT_NONE;
-     output->min_buf_size = max_coded_size(avctx);
-+    output->no_pts_rescale = 1;
- 
-     capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
-     capture->av_pix_fmt = avctx->pix_fmt;
-     capture->min_buf_size = 0;
-+    capture->no_pts_rescale = 1;
- 
-     /* the client requests the codec to generate DRM frames:
-      *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
-
-From fdcdb8519c90f3d2038244b21abf165f56224f08 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 8 Dec 2021 15:00:37 +0000
-Subject: [PATCH 032/186] Use bitbuf min size for all streams
-
----
- libavcodec/v4l2_m2m_dec.c | 5 +----
- 1 file changed, 1 insertion(+), 4 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 36754b314a4b..48a6810d18b6 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -507,15 +507,12 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- }
- #endif
- 
-+// This heuristic is for H264 but use for everything
- static uint32_t max_coded_size(const AVCodecContext * const avctx)
- {
-     uint32_t wxh = avctx->coded_width * avctx->coded_height;
-     uint32_t size;
- 
--    // Currently the only thing we try to set our own limits for is H264
--    if (avctx->codec_id != AV_CODEC_ID_H264)
--        return 0;
--
-     size = wxh * 3 / 2;
-     // H.264 Annex A table A-1 gives minCR which is either 2 or 4
-     // unfortunately that doesn't yield an actually useful limit
-
-From 6703c26d1e71bcb7a077b07e8cbef015a1204da9 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 3 Dec 2021 12:54:18 +0000
-Subject: [PATCH 033/186] Track pending frames in v4l2 stateful
-
-Track which frames are pending decode in the v4l2 stateful decoder.
-This relies on DTS & PTS having some relationship to reality, so
-any use of this code must cope with the results being wrong.
-
-Also moves the xlat state vars out of the main context and into their
-own structure.
----
- libavcodec/v4l2_m2m.h     |  15 ++++--
- libavcodec/v4l2_m2m_dec.c | 100 +++++++++++++++++++++++++++++---------
- 2 files changed, 89 insertions(+), 26 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 82feb0afdbe3..3f8680962342 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -44,8 +44,10 @@
- #define FF_V4L2_M2M_TRACK_SIZE 128
- typedef struct V4L2m2mTrackEl {
-     int     discard;   // If we see this buffer its been flushed, so discard
-+    int     pending;
-     int     pkt_size;
-     int64_t pts;
-+    int64_t dts;
-     int64_t reordered_opaque;
-     int64_t pkt_pos;
-     int64_t pkt_duration;
-@@ -62,6 +64,14 @@ typedef struct pts_stats_s
-     int64_t guess;
- } pts_stats_t;
- 
-+typedef struct xlat_track_s {
-+    unsigned int track_no;
-+    int64_t last_pts;
-+    int64_t last_pkt_dts;
-+    int64_t last_opaque;
-+    V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
-+} xlat_track_t;
-+
- typedef struct V4L2m2mContext {
-     char devname[PATH_MAX];
-     int fd;
-@@ -96,10 +106,7 @@ typedef struct V4L2m2mContext {
-     int output_drm;
- 
-     /* Frame tracking */
--    int64_t last_pkt_dts;
--    int64_t last_opaque;
--    unsigned int track_no;
--    V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
-+    xlat_track_t xlat;
- 
-     pts_stats_t pts_stat;
- 
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 48a6810d18b6..d8ebb466cd56 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -242,22 +242,24 @@ static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts
- // buffer of all the things we want preserved (including the original PTS)
- // indexed by the tracking no.
- static void
--xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *const avpkt)
-+xlat_pts_in(AVCodecContext *const avctx, xlat_track_t *const x, AVPacket *const avpkt)
- {
-     int64_t track_pts;
- 
-     // Avoid 0
--    if (++s->track_no == 0)
--        s->track_no = 1;
-+    if (++x->track_no == 0)
-+        x->track_no = 1;
- 
--    track_pts = track_to_pts(avctx, s->track_no);
-+    track_pts = track_to_pts(avctx, x->track_no);
- 
--    av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, s->track_no);
--    s->last_pkt_dts = avpkt->dts;
--    s->track_els[s->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-+    av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no);
-+    x->last_pkt_dts = avpkt->dts;
-+    x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-         .discard          = 0,
-+        .pending          = 1,
-         .pkt_size         = avpkt->size,
-         .pts              = avpkt->pts,
-+        .dts              = avpkt->dts,
-         .reordered_opaque = avctx->reordered_opaque,
-         .pkt_pos          = avpkt->pos,
-         .pkt_duration     = avpkt->duration,
-@@ -268,31 +270,36 @@ xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *cons
- 
- // Returns -1 if we should discard the frame
- static int
--xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *const frame)
-+xlat_pts_out(AVCodecContext *const avctx,
-+             xlat_track_t * const x,
-+             pts_stats_t * const ps,
-+             AVFrame *const frame)
- {
-     unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
--    const V4L2m2mTrackEl *const t = s->track_els + n;
-+    V4L2m2mTrackEl *const t = x->track_els + n;
-     if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
-     {
-         av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-         frame->pts              = AV_NOPTS_VALUE;
--        frame->pkt_dts          = s->last_pkt_dts;
--        frame->reordered_opaque = s->last_opaque;
-+        frame->pkt_dts          = x->last_pkt_dts;
-+        frame->reordered_opaque = x->last_opaque;
-         frame->pkt_pos          = -1;
-         frame->pkt_duration     = 0;
-         frame->pkt_size         = -1;
-     }
-     else if (!t->discard)
-     {
--        frame->pts              = t->pts;
--        frame->pkt_dts          = s->last_pkt_dts;
-+        frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;
-+        frame->pkt_dts          = x->last_pkt_dts;
-         frame->reordered_opaque = t->reordered_opaque;
-         frame->pkt_pos          = t->pkt_pos;
-         frame->pkt_duration     = t->pkt_duration;
-         frame->pkt_size         = t->pkt_size;
- 
--        s->last_opaque = s->track_els[n].reordered_opaque;
--        s->track_els[n].pts = AV_NOPTS_VALUE;  // If we hit this again deny accurate knowledge of PTS
-+        x->last_opaque = x->track_els[n].reordered_opaque;
-+        if (frame->pts != AV_NOPTS_VALUE)
-+            x->last_pts = frame->pts;
-+        t->pending = 0;
-     }
-     else
-     {
-@@ -300,14 +307,62 @@ xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *cons
-         return -1;
-     }
- 
--    pts_stats_add(&s->pts_stat, frame->pts);
-+    pts_stats_add(ps, frame->pts);
- 
--    frame->best_effort_timestamp = pts_stats_guess(&s->pts_stat);
-+    frame->best_effort_timestamp = pts_stats_guess(ps);
-     frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
-     av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
-     return 0;
- }
- 
-+static void
-+xlat_flush(xlat_track_t * const x)
-+{
-+    unsigned int i;
-+    for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) {
-+        x->track_els[i].pending = 0;
-+        x->track_els[i].discard = 1;
-+    }
-+    x->last_pts = AV_NOPTS_VALUE;
-+}
-+
-+static void
-+xlat_init(xlat_track_t * const x)
-+{
-+    memset(x, 0, sizeof(*x));
-+    x->last_pts = AV_NOPTS_VALUE;
-+}
-+
-+static int
-+xlat_pending(const xlat_track_t * const x)
-+{
-+    unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE;
-+    unsigned int i;
-+    int r = 0;
-+    int64_t now = AV_NOPTS_VALUE;
-+
-+    for (i = 0; i < 32; ++i, n = (n - 1) % FF_V4L2_M2M_TRACK_SIZE) {
-+        const V4L2m2mTrackEl * const t = x->track_els + n;
-+
-+        if (!t->pending)
-+            continue;
-+
-+        if (now == AV_NOPTS_VALUE)
-+            now = t->dts;
-+
-+        if (t->pts == AV_NOPTS_VALUE ||
-+            ((now == AV_NOPTS_VALUE || t->pts <= now) &&
-+             (x->last_pts == AV_NOPTS_VALUE || t->pts > x->last_pts)))
-+            ++r;
-+    }
-+
-+    // If we never get any ideas about PTS vs DTS allow a lot more buffer
-+    if (now == AV_NOPTS_VALUE)
-+        r -= 16;
-+
-+    return r;
-+}
-+
- static inline int stream_started(const V4L2m2mContext * const s) {
-     return s->capture.streamon && s->output.streamon;
- }
-@@ -374,7 +429,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-             return ret;
-         }
- 
--        xlat_pts_in(avctx, s, &s->buf_pkt);
-+        xlat_pts_in(avctx, &s->xlat, &s->buf_pkt);
-     }
- 
-     if ((ret = check_output_streamon(avctx, s)) != 0)
-@@ -417,6 +472,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-     int dst_rv = 1;  // Non-zero (done), non-negative (error) number
- 
-     do {
-+        av_log(avctx, AV_LOG_INFO, "Pending=%d\n", xlat_pending(&s->xlat));
-         src_rv = try_enqueue_src(avctx, s);
- 
-         // If we got a frame last time and we have nothing to enqueue then
-@@ -451,7 +507,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                            s->draining, s->capture.done, dst_rv);
- 
-                 // Go again if we got a frame that we need to discard
--            } while (dst_rv == 0 && xlat_pts_out(avctx, s, frame));
-+            } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame));
-         }
- 
-         // Continue trying to enqueue packets if either
-@@ -550,6 +606,7 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     if (ret < 0)
-         return ret;
- 
-+    xlat_init(&s->xlat);
-     pts_stats_init(&s->pts_stat, avctx, "decoder");
- 
-     capture = &s->capture;
-@@ -632,7 +689,7 @@ static void v4l2_decode_flush(AVCodecContext *avctx)
-     V4L2m2mContext * const s = priv->context;
-     V4L2Context * const output = &s->output;
-     V4L2Context * const capture = &s->capture;
--    int ret, i;
-+    int ret;
- 
-     av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon);
- 
-@@ -646,8 +703,7 @@ static void v4l2_decode_flush(AVCodecContext *avctx)
- 
-     // V4L2 makes no guarantees about whether decoded frames are flushed or not
-     // so mark all frames we are tracking to be discarded if they appear
--    for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i)
--        s->track_els[i].discard = 1;
-+    xlat_flush(&s->xlat);
- 
-     // resend extradata
-     s->extdata_sent = 0;
-
-From 74854095e6aac7647a2a04d53110150dd83f3b09 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 15 Dec 2021 17:58:21 +0000
-Subject: [PATCH 034/186] Use pending tracking to reduce v4l2 latency
-
-If there are more than 5 pending decodes outstanding then add a small
-timeout to the capture poll to reduce the rate at which frames are
-added.
----
- libavcodec/v4l2_m2m_dec.c | 58 ++++++++++++++++++++++++---------------
- 1 file changed, 36 insertions(+), 22 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index d8ebb466cd56..7e7e4729d08b 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -370,16 +370,19 @@ static inline int stream_started(const V4L2m2mContext * const s) {
- #define NQ_OK        0
- #define NQ_Q_FULL    1
- #define NQ_SRC_EMPTY 2
--#define NQ_DRAINING  3
--#define NQ_DEAD      4
-+#define NQ_NONE      3
-+#define NQ_DRAINING  4
-+#define NQ_DEAD      5
- 
- #define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING)
-+#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE)
- 
- // AVERROR_EOF     Flushing an already flushed stream
- // -ve             Error (all errors except EOF are unexpected)
- // NQ_OK (0)       OK
- // NQ_Q_FULL       Dst full (retry if we think V4L2 Q has space now)
- // NQ_SRC_EMPTY    Src empty (do not retry)
-+// NQ_NONE         Enqueue not attempted
- // NQ_DRAINING     At EOS, dQ dest until EOS there too
- // NQ_DEAD         Not running (do not retry, do not attempt capture dQ)
- 
-@@ -468,23 +471,28 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
- static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- {
-     V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
--    int src_rv;
-+    int src_rv = NQ_NONE;
-     int dst_rv = 1;  // Non-zero (done), non-negative (error) number
-+    unsigned int i = 0;
- 
-     do {
--        av_log(avctx, AV_LOG_INFO, "Pending=%d\n", xlat_pending(&s->xlat));
--        src_rv = try_enqueue_src(avctx, s);
--
--        // If we got a frame last time and we have nothing to enqueue then
--        // return now. rv will be AVERROR(EAGAIN) indicating that we want more input
--        // This should mean that once decode starts we enter a stable state where
--        // we alternately ask for input and produce output
--        if (s->req_pkt && src_rv == NQ_SRC_EMPTY)
--            break;
--
--        if (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) {
--            av_log(avctx, AV_LOG_WARNING, "Poll says src Q has space but enqueue fail");
--            src_rv = NQ_SRC_EMPTY;  // If we can't enqueue pretend that there is nothing to enqueue
-+        const int pending = xlat_pending(&s->xlat);
-+        const int prefer_dq = (pending > 5);
-+
-+        // Enqueue another pkt for decode if
-+        // (a) We don't have a lot of stuff in the buffer already OR
-+        // (b) ... we (think we) do but we've failed to get a frame already OR
-+        // (c) We've dequeued a lot of frames without asking for input
-+        if (!prefer_dq || i != 0 || s->req_pkt > 2) {
-+            src_rv = try_enqueue_src(avctx, s);
-+
-+            // If we got a frame last time or we've already tried to get a frame and
-+            // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN)
-+            // indicating that we want more input.
-+            // This should mean that once decode starts we enter a stable state where
-+            // we alternately ask for input and produce output
-+            if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY)
-+                break;
-         }
- 
-         // Try to get a new frame if
-@@ -495,9 +503,9 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                 // Dequeue frame will unref any previous contents of frame
-                 // if it returns success so we don't need an explicit unref
-                 // when discarding
--                // This returns AVERROR(EAGAIN) if there isn't a frame ready yet
--                // but there is room in the input Q
--                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 100 : -1);
-+                // This returns AVERROR(EAGAIN) on timeout or if
-+                // there is room in the input Q and timeout == -1
-+                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, prefer_dq ? 5 : -1);
- 
-                 if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-                     av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
-@@ -510,10 +518,16 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-             } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame));
-         }
- 
-+        ++i;
-+        if (i >= 256) {
-+            av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i);
-+            src_rv = AVERROR(EIO);
-+        }
-+
-         // Continue trying to enqueue packets if either
-         // (a) we succeeded last time OR
--        // (b) enqueue failed due to input Q full AND there is now room
--    } while (src_rv == NQ_OK || (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) );
-+        // (b) we didn't ret a frame and we can retry the input
-+    } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv)));
- 
-     // Ensure that the frame contains nothing if we aren't returning a frame
-     // (might happen when discarding)
-@@ -521,7 +535,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-         av_frame_unref(frame);
- 
-     // If we got a frame this time ask for a pkt next time
--    s->req_pkt = (dst_rv == 0);
-+    s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0;
- 
- #if 0
-     if (dst_rv == 0)
-
-From 584445b9041ff6faed2a9a1ad455f0ab30bf04c6 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 15 Dec 2021 12:23:54 +0000
-Subject: [PATCH 035/186] Allow logger() to take const ctx
-
----
- libavcodec/v4l2_buffers.c | 2 +-
- libavcodec/v4l2_context.c | 4 ++--
- 2 files changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 22da6bd72234..39c0094aec10 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -46,7 +46,7 @@ static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf)
-         container_of(buf->context, V4L2m2mContext, capture);
- }
- 
--static inline AVCodecContext *logger(V4L2Buffer *buf)
-+static inline AVCodecContext *logger(const V4L2Buffer * const buf)
- {
-     return buf_to_m2mctx(buf)->avctx;
- }
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 64540a37b32e..d3df48aed499 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -43,14 +43,14 @@ struct v4l2_format_update {
-     int update_avfmt;
- };
- 
--static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx)
-+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
- {
-     return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
-         container_of(ctx, V4L2m2mContext, output) :
-         container_of(ctx, V4L2m2mContext, capture);
- }
- 
--static inline AVCodecContext *logger(V4L2Context *ctx)
-+static inline AVCodecContext *logger(const V4L2Context *ctx)
- {
-     return ctx_to_m2mctx(ctx)->avctx;
- }
-
-From 80ff275f1183fe466edbdfaadf17b2c40a45fa3e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 15 Dec 2021 13:00:27 +0000
-Subject: [PATCH 036/186] Track numbere of bufs qed with an atomic
-
-Safer and faster than counting status
----
- libavcodec/v4l2_buffers.c | 6 +++---
- libavcodec/v4l2_context.c | 3 ++-
- libavcodec/v4l2_context.h | 3 +--
- 3 files changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 39c0094aec10..2cf7be663263 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -922,6 +922,7 @@ fail:
- int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
- {
-     int ret;
-+    int qc;
- 
-     avbuf->buf.flags = avbuf->flags;
- 
-@@ -941,11 +942,10 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
-         return AVERROR(err);
-     }
- 
--    ++avbuf->context->q_count;
-+    qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1;
-     av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
-            avbuf->context->name, avbuf->buf.index,
--           avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec,
--           avbuf->context->q_count);
-+           avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc);
- 
-     avbuf->status = V4L2BUF_IN_DRIVER;
- 
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index d3df48aed499..268a057e53cc 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -599,7 +599,7 @@ static int v4l2_release_buffers(V4L2Context* ctx)
-                     "  2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n");
-         }
-     }
--    ctx->q_count = 0;
-+    atomic_store(&ctx->q_count, 0);
- 
-     return ret;
- }
-@@ -1019,6 +1019,7 @@ int ff_v4l2_context_init(V4L2Context* ctx)
-     }
- 
-     ff_mutex_init(&ctx->lock, NULL);
-+    atomic_init(&ctx->q_count, 0);
- 
-     if (s->output_drm) {
-         AVHWFramesContext *hwframes;
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 4cc164886c3b..a4176448d595 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -110,8 +110,7 @@ typedef struct V4L2Context {
-     int no_pts_rescale;
- 
-     AVBufferRef *frames_ref;
--    int q_count;
--    int dq_count;
-+    atomic_int q_count;
-     struct ff_weak_link_master *wl_master;
- 
-     AVMutex lock;
-
-From c103328f28905a96632afece258c14cb726a7c48 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 9 Dec 2021 12:01:25 +0000
-Subject: [PATCH 037/186] Clear pkt_buf on flush
-
----
- libavcodec/v4l2_m2m_dec.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 7e7e4729d08b..09ec4963517b 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -715,6 +715,9 @@ static void v4l2_decode_flush(AVCodecContext *avctx)
-     if (ret < 0)
-         av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", output->name, ret);
- 
-+    // Clear any buffered input packet
-+    av_packet_unref(&s->buf_pkt);
-+
-     // V4L2 makes no guarantees about whether decoded frames are flushed or not
-     // so mark all frames we are tracking to be discarded if they appear
-     xlat_flush(&s->xlat);
-
-From b7552e6e913b0b894106f735465742dbba915bba Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 15 Dec 2021 12:52:56 +0000
-Subject: [PATCH 038/186] Rework v4l2 buffer dequeue
-
----
- libavcodec/v4l2_context.c | 543 ++++++++++++++++++--------------------
- libavcodec/v4l2_context.h |   2 +
- libavcodec/v4l2_m2m.c     |   1 -
- libavcodec/v4l2_m2m.h     |  16 +-
- libavcodec/v4l2_m2m_dec.c | 138 ++++------
- 5 files changed, 327 insertions(+), 373 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 268a057e53cc..d765181645fb 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -73,19 +73,27 @@ static AVRational v4l2_get_sar(V4L2Context *ctx)
-     return sar;
- }
- 
--static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2)
-+static inline int ctx_buffers_alloced(const V4L2Context * const ctx)
- {
--    struct v4l2_format *fmt1 = &ctx->format;
--    int ret =  V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
--        fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
--        fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
--        :
--        fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
--        fmt1->fmt.pix.height != fmt2->fmt.pix.height;
-+    return ctx->bufrefs != NULL;
-+}
-+
-+// Width/Height changed or we don't have an alloc in the first place?
-+static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2)
-+{
-+    const struct v4l2_format *fmt1 = &ctx->format;
-+    int ret = !ctx_buffers_alloced(ctx) ||
-+        (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
-+            fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
-+            fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
-+            :
-+            fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
-+            fmt1->fmt.pix.height != fmt2->fmt.pix.height);
- 
-     if (ret)
--        av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n",
-+        av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n",
-             ctx->name,
-+            ctx_buffers_alloced(ctx),
-             ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1),
-             ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2));
- 
-@@ -167,10 +175,8 @@ static int do_source_change(V4L2m2mContext * const s)
- 
-     int ret;
-     int reinit;
--    int full_reinit;
-     struct v4l2_format cap_fmt = s->capture.format;
- 
--    s->resize_pending = 0;
-     s->capture.done = 0;
- 
-     ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
-@@ -179,15 +185,21 @@ static int do_source_change(V4L2m2mContext * const s)
-         return 0;
-     }
- 
--    s->output.sample_aspect_ratio = v4l2_get_sar(&s->output);
--
-     get_default_selection(&s->capture, &s->capture.selection);
- 
--    reinit = v4l2_resolution_changed(&s->capture, &cap_fmt);
-+    reinit = ctx_resolution_changed(&s->capture, &cap_fmt);
-+    s->capture.format = cap_fmt;
-     if (reinit) {
-         s->capture.height = ff_v4l2_get_format_height(&cap_fmt);
-         s->capture.width = ff_v4l2_get_format_width(&cap_fmt);
-     }
-+
-+    // If we don't support selection (or it is bust) and we obviously have HD then kludge
-+    if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) &&
-+        (s->capture.height == 1088 && s->capture.width == 1920)) {
-+        s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080};
-+    }
-+
-     s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
- 
-     av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d\n",
-@@ -195,11 +207,11 @@ static int do_source_change(V4L2m2mContext * const s)
-            s->capture.selection.width, s->capture.selection.height,
-            s->capture.selection.left, s->capture.selection.top);
- 
--    s->reinit = 1;
--
-     if (reinit) {
-         if (avctx)
--            ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height);
-+            ret = ff_set_dimensions(s->avctx,
-+                                    s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width,
-+                                    s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height);
-         if (ret < 0)
-             av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n");
- 
-@@ -208,11 +220,22 @@ static int do_source_change(V4L2m2mContext * const s)
-             av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n");
-             return AVERROR(EINVAL);
-         }
-+
-+        // Update pixel format - should only actually do something on initial change
-+        s->capture.av_pix_fmt =
-+        ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO);
-+        if (s->output_drm) {
-+            avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-+            avctx->sw_pix_fmt = s->capture.av_pix_fmt;
-+        }
-+        else
-+            avctx->pix_fmt = s->capture.av_pix_fmt;
-+
-         goto reinit_run;
-     }
- 
-     /* Buffers are OK so just stream off to ack */
--    av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only\n", __func__);
-+    av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__);
- 
-     ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
-     if (ret)
-@@ -225,54 +248,6 @@ reinit_run:
-     return 1;
- }
- 
--static int ctx_done(V4L2Context * const ctx)
--{
--    int rv = 0;
--    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
--
--    ctx->done = 1;
--
--    if (s->resize_pending && !V4L2_TYPE_IS_OUTPUT(ctx->type))
--        rv = do_source_change(s);
--
--    return rv;
--}
--
--/**
-- * handle resolution change event and end of stream event
-- * returns 1 if reinit was successful, negative if it failed
-- * returns 0 if reinit was not executed
-- */
--static int v4l2_handle_event(V4L2Context *ctx)
--{
--    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
--    struct v4l2_event evt = { 0 };
--    int ret;
--
--    ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt);
--    if (ret < 0) {
--        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name);
--        return 0;
--    }
--
--    av_log(logger(ctx), AV_LOG_INFO, "Dq event %d\n", evt.type);
--
--    if (evt.type == V4L2_EVENT_EOS) {
--//        ctx->done = 1;
--        av_log(logger(ctx), AV_LOG_TRACE, "%s VIDIOC_EVENT_EOS\n", ctx->name);
--        return 0;
--    }
--
--    if (evt.type != V4L2_EVENT_SOURCE_CHANGE)
--        return 0;
--
--    s->resize_pending = 1;
--    if (!ctx->done)
--        return 0;
--
--    return do_source_change(s);
--}
--
- static int v4l2_stop_decode(V4L2Context *ctx)
- {
-     struct v4l2_decoder_cmd cmd = {
-@@ -313,243 +288,252 @@ static int v4l2_stop_encode(V4L2Context *ctx)
-     return 0;
- }
- 
--static int count_in_driver(const V4L2Context * const ctx)
-+// DQ a buffer
-+// Amalgamates all the various ways there are of signalling EOS/Event to
-+// generate a consistant EPIPE.
-+//
-+// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped)
-+//
-+// Returns:
-+//  0               Success
-+//  AVERROR(EPIPE)  Nothing more to read
-+//  *               AVERROR(..)
-+
-+ static int
-+dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf)
- {
--    int i;
--    int n = 0;
-+    V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
-+    AVCodecContext * const avctx = m->avctx;
-+    V4L2Buffer * avbuf;
-+    const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type);
- 
--    if (!ctx->bufrefs)
--        return -1;
--
--    for (i = 0; i < ctx->num_buffers; ++i) {
--        V4L2Buffer *const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
--        if (avbuf->status == V4L2BUF_IN_DRIVER)
--            ++n;
--    }
--    return n;
--}
-+    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
- 
--static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
--{
--    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
--    const int is_capture = !V4L2_TYPE_IS_OUTPUT(ctx->type);
--    struct v4l2_plane planes[VIDEO_MAX_PLANES];
--    struct v4l2_buffer buf = { 0 };
--    V4L2Buffer *avbuf;
--    struct pollfd pfd = {
--        .events =  POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */
--        .fd = ctx_to_m2mctx(ctx)->fd,
-+    struct v4l2_buffer buf = {
-+        .type = ctx->type,
-+        .memory = V4L2_MEMORY_MMAP,
-     };
--    int i, ret;
--    int no_rx_means_done = 0;
--
--    if (is_capture && ctx->bufrefs) {
--        for (i = 0; i < ctx->num_buffers; i++) {
--            avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
--            if (avbuf->status == V4L2BUF_IN_DRIVER)
--                break;
--        }
--        if (i == ctx->num_buffers)
--            av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers (%d) returned to "
--                                                "userspace. Increase num_capture_buffers "
--                                                "to prevent device deadlock or dropped "
--                                                "packets/frames.\n", i);
-+
-+    *ppavbuf = NULL;
-+
-+    if (ctx->flag_last)
-+        return AVERROR(EPIPE);
-+
-+    if (is_mp) {
-+        buf.length = VIDEO_MAX_PLANES;
-+        buf.m.planes = planes;
-     }
- 
--#if 0
--    // I think this is true but pointless
--    // we will get some other form of EOF signal
--
--    /* if we are draining and there are no more capture buffers queued in the driver we are done */
--    if (is_capture && ctx_to_m2mctx(ctx)->draining) {
--        for (i = 0; i < ctx->num_buffers; i++) {
--            /* capture buffer initialization happens during decode hence
--             * detection happens at runtime
--             */
--            if (!ctx->bufrefs)
--                break;
--
--            avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
--            if (avbuf->status == V4L2BUF_IN_DRIVER)
--                goto start;
-+    while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) {
-+        const int err = errno;
-+        av_assert0(AVERROR(err) < 0);
-+        if (err != EINTR) {
-+            av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
-+                ctx->name, av_err2str(AVERROR(err)));
-+
-+            if (err == EPIPE)
-+                ctx->flag_last = 1;
-+
-+            return AVERROR(err);
-         }
--        ctx->done = 1;
--        return NULL;
-     }
--#endif
--
--start:
--    if (is_capture) {
--        /* no need to listen to requests for more input while draining */
--        if (ctx_to_m2mctx(ctx)->draining || timeout > 0)
--            pfd.events =  POLLIN | POLLRDNORM | POLLPRI;
--    } else {
--        pfd.events =  POLLOUT | POLLWRNORM;
-+    atomic_fetch_sub(&ctx->q_count, 1);
-+
-+    avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
-+    avbuf->status = V4L2BUF_AVAILABLE;
-+    avbuf->buf = buf;
-+    if (is_mp) {
-+        memcpy(avbuf->planes, planes, sizeof(planes));
-+        avbuf->buf.m.planes = avbuf->planes;
-     }
--    no_rx_means_done = s->resize_pending && is_capture;
- 
--    for (;;) {
--        // If we have a resize pending then all buffers should be Qed
--        // With a resize pending we should be in drain but evidence suggests
--        // that not all decoders do this so poll to clear
--        int t2 = no_rx_means_done ? 0 : timeout < 0 ? 3000 : timeout;
--        const int e = pfd.events;
--
--        ret = poll(&pfd, 1, t2);
-+    if (V4L2_TYPE_IS_CAPTURE(ctx->type)) {
-+        // Zero length cap buffer return == EOS
-+        if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) {
-+            av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n");
- 
--        if (ret > 0)
--            break;
-+            // Must reQ so we don't leak
-+            // May not matter if the next thing we do is release all the
-+            // buffers but better to be tidy.
-+            ff_v4l2_buffer_enqueue(avbuf);
- 
--        if (ret < 0) {
--            int err = errno;
--            if (err == EINTR)
--                continue;
--            av_log(logger(ctx), AV_LOG_ERROR, "=== poll error %d (%s): events=%#x, cap buffers=%d\n",
--                   err, strerror(err),
--                   e, count_in_driver(ctx));
--            return NULL;
-+            ctx->flag_last = 1;
-+            return AVERROR(EPIPE);
-         }
- 
--        // ret == 0 (timeout)
--        if (no_rx_means_done) {
--            av_log(logger(ctx), AV_LOG_DEBUG, "Ctx done on timeout\n");
--            ret = ctx_done(ctx);
--            if (ret > 0)
--                goto start;
--        }
--        if (timeout == -1)
--            av_log(logger(ctx), AV_LOG_ERROR, "=== poll unexpected TIMEOUT: events=%#x, cap buffers=%d\n", e, count_in_driver(ctx));;
--        return NULL;
-+#ifdef V4L2_BUF_FLAG_LAST
-+        // If flag_last set then this contains data but is the last frame
-+        // so remember that but return OK
-+        if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0)
-+            ctx->flag_last = 1;
-+#endif
-     }
- 
--    /* 0. handle errors */
--    if (pfd.revents & POLLERR) {
--        /* if we are trying to get free buffers but none have been queued yet
--           no need to raise a warning */
--        if (timeout == 0) {
--            for (i = 0; i < ctx->num_buffers; i++) {
--                avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
--                if (avbuf->status != V4L2BUF_AVAILABLE)
--                    av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
--            }
--        }
--        else
--            av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
-+    *ppavbuf = avbuf;
-+    return 0;
-+}
- 
--        return NULL;
--    }
-+/**
-+ * handle resolution change event and end of stream event
-+ * Expects to be called after the stream has stopped
-+ *
-+ * returns 1 if reinit was successful, negative if it failed
-+ * returns 0 if reinit was not executed
-+ */
-+static int
-+get_event(V4L2m2mContext * const m)
-+{
-+    AVCodecContext * const avctx = m->avctx;
-+    struct v4l2_event evt = { 0 };
- 
--    /* 1. handle resolution changes */
--    if (pfd.revents & POLLPRI) {
--        ret = v4l2_handle_event(ctx);
--        if (ret < 0) {
--            /* if re-init failed, abort */
--            ctx->done = 1;
--            return NULL;
-+    while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) {
-+        const int rv = AVERROR(errno);
-+        if (rv == AVERROR(EINTR))
-+            continue;
-+        if (rv == AVERROR(EAGAIN)) {
-+            av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n");
-+            return AVERROR_EOF;
-         }
--        if (ret > 0)
--            goto start;
-+        av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv));
-+        return rv;
-+    }
-+
-+    av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type);
-+
-+    if (evt.type == V4L2_EVENT_EOS) {
-+        av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n");
-+        return AVERROR_EOF;
-     }
- 
--    /* 2. dequeue the buffer */
--    if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) {
-+    if (evt.type == V4L2_EVENT_SOURCE_CHANGE)
-+        return do_source_change(m);
- 
--        if (is_capture) {
--            /* there is a capture buffer ready */
--            if (pfd.revents & (POLLIN | POLLRDNORM))
--                goto dequeue;
-+    return 0;
-+}
- 
--            // CAPTURE Q drained
--            if (no_rx_means_done) {
--                if (ctx_done(ctx) > 0)
--                    goto start;
--                return NULL;
--            }
- 
--            /* the driver is ready to accept more input; instead of waiting for the capture
--             * buffer to complete we return NULL so input can proceed (we are single threaded)
--             */
--            if (pfd.revents & (POLLOUT | POLLWRNORM))
--                return NULL;
-+// Get a buffer
-+// If output then just gets the buffer in the expected way
-+// If capture then runs the capture state m/c to deal with res change etc.
-+// If return value == 0 then *ppavbuf != NULL
-+
-+static int
-+get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout)
-+{
-+    V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
-+    AVCodecContext * const avctx = m->avctx;
-+    const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type);
-+
-+    const unsigned int poll_cap = (POLLIN | POLLRDNORM);
-+    const unsigned int poll_out = (POLLOUT | POLLWRNORM);
-+    const unsigned int poll_event = POLLPRI;
-+
-+    *ppavbuf = NULL;
-+
-+    for (;;) {
-+        struct pollfd pfd = {
-+            .fd = m->fd,
-+            // If capture && stream not started then assume we are waiting for the initial event
-+            .events = !is_cap ? poll_out :
-+                !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? poll_cap :
-+                    poll_event,
-+        };
-+        int ret;
-+
-+        if (ctx->done) {
-+            av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name);
-+            return AVERROR_EOF;
-         }
- 
--dequeue:
--        memset(&buf, 0, sizeof(buf));
--        buf.memory = V4L2_MEMORY_MMAP;
--        buf.type = ctx->type;
--        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
--            memset(planes, 0, sizeof(planes));
--            buf.length = VIDEO_MAX_PLANES;
--            buf.m.planes = planes;
-+        // If capture && timeout == -1 then also wait for rx buffer free
-+        if (is_cap && timeout == -1 && m->output.streamon && !m->draining)
-+            pfd.events |= poll_out;
-+
-+        // If nothing Qed all we will get is POLLERR - avoid that
-+        if ((pfd.events == poll_out && atomic_load(&m->output.q_count) == 0) ||
-+            (pfd.events == poll_cap && atomic_load(&m->capture.q_count) == 0) ||
-+            (pfd.events == (poll_cap | poll_out) && atomic_load(&m->capture.q_count) == 0 && atomic_load(&m->output.q_count) == 0)) {
-+            av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name);
-+            return AVERROR(EAGAIN);
-         }
- 
--        while ((ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf)) == -1) {
--            const int err = errno;
--            if (err == EINTR)
-+        // Timeout kludged s.t. "forever" eventually gives up & produces logging
-+        // If waiting for an event when we have seen a last_frame then we expect
-+        //   it to be ready already so force a short timeout
-+        ret = poll(&pfd, 1,
-+                   ff_v4l2_ctx_eos(ctx) ? 10 :
-+                   timeout == -1 ? 3000 : timeout);
-+        if (ret < 0) {
-+            ret = AVERROR(errno);  // Remember errno before logging etc.
-+            av_assert0(ret < 0);
-+        }
-+
-+        av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n",
-+               ctx->name, ret, timeout, pfd.events, pfd.revents);
-+
-+        if (ret < 0) {
-+            if (ret == AVERROR(EINTR))
-                 continue;
--            if (err != EAGAIN) {
--                // EPIPE on CAPTURE can be used instead of BUF_FLAG_LAST
--                if (err != EPIPE || !is_capture)
--                    av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
--                        ctx->name, av_err2str(AVERROR(err)));
--                if (ctx_done(ctx) > 0)
--                    goto start;
-+            av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret));
-+            return ret;
-+        }
-+
-+        if (ret == 0) {
-+            if (timeout == -1)
-+                av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events);
-+            if (ff_v4l2_ctx_eos(ctx)) {
-+                av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name);
-+                ret = get_event(m);
-+                if (ret < 0) {
-+                    ctx->done = 1;
-+                    return ret;
-+                }
-             }
--            return NULL;
-+            return AVERROR(EAGAIN);
-         }
--        --ctx->q_count;
--        av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d field=%d\n",
--               ctx->name, buf.index,
--               buf.timestamp.tv_sec, buf.timestamp.tv_usec,
--               ctx->q_count, ++ctx->dq_count, buf.field);
--
--        avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
--        avbuf->status = V4L2BUF_AVAILABLE;
--        avbuf->buf = buf;
--        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
--            memcpy(avbuf->planes, planes, sizeof(planes));
--            avbuf->buf.m.planes = avbuf->planes;
-+
-+        if ((pfd.revents & POLLERR) != 0) {
-+            av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name);
-+            return AVERROR_UNKNOWN;
-         }
- 
--        if (ctx_to_m2mctx(ctx)->draining && is_capture) {
--            int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ?
--                            buf.m.planes[0].bytesused : buf.bytesused;
--            if (bytesused == 0) {
--                av_log(logger(ctx), AV_LOG_DEBUG, "Buffer empty - reQ\n");
-+        if ((pfd.revents & poll_event) != 0) {
-+            ret = get_event(m);
-+            if (ret < 0) {
-+                ctx->done = 1;
-+                return ret;
-+            }
-+            continue;
-+        }
- 
--                // Must reQ so we don't leak
--                // May not matter if the next thing we do is release all the
--                // buffers but better to be tidy.
--                ff_v4l2_buffer_enqueue(avbuf);
-+        if ((pfd.revents & poll_cap) != 0) {
-+            ret = dq_buf(ctx, ppavbuf);
-+            if (ret == AVERROR(EPIPE))
-+                continue;
-+            return ret;
-+        }
- 
--                if (ctx_done(ctx) > 0)
--                    goto start;
--                return NULL;
--            }
--#ifdef V4L2_BUF_FLAG_LAST
--            if (buf.flags & V4L2_BUF_FLAG_LAST) {
--                av_log(logger(ctx), AV_LOG_TRACE, "FLAG_LAST set\n");
--                avbuf->status = V4L2BUF_IN_USE;  // Avoid flushing this buffer
--                ctx_done(ctx);
--            }
--#endif
-+        if ((pfd.revents & poll_out) != 0) {
-+            if (is_cap)
-+                return AVERROR(EAGAIN);
-+            return dq_buf(ctx, ppavbuf);
-         }
- 
--        return avbuf;
-+        av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents);
-+        return AVERROR_UNKNOWN;
-     }
--
--    return NULL;
- }
- 
- static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
- {
--    int timeout = 0; /* return when no more buffers to dequeue */
-     int i;
- 
-     /* get back as many output buffers as possible */
-     if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
--          do {
--          } while (v4l2_dequeue_v4l2buf(ctx, timeout));
-+        V4L2Buffer * avbuf;
-+        do {
-+            get_qbuf(ctx, &avbuf, 0);
-+        } while (avbuf);
-     }
- 
-     for (i = 0; i < ctx->num_buffers; i++) {
-@@ -722,7 +706,7 @@ static void flush_all_buffers_status(V4L2Context* const ctx)
-         if (buf->status == V4L2BUF_IN_DRIVER)
-             buf->status = V4L2BUF_AVAILABLE;
-     }
--    ctx->q_count = 0;
-+    atomic_store(&ctx->q_count, 0);
- }
- 
- static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx)
-@@ -755,6 +739,10 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
-     int ret;
-     AVCodecContext * const avctx = logger(ctx);
- 
-+    // Avoid doing anything if there is nothing we can do
-+    if (cmd == VIDIOC_STREAMOFF && !ctx_buffers_alloced(ctx) && !ctx->streamon)
-+        return 0;
-+
-     ff_mutex_lock(&ctx->lock);
- 
-     if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type))
-@@ -777,6 +765,9 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
-                cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF");
-     }
- 
-+    // Both stream off & on effectively clear flag_last
-+    ctx->flag_last = 0;
-+
-     ff_mutex_unlock(&ctx->lock);
- 
-     return ret;
-@@ -840,19 +831,10 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
- int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
- {
-     V4L2Buffer *avbuf;
-+    int rv;
- 
--    /*
--     * timeout=-1 blocks until:
--     *  1. decoded frame available
--     *  2. an input buffer is ready to be dequeued
--     */
--    avbuf = v4l2_dequeue_v4l2buf(ctx, timeout);
--    if (!avbuf) {
--        if (ctx->done)
--            return AVERROR_EOF;
--
--        return AVERROR(EAGAIN);
--    }
-+    if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
-+        return rv;
- 
-     return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
- }
-@@ -860,19 +842,10 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
- int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
- {
-     V4L2Buffer *avbuf;
-+    int rv;
- 
--    /*
--     * blocks until:
--     *  1. encoded packet available
--     *  2. an input buffer ready to be dequeued
--     */
--    avbuf = v4l2_dequeue_v4l2buf(ctx, -1);
--    if (!avbuf) {
--        if (ctx->done)
--            return AVERROR_EOF;
--
--        return AVERROR(EAGAIN);
--    }
-+    if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0)
-+        return rv;
- 
-     return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
- }
-@@ -956,6 +929,8 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers
-     int ret;
-     int i;
- 
-+    av_assert0(ctx->bufrefs == NULL);
-+
-     memset(&req, 0, sizeof(req));
-     req.count = req_buffers;
-     req.memory = V4L2_MEMORY_MMAP;
-@@ -1033,8 +1008,8 @@ int ff_v4l2_context_init(V4L2Context* ctx)
-         hwframes = (AVHWFramesContext*)ctx->frames_ref->data;
-         hwframes->format = AV_PIX_FMT_DRM_PRIME;
-         hwframes->sw_format = ctx->av_pix_fmt;
--        hwframes->width = ctx->width;
--        hwframes->height = ctx->height;
-+        hwframes->width = ctx->width != 0 ? ctx->width : s->avctx->width;
-+        hwframes->height = ctx->height != 0 ? ctx->height : s->avctx->height;
-         ret = av_hwframe_ctx_init(ctx->frames_ref);
-         if (ret < 0)
-             goto fail_unref_hwframes;
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index a4176448d595..565858a1ed17 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -102,6 +102,8 @@ typedef struct V4L2Context {
-      */
-     int done;
- 
-+    int flag_last;
-+
-     /**
-      * PTS rescale not wanted
-      * If the PTS is just a dummy frame count then rescale is
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index 516e6d98583d..e26bd74c3e9a 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -235,7 +235,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
- 
-     /* 5. complete reinit */
-     s->draining = 0;
--    s->reinit = 0;
- 
-     return 0;
- }
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 3f8680962342..d71f6b721c94 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -84,8 +84,6 @@ typedef struct V4L2m2mContext {
-     AVCodecContext *avctx;
-     sem_t refsync;
-     atomic_uint refcount;
--    int reinit;
--    int resize_pending;
- 
-     /* null frame/packet received */
-     int draining;
-@@ -180,15 +178,25 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx);
- int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx);
- 
- 
--static inline unsigned int ff_v4l2_get_format_width(struct v4l2_format *fmt)
-+static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt)
- {
-     return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
- }
- 
--static inline unsigned int ff_v4l2_get_format_height(struct v4l2_format *fmt)
-+static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt)
- {
-     return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
- }
- 
-+static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt)
-+{
-+    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
-+}
-+
-+static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx)
-+{
-+    return ctx->flag_last;
-+}
-+
- 
- #endif /* AVCODEC_V4L2_M2M_H */
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 09ec4963517b..e4b6569ba5bd 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -113,9 +113,6 @@ static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *co
-     if (ret < 0)
-         av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context\n");
- 
--    if (!s->capture.streamon || ret < 0)
--        return ret;
--
-     ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd);
-     if (ret < 0)
-         av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno);
-@@ -127,69 +124,12 @@ static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *co
- 
- static int v4l2_try_start(AVCodecContext *avctx)
- {
--    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
--    V4L2Context *const capture = &s->capture;
--    struct v4l2_selection selection = { 0 };
-+    V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-     int ret;
- 
-     /* 1. start the output process */
-     if ((ret = check_output_streamon(avctx, s)) != 0)
-         return ret;
--
--    if (capture->streamon)
--        return 0;
--
--    /* 2. get the capture format */
--    capture->format.type = capture->type;
--    ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format);
--    if (ret) {
--        av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n");
--        return ret;
--    }
--
--    /* 2.1 update the AVCodecContext */
--    capture->av_pix_fmt =
--        ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
--    if (s->output_drm) {
--        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
--        avctx->sw_pix_fmt = capture->av_pix_fmt;
--    }
--    else
--        avctx->pix_fmt = capture->av_pix_fmt;
--
--    /* 3. set the crop parameters */
--#if 1
--    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
--    selection.target = V4L2_SEL_TGT_CROP_DEFAULT;
--    ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
--    av_log(avctx, AV_LOG_INFO, "Post G selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height);
--#else
--    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
--    selection.r.height = avctx->coded_height;
--    selection.r.width = avctx->coded_width;
--    av_log(avctx, AV_LOG_INFO, "Try selection %dx%d\n", avctx->coded_width, avctx->coded_height);
--    ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
--    av_log(avctx, AV_LOG_INFO, "Post S selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height);
--    if (1) {
--        ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
--        if (ret) {
--            av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
--        } else {
--            av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height);
--            /* update the size of the resulting frame */
--            capture->height = selection.r.height;
--            capture->width  = selection.r.width;
--        }
--    }
--#endif
--
--    /* 5. start the capture process */
--    ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
--    if (ret) {
--        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n");
--        return ret;
--    }
--
-     return 0;
- }
- 
-@@ -364,7 +304,7 @@ xlat_pending(const xlat_track_t * const x)
- }
- 
- static inline int stream_started(const V4L2m2mContext * const s) {
--    return s->capture.streamon && s->output.streamon;
-+    return s->output.streamon;
- }
- 
- #define NQ_OK        0
-@@ -377,6 +317,9 @@ static inline int stream_started(const V4L2m2mContext * const s) {
- #define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING)
- #define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE)
- 
-+// do_not_get      If true then no new packet will be got but status will
-+//                  be set appropriately
-+
- // AVERROR_EOF     Flushing an already flushed stream
- // -ve             Error (all errors except EOF are unexpected)
- // NQ_OK (0)       OK
-@@ -386,14 +329,14 @@ static inline int stream_started(const V4L2m2mContext * const s) {
- // NQ_DRAINING     At EOS, dQ dest until EOS there too
- // NQ_DEAD         Not running (do not retry, do not attempt capture dQ)
- 
--static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s)
-+static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get)
- {
-     int ret;
- 
-     // If we don't already have a coded packet - get a new one
-     // We will already have a coded pkt if the output Q was full last time we
-     // tried to Q it
--    if (!s->buf_pkt.size) {
-+    if (!s->buf_pkt.size && !do_not_get) {
-         ret = ff_decode_get_packet(avctx, &s->buf_pkt);
- 
-         if (ret == AVERROR(EAGAIN)) {
-@@ -435,6 +378,17 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-         xlat_pts_in(avctx, &s->xlat, &s->buf_pkt);
-     }
- 
-+    if (s->draining) {
-+        if (s->buf_pkt.size) {
-+            av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n");
-+            av_packet_unref(&s->buf_pkt);
-+        }
-+        return NQ_DRAINING;
-+    }
-+
-+    if (!s->buf_pkt.size)
-+        return NQ_NONE;
-+
-     if ((ret = check_output_streamon(avctx, s)) != 0)
-         return ret;
- 
-@@ -471,7 +425,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
- static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- {
-     V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
--    int src_rv = NQ_NONE;
-+    int src_rv;
-     int dst_rv = 1;  // Non-zero (done), non-negative (error) number
-     unsigned int i = 0;
- 
-@@ -483,31 +437,40 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-         // (a) We don't have a lot of stuff in the buffer already OR
-         // (b) ... we (think we) do but we've failed to get a frame already OR
-         // (c) We've dequeued a lot of frames without asking for input
--        if (!prefer_dq || i != 0 || s->req_pkt > 2) {
--            src_rv = try_enqueue_src(avctx, s);
--
--            // If we got a frame last time or we've already tried to get a frame and
--            // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN)
--            // indicating that we want more input.
--            // This should mean that once decode starts we enter a stable state where
--            // we alternately ask for input and produce output
--            if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY)
--                break;
--        }
-+        src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2));
-+
-+        // If we got a frame last time or we've already tried to get a frame and
-+        // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN)
-+        // indicating that we want more input.
-+        // This should mean that once decode starts we enter a stable state where
-+        // we alternately ask for input and produce output
-+        if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY)
-+            break;
- 
-         // Try to get a new frame if
-         // (a) we haven't already got one AND
-         // (b) enqueue returned a status indicating that decode should be attempted
-         if (dst_rv != 0 && TRY_DQ(src_rv)) {
-+            // Pick a timeout depending on state
-+            const int t =
-+                src_rv == NQ_DRAINING ? 300 :
-+                prefer_dq ? 5 :
-+                src_rv == NQ_Q_FULL ? -1 : 0;
-+
-             do {
-                 // Dequeue frame will unref any previous contents of frame
-                 // if it returns success so we don't need an explicit unref
-                 // when discarding
-                 // This returns AVERROR(EAGAIN) on timeout or if
-                 // there is room in the input Q and timeout == -1
--                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, prefer_dq ? 5 : -1);
-+                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
- 
--                if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-+                if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
-+                    av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF");
-+                    dst_rv = AVERROR_EOF;
-+                    s->capture.done = 1;
-+                }
-+                else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-                     av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
-                            s->draining, s->capture.done);
-                 else if (dst_rv && dst_rv != AVERROR(EAGAIN))
-@@ -630,8 +593,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-      * by the v4l2 driver; this event will trigger a full pipeline reconfig and
-      * the proper values will be retrieved from the kernel driver.
-      */
--    output->height = capture->height = avctx->coded_height;
--    output->width = capture->width = avctx->coded_width;
-+//    output->height = capture->height = avctx->coded_height;
-+//    output->width = capture->width = avctx->coded_width;
-+    output->height = capture->height = 0;
-+    output->width = capture->width = 0;
- 
-     output->av_codec_id = avctx->codec_id;
-     output->av_pix_fmt  = AV_PIX_FMT_NONE;
-@@ -703,7 +668,6 @@ static void v4l2_decode_flush(AVCodecContext *avctx)
-     V4L2m2mContext * const s = priv->context;
-     V4L2Context * const output = &s->output;
-     V4L2Context * const capture = &s->capture;
--    int ret;
- 
-     av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon);
- 
-@@ -711,13 +675,19 @@ static void v4l2_decode_flush(AVCodecContext *avctx)
-     // states like EOS processing so don't try to optimize out (having got it
-     // wrong once)
- 
--    ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF);
--    if (ret < 0)
--        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", output->name, ret);
-+    ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF);
- 
-     // Clear any buffered input packet
-     av_packet_unref(&s->buf_pkt);
- 
-+    // Clear a pending EOS
-+    if (ff_v4l2_ctx_eos(capture)) {
-+        // Arguably we could delay this but this is easy and doesn't require
-+        // thought or extra vars
-+        ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF);
-+        ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
-+    }
-+
-     // V4L2 makes no guarantees about whether decoded frames are flushed or not
-     // so mark all frames we are tracking to be discarded if they appear
-     xlat_flush(&s->xlat);
-
-From 935dad1739bafaa8bf8e24d9461207b71af0d617 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 9 Dec 2021 18:51:00 +0000
-Subject: [PATCH 039/186] Honor result of ff_get_format if possible
-
----
- libavcodec/v4l2_m2m_dec.c | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index e4b6569ba5bd..c9655bcc3b43 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -615,15 +615,19 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-      *       check the v4l2_get_drm_frame function.
-      */
- 
-+    avctx->sw_pix_fmt = avctx->pix_fmt;
-     gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
-     av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s); get_format requested=%d (%s)\n",
-            avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
- 
--    s->output_drm = 0;
-     if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
-         avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-         s->output_drm = 1;
-     }
-+    else {
-+        capture->av_pix_fmt = gf_pix_fmt;
-+        s->output_drm = 0;
-+    }
- 
-     s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
-     if (!s->device_ref) {
-
-From 18e485cf1252bba30cfd5feef626ad9d90fcde6a Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 14 Dec 2021 16:11:10 +0000
-Subject: [PATCH 040/186] Add an always-reinit quirk
-
----
- libavcodec/v4l2_context.c |  7 +++++--
- libavcodec/v4l2_m2m.h     |  5 +++++
- libavcodec/v4l2_m2m_dec.c | 33 ++++++++++++++++++++++++++++++++-
- 3 files changed, 42 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index d765181645fb..c11b5e68637d 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -188,6 +188,9 @@ static int do_source_change(V4L2m2mContext * const s)
-     get_default_selection(&s->capture, &s->capture.selection);
- 
-     reinit = ctx_resolution_changed(&s->capture, &cap_fmt);
-+    if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0)
-+        reinit = 1;
-+
-     s->capture.format = cap_fmt;
-     if (reinit) {
-         s->capture.height = ff_v4l2_get_format_height(&cap_fmt);
-@@ -202,10 +205,10 @@ static int do_source_change(V4L2m2mContext * const s)
- 
-     s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
- 
--    av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d\n",
-+    av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d, reinit=%d\n",
-            s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
-            s->capture.selection.width, s->capture.selection.height,
--           s->capture.selection.left, s->capture.selection.top);
-+           s->capture.selection.left, s->capture.selection.top, reinit);
- 
-     if (reinit) {
-         if (avctx)
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index d71f6b721c94..f1923bb26d57 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -113,6 +113,11 @@ typedef struct V4L2m2mContext {
- 
-     /* Ext data sent */
-     int extdata_sent;
-+
-+#define FF_V4L2_QUIRK_REINIT_ALWAYS     1
-+    /* Quirks */
-+    unsigned int quirks;
-+
- } V4L2m2mContext;
- 
- typedef struct V4L2m2mPriv {
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index c9655bcc3b43..e2b10f5e3ac3 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -540,6 +540,34 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- }
- #endif
- 
-+static int
-+get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s)
-+{
-+    struct v4l2_capability cap;
-+
-+    memset(&cap, 0, sizeof(cap));
-+    while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) {
-+        int err = errno;
-+        if (err == EINTR)
-+            continue;
-+        av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err));
-+        return AVERROR(err);
-+    }
-+
-+    // Could be made table driven if we have a few more but right now there
-+    // seems no point
-+
-+    // Meson (amlogic) always gives a resolution changed event after output
-+    // streamon and userspace must (re)allocate capture buffers and streamon
-+    // capture to clear the event even if the capture buffers were the right
-+    // size in the first place.
-+    if (strcmp(cap.driver, "meson-vdec") == 0)
-+        s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS;
-+
-+    av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks);
-+    return 0;
-+}
-+
- // This heuristic is for H264 but use for everything
- static uint32_t max_coded_size(const AVCodecContext * const avctx)
- {
-@@ -646,7 +674,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-         return ret;
-     }
- 
--    return v4l2_prepare_decoder(s);
-+    if ((ret = v4l2_prepare_decoder(s)) < 0)
-+        return ret;
-+
-+    return get_quirks(avctx, s);
- }
- 
- static av_cold int v4l2_decode_close(AVCodecContext *avctx)
-
-From 9536a97e9a1119192cdb67b33799e68f39ce7630 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jan 2022 16:58:31 +0000
-Subject: [PATCH 041/186] v4l2_buffers: rework flags for keyframe
-
-Previously flags could become confused and keyframe info could be lost.
-This fixes that and removes the duplicate flags field in V4L2Buffer.
----
- libavcodec/v4l2_buffers.c | 15 ++++++++++-----
- libavcodec/v4l2_buffers.h |  1 -
- libavcodec/v4l2_context.c | 18 +++++++++++++++++-
- 3 files changed, 27 insertions(+), 7 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 2cf7be663263..62d1c2605363 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -680,7 +680,9 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- 
- int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- {
--    out->buf.flags = frame->key_frame ? (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME) : (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME);
-+    out->buf.flags = frame->key_frame ?
-+        (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
-+        (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
-     // Beware that colour info is held in format rather than the actual
-     // v4l2 buffer struct so this may not be as useful as you might hope
-     v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
-@@ -706,6 +708,10 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
- 
-     /* 2. get frame information */
-     frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME);
-+    frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I :
-+        (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P :
-+        (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B :
-+            AV_PICTURE_TYPE_NONE;
-     frame->color_primaries = v4l2_get_color_primaries(avbuf);
-     frame->colorspace = v4l2_get_color_space(avbuf);
-     frame->color_range = v4l2_get_color_range(avbuf);
-@@ -779,8 +785,9 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
- 
-     v4l2_set_pts(out, pkt->pts);
- 
--    if (pkt->flags & AV_PKT_FLAG_KEY)
--        out->flags = V4L2_BUF_FLAG_KEYFRAME;
-+    out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ?
-+        (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
-+        (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
- 
-     return ret;
- }
-@@ -924,8 +931,6 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
-     int ret;
-     int qc;
- 
--    avbuf->buf.flags = avbuf->flags;
--
-     if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) {
-         av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
-                avbuf->context->name, avbuf->buf.index,
-diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
-index 641e0e147b19..3b7ca4d99e1e 100644
---- a/libavcodec/v4l2_buffers.h
-+++ b/libavcodec/v4l2_buffers.h
-@@ -73,7 +73,6 @@ typedef struct V4L2Buffer {
-     struct v4l2_buffer buf;
-     struct v4l2_plane planes[VIDEO_MAX_PLANES];
- 
--    int flags;
-     enum V4L2Buffer_status status;
- 
- } V4L2Buffer;
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index c11b5e68637d..53b522d43e09 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -527,6 +527,22 @@ get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout
-     }
- }
- 
-+// Clear out flags and timestamps that should should be set by the user
-+// Returns the passed avbuf
-+static V4L2Buffer *
-+clean_v4l2_buffer(V4L2Buffer * const avbuf)
-+{
-+    struct v4l2_buffer *const buf = &avbuf->buf;
-+
-+    buf->flags = 0;
-+    buf->field = V4L2_FIELD_ANY;
-+    buf->timestamp = (struct timeval){0};
-+    buf->timecode = (struct v4l2_timecode){0};
-+    buf->sequence = 0;
-+
-+    return avbuf;
-+}
-+
- static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
- {
-     int i;
-@@ -542,7 +558,7 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
-     for (i = 0; i < ctx->num_buffers; i++) {
-         V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-         if (avbuf->status == V4L2BUF_AVAILABLE)
--            return avbuf;
-+            return clean_v4l2_buffer(avbuf);
-     }
- 
-     return NULL;
-
-From a1280d98cefbf5ef7d92a51261ad3485e0a2ca74 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 22 Mar 2022 11:44:30 +0000
-Subject: [PATCH 042/186] v4l2m2m: Rework decode to wait for missing buffer,
- add dynamic pending
-
-Previously receive_frame exited with EAGAIN if no capture buffer
-availble in the Q.  Now it waits in the hope that another thread will
-post one.
-
-The prefer dQ logic is now dynamic to help with cases where PTS/DTS
-lies.  If it looks like we are never getting a frame then the
-threshold is increased.  It then slowly decays over time to cope with
-false alarms.
----
- libavcodec/v4l2_buffers.c |  6 +++--
- libavcodec/v4l2_context.c |  7 +++--
- libavcodec/v4l2_context.h |  3 +++
- libavcodec/v4l2_m2m.h     |  2 ++
- libavcodec/v4l2_m2m_dec.c | 57 +++++++++++++++++++++++++++++++++++++--
- 5 files changed, 69 insertions(+), 6 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 62d1c2605363..8c4f18dbede2 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -947,12 +947,14 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
-         return AVERROR(err);
-     }
- 
-+    // Lock not wanted - if called from buffer free then lock already obtained
-     qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1;
-+    avbuf->status = V4L2BUF_IN_DRIVER;
-+    pthread_cond_broadcast(&avbuf->context->cond);
-+
-     av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
-            avbuf->context->name, avbuf->buf.index,
-            avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc);
- 
--    avbuf->status = V4L2BUF_IN_DRIVER;
--
-     return 0;
- }
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 53b522d43e09..7ddb7598109c 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -300,6 +300,7 @@ static int v4l2_stop_encode(V4L2Context *ctx)
- // Returns:
- //  0               Success
- //  AVERROR(EPIPE)  Nothing more to read
-+//  AVERROR(ENOSPC) No buffers in Q to put result in
- //  *               AVERROR(..)
- 
-  static int
-@@ -457,7 +458,7 @@ get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout
-             (pfd.events == poll_cap && atomic_load(&m->capture.q_count) == 0) ||
-             (pfd.events == (poll_cap | poll_out) && atomic_load(&m->capture.q_count) == 0 && atomic_load(&m->output.q_count) == 0)) {
-             av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name);
--            return AVERROR(EAGAIN);
-+            return AVERROR(ENOSPC);
-         }
- 
-         // Timeout kludged s.t. "forever" eventually gives up & produces logging
-@@ -864,7 +865,7 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
-     int rv;
- 
-     if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0)
--        return rv;
-+        return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
- 
-     return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
- }
-@@ -938,6 +939,7 @@ void ff_v4l2_context_release(V4L2Context* ctx)
-     av_buffer_unref(&ctx->frames_ref);
- 
-     ff_mutex_destroy(&ctx->lock);
-+    pthread_cond_destroy(&ctx->cond);
- }
- 
- 
-@@ -1013,6 +1015,7 @@ int ff_v4l2_context_init(V4L2Context* ctx)
-     }
- 
-     ff_mutex_init(&ctx->lock, NULL);
-+    pthread_cond_init(&ctx->cond, NULL);
-     atomic_init(&ctx->q_count, 0);
- 
-     if (s->output_drm) {
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 565858a1ed17..0efff58f1892 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -116,6 +116,7 @@ typedef struct V4L2Context {
-     struct ff_weak_link_master *wl_master;
- 
-     AVMutex lock;
-+    pthread_cond_t cond;
- } V4L2Context;
- 
- /**
-@@ -182,6 +183,8 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
-  * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
-  *
-  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
-+ *                AVERROR(ENOSPC) if no buffer availible to put
-+ *                the frame in
-  */
- int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
- 
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index f1923bb26d57..9a20447030e2 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -105,6 +105,8 @@ typedef struct V4L2m2mContext {
- 
-     /* Frame tracking */
-     xlat_track_t xlat;
-+    int pending_hw;
-+    int pending_n;
- 
-     pts_stats_t pts_stat;
- 
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index e2b10f5e3ac3..2e30449dfc1b 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -251,7 +251,8 @@ xlat_pts_out(AVCodecContext *const avctx,
- 
-     frame->best_effort_timestamp = pts_stats_guess(ps);
-     frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
--    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
-+    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n",
-+           frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n);
-     return 0;
- }
- 
-@@ -422,6 +423,36 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-     return ret;
- }
- 
-+static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx)
-+{
-+    int rv = 0;
-+
-+    ff_mutex_lock(&ctx->lock);
-+
-+    while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) {
-+        if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) {
-+            rv = AVERROR(errno);
-+            av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv));
-+            break;
-+        }
-+    }
-+
-+    ff_mutex_unlock(&ctx->lock);
-+    return rv;
-+}
-+
-+// Number of frames over what xlat_pending returns that we keep *16
-+// This is a min value - if it appears to be too small the threshold should
-+// adjust dynamically.
-+#define PENDING_HW_MIN      (3 * 16)
-+// Offset to use when setting dynamically
-+// Set to %16 == 15 to avoid the threshold changing immediately as we relax
-+#define PENDING_HW_OFFSET   (PENDING_HW_MIN - 1)
-+// Number of consecutive times we've failed to get a frame when we prefer it
-+// before we increase the prefer threshold (5ms * N = max expected decode
-+// time)
-+#define PENDING_N_THRESHOLD 6
-+
- static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- {
-     V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-@@ -431,7 +462,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- 
-     do {
-         const int pending = xlat_pending(&s->xlat);
--        const int prefer_dq = (pending > 5);
-+        const int prefer_dq = (pending > s->pending_hw / 16);
- 
-         // Enqueue another pkt for decode if
-         // (a) We don't have a lot of stuff in the buffer already OR
-@@ -465,6 +496,27 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                 // there is room in the input Q and timeout == -1
-                 dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
- 
-+                // Failure due to no buffer in Q?
-+                if (dst_rv == AVERROR(ENOSPC)) {
-+                    // Wait & retry
-+                    if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
-+                        dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
-+                    }
-+                }
-+
-+                // Adjust dynamic pending threshold
-+                if (dst_rv == 0) {
-+                    if (--s->pending_hw < PENDING_HW_MIN)
-+                        s->pending_hw = PENDING_HW_MIN;
-+                    s->pending_n = 0;
-+                }
-+                else if (dst_rv == AVERROR(EAGAIN)) {
-+                    if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) {
-+                        s->pending_hw = pending * 16 + PENDING_HW_OFFSET;
-+                        s->pending_n = 0;
-+                    }
-+                }
-+
-                 if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
-                     av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF");
-                     dst_rv = AVERROR_EOF;
-@@ -613,6 +665,7 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- 
-     xlat_init(&s->xlat);
-     pts_stats_init(&s->pts_stat, avctx, "decoder");
-+    s->pending_hw = PENDING_HW_MIN;
- 
-     capture = &s->capture;
-     output = &s->output;
-
-From b88b2c555f42688db681aab4d612c29d862246f7 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 25 Mar 2022 15:37:58 +0000
-Subject: [PATCH 043/186] v4l2_m2m2_dec: Avoid loop if unable to resize buffers
-
-If source change signals a buffer size that cannot be honored give up
-rather than looping indefinitely.  This happens on Pi if (say) a
-2560x1440 h264 stream is presented to the decode.
----
- libavcodec/v4l2_context.c | 13 +++++++++++--
- 1 file changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 7ddb7598109c..007a58c8f1db 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -205,8 +205,9 @@ static int do_source_change(V4L2m2mContext * const s)
- 
-     s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
- 
--    av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d, reinit=%d\n",
-+    av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n",
-            s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
-+           s->capture.width, s->capture.height,
-            s->capture.selection.width, s->capture.selection.height,
-            s->capture.selection.left, s->capture.selection.top, reinit);
- 
-@@ -224,9 +225,17 @@ static int do_source_change(V4L2m2mContext * const s)
-             return AVERROR(EINVAL);
-         }
- 
-+        if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) ||
-+            s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) {
-+            av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n",
-+                   s->capture.width, s->capture.height,
-+                   ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format));
-+            return AVERROR(EINVAL);
-+        }
-+
-         // Update pixel format - should only actually do something on initial change
-         s->capture.av_pix_fmt =
--        ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO);
-+            ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO);
-         if (s->output_drm) {
-             avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-             avctx->sw_pix_fmt = s->capture.av_pix_fmt;
-
-From 0ec92a29d4cd4baa7820f20918d78cf117200a3b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 25 Mar 2022 18:14:40 +0000
-Subject: [PATCH 044/186] v4l2dec: Improve size/format validation on init
-
----
- libavcodec/v4l2_m2m_dec.c      | 84 ++++++++++++++++++++++++++++++++--
- libavcodec/v4l2_request_hevc.c | 11 +++++
- 2 files changed, 92 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 2e30449dfc1b..8dcadf461bb8 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -592,6 +592,76 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- }
- #endif
- 
-+static int
-+check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
-+{
-+    unsigned int i;
-+    const uint32_t fcc = ff_v4l2_get_format_pixelformat(&s->capture.format);
-+    const uint32_t w = avctx->coded_width;
-+    const uint32_t h = avctx->coded_height;
-+
-+    if (w == 0 || h == 0 || fcc == 0) {
-+        av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc));
-+        return 0;
-+    }
-+
-+    for (i = 0;; ++i) {
-+        struct v4l2_frmsizeenum fs = {
-+            .index = i,
-+            .pixel_format = fcc,
-+        };
-+
-+        while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) {
-+            const int err = AVERROR(errno);
-+            if (err == AVERROR(EINTR))
-+                continue;
-+            if (i == 0 && err == AVERROR(ENOTTY)) {
-+                av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n");
-+                return 0;
-+            }
-+            if (err != AVERROR(EINVAL)) {
-+                av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err));
-+                return err;
-+            }
-+            av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in frame size enums\n",
-+                   w, h, av_fourcc2str(fcc));
-+            return err;
-+        }
-+
-+        switch (fs.type) {
-+            case V4L2_FRMSIZE_TYPE_DISCRETE:
-+                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i,
-+                       fs.discrete.width,fs.discrete.height);
-+                if (w == fs.discrete.width && h == fs.discrete.height)
-+                    return 0;
-+                break;
-+            case V4L2_FRMSIZE_TYPE_STEPWISE:
-+                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
-+                       fs.stepwise.min_width, fs.stepwise.min_height,
-+                       fs.stepwise.max_width, fs.stepwise.max_height,
-+                       fs.stepwise.step_width,fs.stepwise.step_height);
-+                if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
-+                    h >= fs.stepwise.min_height && h <= fs.stepwise.max_height &&
-+                    (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 &&
-+                    (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0)
-+                    return 0;
-+                break;
-+            case V4L2_FRMSIZE_TYPE_CONTINUOUS:
-+                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
-+                       fs.stepwise.min_width, fs.stepwise.min_height,
-+                       fs.stepwise.max_width, fs.stepwise.max_height,
-+                       fs.stepwise.step_width,fs.stepwise.step_height);
-+                if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
-+                    h >= fs.stepwise.min_height && h <= fs.stepwise.max_height)
-+                    return 0;
-+                break;
-+            default:
-+                av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type);
-+                return AVERROR(EINVAL);
-+        }
-+    }
-+}
-+
- static int
- get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s)
- {
-@@ -698,8 +768,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- 
-     avctx->sw_pix_fmt = avctx->pix_fmt;
-     gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
--    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s); get_format requested=%d (%s)\n",
--           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
-+    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
-+           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt),
-+           avctx->coded_width, avctx->coded_height,
-+           gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
- 
-     if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
-         avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-@@ -730,7 +802,13 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     if ((ret = v4l2_prepare_decoder(s)) < 0)
-         return ret;
- 
--    return get_quirks(avctx, s);
-+    if ((ret = get_quirks(avctx, s)) != 0)
-+        return ret;
-+
-+    if ((ret = check_size(avctx, s)) != 0)
-+        return ret;
-+
-+    return 0;
- }
- 
- static av_cold int v4l2_decode_close(AVCodecContext *avctx)
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index b0a5930844a8..76ab0916cd6a 100644
---- a/libavcodec/v4l2_request_hevc.c
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -147,6 +147,17 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
- 
-     av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
- 
-+    // Give up immediately if this is something that we have no code to deal with
-+    if (h->ps.sps->chroma_format_idc != 1) {
-+        av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc);
-+        return AVERROR_PATCHWELCOME;
-+    }
-+    if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) ||
-+        h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) {
-+        av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma);
-+        return AVERROR_PATCHWELCOME;
-+    }
-+
-     if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) {
-         av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n");
-         return (AVERROR(-ret));
-
-From 5d3752246afe17f69f896bfdee6faa61162c948a Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 13 Apr 2022 16:05:56 +0000
-Subject: [PATCH 045/186] v4l2 stateless hevc: Add another API variation for
- linux 5.18
-
-This is probably going to be a short lived variation and may end up
-being reverted if no release using it ever ends up in the wild.
----
- libavcodec/Makefile            |   2 +-
- libavcodec/hevc-ctrls-v3.h     | 255 +++++++++++++++++++++++++++++++++
- libavcodec/v4l2_req_hevc_v3.c  |   3 +
- libavcodec/v4l2_req_hevc_vx.c  |  17 +++
- libavcodec/v4l2_req_media.c    |  15 +-
- libavcodec/v4l2_req_media.h    |   3 +
- libavcodec/v4l2_request_hevc.c |   6 +-
- libavcodec/v4l2_request_hevc.h |   1 +
- 8 files changed, 295 insertions(+), 7 deletions(-)
- create mode 100644 libavcodec/hevc-ctrls-v3.h
- create mode 100644 libavcodec/v4l2_req_hevc_v3.c
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index e1aa0ba014ed..2b3c16185d75 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -1000,7 +1000,7 @@ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
- OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
- OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec.o
- OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o\
--                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o
-+                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o
- OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
- OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o h265_profile_level.o
- OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
 diff --git a/libavcodec/hevc-ctrls-v3.h b/libavcodec/hevc-ctrls-v3.h
 new file mode 100644
 index 000000000000..4e35bd583d58
@@ -20101,3363 +1080,12 @@ index 000000000000..4e35bd583d58
 +#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP	(V4L2_CID_CODEC_HANTRO_BASE + 0)
 +
 +#endif
-diff --git a/libavcodec/v4l2_req_hevc_v3.c b/libavcodec/v4l2_req_hevc_v3.c
-new file mode 100644
-index 000000000000..dcc8d9563209
---- /dev/null
-+++ b/libavcodec/v4l2_req_hevc_v3.c
-@@ -0,0 +1,3 @@
-+#define HEVC_CTRLS_VERSION 3
-+#include "v4l2_req_hevc_vx.c"
-+
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index 0ae03b10c4a8..611fa21cc319 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -16,6 +16,8 @@
- 
- #elif HEVC_CTRLS_VERSION == 2
- #include "hevc-ctrls-v2.h"
-+#elif HEVC_CTRLS_VERSION == 3
-+#include "hevc-ctrls-v3.h"
- #else
- #error Unknown HEVC_CTRLS_VERSION
- #endif
-@@ -147,6 +149,7 @@ static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_t
-     }
- }
- 
-+#if HEVC_CTRLS_VERSION <= 2
- static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp)
- {
-     const HEVCFrame *frame;
-@@ -172,6 +175,7 @@ static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp)
- 
-     return 0;
- }
-+#endif
- 
- static unsigned int
- get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame,
-@@ -247,7 +251,12 @@ fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const
-             struct v4l2_hevc_dpb_entry * const entry = entries + n++;
- 
-             entry->timestamp = frame_capture_dpb(frame->frame);
-+#if HEVC_CTRLS_VERSION <= 2
-             entry->rps = find_frame_rps_type(h, entry->timestamp);
-+#else
-+            entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 :
-+                V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE;
-+#endif
-             entry->field_pic = frame->frame->interlaced_frame;
- 
-             /* TODO: Interleaved: Get the POC for each field. */
-@@ -1011,6 +1020,14 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-     };
-     const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc);
- 
-+#if HEVC_CTRLS_VERSION == 2
-+    if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0))
-+        return AVERROR(EINVAL);
-+#elif HEVC_CTRLS_VERSION == 3
-+    if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0))
-+        return AVERROR(EINVAL);
-+#endif
-+
-     if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) {
-         av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION);
-         return AVERROR(EINVAL);
-diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c
-index eb00ecb40623..980b306b8a72 100644
---- a/libavcodec/v4l2_req_media.c
-+++ b/libavcodec/v4l2_req_media.c
-@@ -604,6 +604,7 @@ struct mediabufs_ctl {
- 
-     struct v4l2_format src_fmt;
-     struct v4l2_format dst_fmt;
-+    struct v4l2_capability capability;
- };
- 
- static int qe_v4l2_queue(struct qent_base *const be,
-@@ -1498,20 +1499,24 @@ void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc)
-     mediabufs_ctl_delete(mbc);
- }
- 
-+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc)
-+{
-+    return mbc->capability.version;
-+}
-+
- static int set_capabilities(struct mediabufs_ctl *const mbc)
- {
--    struct v4l2_capability capability = { 0 };
-     uint32_t caps;
- 
--    if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &capability)) {
-+    if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) {
-         int err = errno;
-         request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err));
-         return -err;
-     }
- 
--    caps = (capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ?
--            capability.device_caps :
--            capability.capabilities;
-+    caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ?
-+            mbc->capability.device_caps :
-+            mbc->capability.capabilities;
- 
-     if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) {
-         mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
-diff --git a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h
-index 2f826cfb14e7..0307a831defd 100644
---- a/libavcodec/v4l2_req_media.h
-+++ b/libavcodec/v4l2_req_media.h
-@@ -142,6 +142,9 @@ MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw,
-                   struct dmabufs_ctl * const dbsc,
-                   unsigned int n);
- 
-+#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
-+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc);
-+
- struct mediabufs_ctl * mediabufs_ctl_new(void * const dc,
-                      const char *vpath, struct pollqueue *const pq);
- void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc);
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index 76ab0916cd6a..20e4e0ab1559 100644
---- a/libavcodec/v4l2_request_hevc.c
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -210,7 +210,11 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-         goto fail4;
-     }
- 
--    if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) {
-+    if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) {
-+        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n");
-+        ctx->fns = &V2(ff_v4l2_req_hevc, 3);
-+    }
-+    else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) {
-         av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n");
-         ctx->fns = &V2(ff_v4l2_req_hevc, 2);
-     }
-diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h
-index f14f594564d7..ed48d62e2d53 100644
---- a/libavcodec/v4l2_request_hevc.h
-+++ b/libavcodec/v4l2_request_hevc.h
-@@ -98,5 +98,6 @@ typedef struct v4l2_req_decode_fns {
- 
- extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
- extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
-+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3);
- 
- #endif
-
-From a9773d356f79d719d96e2c59434647c0cb1295fd Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 3 May 2022 12:44:42 +0000
-Subject: [PATCH 046/186] Remove V4l2 frame size check for meson-vdec
-
----
- libavcodec/v4l2_m2m.h     |  3 ++-
- libavcodec/v4l2_m2m_dec.c | 10 +++++++---
- 2 files changed, 9 insertions(+), 4 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 9a20447030e2..6bd5e8eda76a 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -116,7 +116,8 @@ typedef struct V4L2m2mContext {
-     /* Ext data sent */
-     int extdata_sent;
- 
--#define FF_V4L2_QUIRK_REINIT_ALWAYS     1
-+#define FF_V4L2_QUIRK_REINIT_ALWAYS             1
-+#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN    2
-     /* Quirks */
-     unsigned int quirks;
- 
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 8dcadf461bb8..888ba67fea8c 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -604,6 +604,10 @@ check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
-         av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc));
-         return 0;
-     }
-+    if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) {
-+        av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc));
-+        return 0;
-+    }
- 
-     for (i = 0;; ++i) {
-         struct v4l2_frmsizeenum fs = {
-@@ -623,8 +627,8 @@ check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
-                 av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err));
-                 return err;
-             }
--            av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in frame size enums\n",
--                   w, h, av_fourcc2str(fcc));
-+            av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n",
-+                   w, h, av_fourcc2str(fcc), i);
-             return err;
-         }
- 
-@@ -684,7 +688,7 @@ get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s)
-     // capture to clear the event even if the capture buffers were the right
-     // size in the first place.
-     if (strcmp(cap.driver, "meson-vdec") == 0)
--        s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS;
-+        s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN;
- 
-     av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks);
-     return 0;
-
-From 2e80c1992c272b3b23d47e7afae817f45b0b2a88 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 23 May 2022 18:05:20 +0100
-Subject: [PATCH 047/186] v4l2m2m_dec: Make some error rturns a bit more robust
-
----
- libavcodec/v4l2_context.c |  5 ++---
- libavcodec/v4l2_m2m_dec.c | 23 ++++++++++++++---------
- 2 files changed, 16 insertions(+), 12 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 007a58c8f1db..b3662aedaa9f 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -765,7 +765,7 @@ static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx)
- int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
- {
-     int type = ctx->type;
--    int ret;
-+    int ret = 0;
-     AVCodecContext * const avctx = logger(ctx);
- 
-     // Avoid doing anything if there is nothing we can do
-@@ -777,8 +777,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
-     if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type))
-         stuff_all_buffers(avctx, ctx);
- 
--    ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type);
--    if (ret < 0) {
-+    if (ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type) < 0) {
-         const int err = errno;
-         av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name,
-                cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF", err);
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 888ba67fea8c..88a341aae2c2 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -110,16 +110,21 @@ static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *co
-         return 0;
- 
-     ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON);
--    if (ret < 0)
--        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context\n");
--
--    ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd);
--    if (ret < 0)
--        av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno);
--    else
--        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_DECODER_CMD start OK\n");
-+    if (ret != 0) {
-+        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret));
-+        return ret;
-+    }
- 
--    return ret;
-+    // STREAMON should do implicit START so this just for those that don't.
-+    // It is optional so don't worry if it fails
-+    if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) {
-+        ret = AVERROR(errno);
-+        av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret));
-+    }
-+    else {
-+        av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n");
-+    }
-+    return 0;
- }
- 
- static int v4l2_try_start(AVCodecContext *avctx)
-
-From 1c9856de210cdf151f53ce249cb4781722adfb3d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 24 May 2022 17:02:58 +0000
-Subject: [PATCH 048/186] v4l2m2m_dec: Support in-pkt AV_PKT_DATA_NEW_EXTRADATA
-
-Support packet side-data containing AV_PKT_DATA_NEW_EXTRADATA.  Should
-also detect and complain about unexpected streams of empty packets.
-
-This functionality untested as I haven't yet found anything that creates
-NEW_EXTRADATA side data.
----
- libavcodec/v4l2_m2m.c     |  1 +
- libavcodec/v4l2_m2m.h     |  3 +++
- libavcodec/v4l2_m2m_dec.c | 49 ++++++++++++++++++++++++++++++++++++---
- 3 files changed, 50 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index e26bd74c3e9a..6dd01e2e0085 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -251,6 +251,7 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context)
-     av_frame_unref(s->frame);
-     av_frame_free(&s->frame);
-     av_packet_unref(&s->buf_pkt);
-+    av_freep(&s->extdata_data);
- 
-     av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n");
- 
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 6bd5e8eda76a..19d618698dd7 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -115,6 +115,9 @@ typedef struct V4L2m2mContext {
- 
-     /* Ext data sent */
-     int extdata_sent;
-+    /* Ext data sent in packet - overrides ctx */
-+    uint8_t * extdata_data;
-+    size_t extdata_size;
- 
- #define FF_V4L2_QUIRK_REINIT_ALWAYS             1
- #define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN    2
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 88a341aae2c2..392a68f0c7d2 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -343,7 +343,46 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-     // We will already have a coded pkt if the output Q was full last time we
-     // tried to Q it
-     if (!s->buf_pkt.size && !do_not_get) {
--        ret = ff_decode_get_packet(avctx, &s->buf_pkt);
-+        unsigned int i;
-+
-+        for (i = 0; i < 256; ++i) {
-+            uint8_t * side_data;
-+            size_t side_size;
-+
-+            ret = ff_decode_get_packet(avctx, &s->buf_pkt);
-+            if (ret != 0)
-+                break;
-+
-+            // New extradata is the only side-data we undertand
-+            side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
-+            if (side_data) {
-+                av_log(avctx, AV_LOG_DEBUG, "New extradata\n");
-+                av_freep(&s->extdata_data);
-+                if ((s->extdata_data = av_malloc(side_size ? side_size : 1)) == NULL) {
-+                    av_log(avctx, AV_LOG_ERROR, "Failed to alloc %zd bytes of extra data\n", side_size);
-+                    return AVERROR(ENOMEM);
-+                }
-+                memcpy(s->extdata_data, side_data, side_size);
-+                s->extdata_size = side_size;
-+                s->extdata_sent = 0;
-+            }
-+
-+            if (s->buf_pkt.size != 0)
-+                break;
-+
-+            if (s->buf_pkt.side_data_elems == 0) {
-+                av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n");
-+                ret = AVERROR_EOF;
-+                break;
-+            }
-+
-+            // Retry a side-data only pkt
-+        }
-+        // If i >= 256 something has gone wrong
-+        if (i >= 256) {
-+            av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n");
-+            return AVERROR(EIO);
-+        }
- 
-         if (ret == AVERROR(EAGAIN)) {
-             if (!stream_started(s)) {
-@@ -398,8 +437,12 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-     if ((ret = check_output_streamon(avctx, s)) != 0)
-         return ret;
- 
--    ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt,
--                                         avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size);
-+    if (s->extdata_sent)
-+        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
-+    else if (s->extdata_data)
-+        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size);
-+    else
-+        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, avctx->extradata, avctx->extradata_size);
- 
-     if (ret == AVERROR(EAGAIN)) {
-         // Out of input buffers - keep packet
-
-From f128df5af073fb4cece2393c1fd0a19defb5675e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 24 May 2022 20:02:48 +0000
-Subject: [PATCH 049/186] v4l2m2m_dec: Catch repeated Q fulls
-
----
- libavcodec/v4l2_m2m_dec.c | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 392a68f0c7d2..7e170447064a 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -504,13 +504,14 @@ static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx)
- static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- {
-     V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
--    int src_rv;
-+    int src_rv = NQ_OK;
-     int dst_rv = 1;  // Non-zero (done), non-negative (error) number
-     unsigned int i = 0;
- 
-     do {
-         const int pending = xlat_pending(&s->xlat);
-         const int prefer_dq = (pending > s->pending_hw / 16);
-+        const int last_src_rv = src_rv;
- 
-         // Enqueue another pkt for decode if
-         // (a) We don't have a lot of stuff in the buffer already OR
-@@ -526,6 +527,11 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-         if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY)
-             break;
- 
-+        if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) {
-+            av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n");
-+            break;
-+        }
-+
-         // Try to get a new frame if
-         // (a) we haven't already got one AND
-         // (b) enqueue returned a status indicating that decode should be attempted
-
-From 3997265fa559c38feaf4458625e3ec0e353e505b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 25 May 2022 15:22:12 +0000
-Subject: [PATCH 050/186] Remove requirement for epoxy & libudev config options
-
----
- configure              | 26 +++++++++++++++++---------
- pi-util/conf_native.sh |  2 --
- 2 files changed, 17 insertions(+), 11 deletions(-)
-
-diff --git a/configure b/configure
-index a4ffd8797690..f3991452e4a5 100755
---- a/configure
-+++ b/configure
-@@ -205,6 +205,7 @@ External library support:
-   --disable-bzlib          disable bzlib [autodetect]
-   --disable-coreimage      disable Apple CoreImage framework [autodetect]
-   --enable-chromaprint     enable audio fingerprinting with chromaprint [no]
-+  --disable-epoxy          disable epoxy [autodetect]
-   --enable-frei0r          enable frei0r video filtering [no]
-   --enable-gcrypt          enable gcrypt, needed for rtmp(t)e support
-                            if openssl, librtmp or gmp is not used [no]
-@@ -281,7 +282,7 @@ External library support:
-                            if openssl, gnutls or mbedtls is not used [no]
-   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
-   --enable-libuavs3d       enable AVS3 decoding via libuavs3d [no]
--  --enable-libudev         enable libudev [no]
-+  --disable-libudev        disable libudev [autodetect]
-   --enable-libv4l2         enable libv4l2/v4l-utils [no]
-   --enable-libvidstab      enable video stabilization using vid.stab [no]
-   --enable-libvmaf         enable vmaf filter via libvmaf [no]
-@@ -1747,7 +1748,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST="
-     avfoundation
-     bzlib
-     coreimage
-+    epoxy
-     iconv
-+    libudev
-     libxcb
-     libxcb_shm
-     libxcb_shape
-@@ -1819,7 +1822,6 @@ EXTERNAL_LIBRARY_LIST="
-     libdav1d
-     libdc1394
-     libdrm
--    epoxy
-     libflite
-     libfontconfig
-     libfreetype
-@@ -1863,7 +1865,6 @@ EXTERNAL_LIBRARY_LIST="
-     libtheora
-     libtwolame
-     libuavs3d
--    libudev
-     libv4l2
-     libvmaf
-     libvorbis
-@@ -3567,9 +3568,8 @@ v4l2_indev_suggest="libv4l2"
- v4l2_outdev_deps="libdrm"
- v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
- v4l2_outdev_suggest="libv4l2"
--vout_drm_outdev_deps="libdrm vout_drm"
--vout_egl_outdev_deps="xlib"
--vout_egl_outdev_select="epoxy"
-+vout_drm_outdev_deps="libdrm"
-+vout_egl_outdev_deps="xlib epoxy"
- vfwcap_indev_deps="vfw32 vfwcap_defines"
- xcbgrab_indev_deps="libxcb"
- xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
-@@ -6355,6 +6355,12 @@ if enabled xlib; then
-         disable xlib
- fi
- 
-+enabled libudev &&
-+    check_pkg_config libudev libudev libudev.h udev_new
-+
-+enabled epoxy &&
-+    check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version
-+
- check_headers direct.h
- check_headers dirent.h
- check_headers dxgidebug.h
-@@ -6601,7 +6607,6 @@ enabled libdav1d          && require_pkg_config libdav1d "dav1d >= 0.5.0" "dav1d
- enabled libdavs2          && require_pkg_config libdavs2 "davs2 >= 1.6.0" davs2.h davs2_decoder_open
- enabled libdc1394         && require_pkg_config libdc1394 libdc1394-2 dc1394/dc1394.h dc1394_new
- enabled libdrm            && require_pkg_config libdrm libdrm xf86drm.h drmGetVersion
--enabled epoxy             && require_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version
- enabled libfdk_aac        && { check_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
-                                { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
-                                  warn "using libfdk without pkg-config"; } }
-@@ -6713,7 +6718,6 @@ enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame
-                              { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
-                                die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
- enabled libuavs3d         && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uavs3d.h uavs3d_decode
--enabled libudev           && require_pkg_config libudev libudev libudev.h udev_new
- enabled libv4l2           && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
- enabled libvidstab        && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
- enabled libvmaf           && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init
-@@ -6819,9 +6823,13 @@ enabled rkmpp             && { require_pkg_config rkmpp rockchip_mpp  rockchip/r
- enabled v4l2_request      && { enabled libdrm ||
-                                die "ERROR: v4l2-request requires --enable-libdrm"; } &&
-                              { enabled libudev ||
--                               die "ERROR: v4l2-request requires --enable-libudev"; }
-+                               die "ERROR: v4l2-request requires libudev"; }
- enabled vapoursynth       && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init
- 
-+enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; }
-+
-+enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } &&
-+                    { enabled xlib  || die "ERROR: vout_egl requires xlib"; }
- 
- if enabled gcrypt; then
-     GCRYPT_CONFIG="${cross_prefix}libgcrypt-config"
-diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
-index 65576846e81f..37cea71756ae 100755
---- a/pi-util/conf_native.sh
-+++ b/pi-util/conf_native.sh
-@@ -91,8 +91,6 @@ $FFSRC/configure \
-  --disable-thumb\
-  --enable-v4l2-request\
-  --enable-libdrm\
-- --enable-epoxy\
-- --enable-libudev\
-  --enable-vout-egl\
-  --enable-vout-drm\
-  $SHARED_LIBS\
-
-From bcc9de26a54ab85a5f225706f6de36c885d7cb4c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 27 May 2022 09:36:51 +0000
-Subject: [PATCH 051/186] hevc: If hwaccel avoid creation of s/w only vars
-
----
- libavcodec/hevc_refs.c | 35 +++++++++++++++++++++--------------
- libavcodec/hevcdec.c   | 42 +++++++++++++++++++++++++++++-------------
- 2 files changed, 50 insertions(+), 27 deletions(-)
-
-diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
-index 811e8feff8a1..f7cf14eabccc 100644
---- a/libavcodec/hevc_refs.c
-+++ b/libavcodec/hevc_refs.c
-@@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
-         if (!frame->rpl_buf)
-             goto fail;
- 
--        frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
--        if (!frame->tab_mvf_buf)
--            goto fail;
--        frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
-+        if (s->tab_mvf_pool) {
-+            frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
-+            if (!frame->tab_mvf_buf)
-+                goto fail;
-+            frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
-+        }
- 
--        frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
--        if (!frame->rpl_tab_buf)
--            goto fail;
--        frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
--        frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
--        for (j = 0; j < frame->ctb_count; j++)
--            frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
-+        if (s->rpl_tab_pool) {
-+            frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
-+            if (!frame->rpl_tab_buf)
-+                goto fail;
-+            frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
-+            frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
-+            for (j = 0; j < frame->ctb_count; j++)
-+                frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
-+        }
- 
-         frame->frame->top_field_first  = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
-         frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
-@@ -297,14 +301,17 @@ static int init_slice_rpl(HEVCContext *s)
-     int ctb_count    = frame->ctb_count;
-     int ctb_addr_ts  = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
-     int i;
-+    RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
- 
-     if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab))
-         return AVERROR_INVALIDDATA;
- 
--    for (i = ctb_addr_ts; i < ctb_count; i++)
--        frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
-+    if (frame->rpl_tab) {
-+        for (i = ctb_addr_ts; i < ctb_count; i++)
-+            frame->rpl_tab[i] = tab;
-+    }
- 
--    frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts];
-+    frame->refPicList = tab->refPicList;
- 
-     return 0;
- }
-diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
-index e892436f9405..a2c29a611c5a 100644
---- a/libavcodec/hevcdec.c
-+++ b/libavcodec/hevcdec.c
-@@ -536,6 +536,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps,
-     if (!sps)
-         return 0;
- 
-+    // If hwaccel then we don't need all the s/w decode helper arrays
-+    if (s->avctx->hwaccel) {
-+        export_stream_params(s, sps);
-+
-+        s->avctx->pix_fmt = pix_fmt;
-+        s->ps.sps = sps;
-+        s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
-+        return 0;
-+    }
-+
-     ret = pic_arrays_init(s, sps);
-     if (ret < 0)
-         goto fail;
-@@ -2893,11 +2903,13 @@ static int hevc_frame_start(HEVCContext *s)
-                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
-     int ret;
- 
--    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
--    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
--    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
--    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
--    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
-+    if (s->horizontal_bs) {
-+        memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
-+        memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
-+        memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
-+        memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
-+        memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
-+    }
- 
-     s->is_decoded        = 0;
-     s->first_nal_type    = s->nal_unit_type;
-@@ -3441,15 +3453,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
-         dst->needs_fg = 1;
-     }
- 
--    dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
--    if (!dst->tab_mvf_buf)
--        goto fail;
--    dst->tab_mvf = src->tab_mvf;
-+    if (src->tab_mvf_buf) {
-+        dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
-+        if (!dst->tab_mvf_buf)
-+            goto fail;
-+        dst->tab_mvf = src->tab_mvf;
-+    }
- 
--    dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
--    if (!dst->rpl_tab_buf)
--        goto fail;
--    dst->rpl_tab = src->rpl_tab;
-+    if (src->rpl_tab_buf) {
-+        dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
-+        if (!dst->rpl_tab_buf)
-+            goto fail;
-+        dst->rpl_tab = src->rpl_tab;
-+    }
- 
-     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
-     if (!dst->rpl_buf)
-
-From af130585ebdfcda7ee01819b6869aa6eb6a0172d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 30 May 2022 17:51:44 +0100
-Subject: [PATCH 052/186] rpi_sand: Add SAND30->NV12 conversion
-
-C code only. Reworks the hwcontext_drm conversion to use the
-rpi_sand_fns generic frame convert fn rather than calling the
-individual conversion functions directly. This keeps all teh stride and
-size logic in a single place.
----
- libavutil/hwcontext_drm.c | 46 ++++++++------------
- libavutil/rpi_sand_fns.c  | 89 +++++++++++++++++++++++++++++++++++++++
- libavutil/rpi_sand_fns.h  |  5 +++
- 3 files changed, 111 insertions(+), 29 deletions(-)
-
-diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c
-index baf18920fa14..137a952d2c3c 100644
---- a/libavutil/hwcontext_drm.c
-+++ b/libavutil/hwcontext_drm.c
-@@ -234,14 +234,14 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx,
-                                     enum AVHWFrameTransferDirection dir,
-                                     enum AVPixelFormat **formats)
- {
--    enum AVPixelFormat *pix_fmts;
-+    enum AVPixelFormat *p;
- 
--    pix_fmts = av_malloc_array(2, sizeof(*pix_fmts));
--    if (!pix_fmts)
-+    p = *formats = av_malloc_array(3, sizeof(*p));
-+    if (!p)
-         return AVERROR(ENOMEM);
- 
-     // **** Offer native sand too ????
--    pix_fmts[0] =
-+    *p++ =
- #if CONFIG_SAND
-         ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ?
-             AV_PIX_FMT_YUV420P :
-@@ -249,9 +249,14 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx,
-             AV_PIX_FMT_YUV420P10LE :
- #endif
-             ctx->sw_format;
--    pix_fmts[1] = AV_PIX_FMT_NONE;
- 
--    *formats = pix_fmts;
-+#if CONFIG_SAND
-+    if (ctx->sw_format == AV_PIX_FMT_RPI4_10 ||
-+        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128)
-+        *p++ = AV_PIX_FMT_NV12;
-+#endif
-+
-+    *p = AV_PIX_FMT_NONE;
-     return 0;
- }
- 
-@@ -294,29 +299,12 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc,
-         const unsigned int w = FFMIN(dst->width, map->width);
-         const unsigned int h = FFMIN(dst->height, map->height);
- 
--        if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) {
--            av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
--                                     map->data[0],
--                                     128, stride2,
--                                     0, 0, w, h);
--            av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
--                                     dst->data[2], dst->linesize[2],
--                                     map->data[1],
--                                     128, stride2,
--                                     0, 0, w / 2, h / 2);
--        }
--        else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) {
--            av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
--                                     map->data[0],
--                                     128, stride2,
--                                     0, 0, w, h);
--            av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
--                                     dst->data[2], dst->linesize[2],
--                                     map->data[1],
--                                     128, stride2,
--                                     0, 0, w / 2, h / 2);
--        }
--        else
-+        map->crop_top = 0;
-+        map->crop_bottom = 0;
-+        map->crop_left = 0;
-+        map->crop_right = 0;
-+
-+        if (av_rpi_sand_to_planar_frame(dst, map) != 0)
-         {
-             av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__);
-             err = AVERROR(EINVAL);
-diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c
-index 1f543e935701..256c3d532f38 100644
---- a/libavutil/rpi_sand_fns.c
-+++ b/libavutil/rpi_sand_fns.c
-@@ -229,6 +229,75 @@ void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_
-     }
- }
- 
-+// Fetches a single patch - offscreen fixup not done here
-+// w <= stride1
-+// single lose bottom 2 bits truncation
-+// _x & _w in pixels, strides in bytes
-+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h)
-+{
-+    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
-+    const unsigned int xskip0 = _x - (x0 >> 2) * 3;
-+    const unsigned int x1 = ((_x + _w) / 3) * 4;
-+    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3;
-+    const unsigned int mask = stride1 - 1;
-+    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
-+    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
-+
-+#if HAVE_SAND_ASM && 0
-+    if (_x == 0) {
-+        ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
-+        return;
-+    }
-+#endif
-+
-+    if (x0 == x1) {
-+        // *******************
-+        // Partial single word xfer
-+        return;
-+    }
-+
-+    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
-+    {
-+        unsigned int x = x0;
-+        const uint32_t * p = (const uint32_t *)p0;
-+        uint8_t * d = dst;
-+
-+        if (xskip0 != 0) {
-+            const uint32_t p3 = *p++;
-+
-+            if (xskip0 == 1)
-+                *d++ = (p3 >> 12) & 0xff;
-+            *d++ = (p3 >> 22) & 0xff;
-+
-+            if (((x += 4) & mask) == 0)
-+                p += slice_inc;
-+        }
-+
-+        while (x != x1) {
-+            const uint32_t p3 = *p++;
-+            *d++ = (p3 >> 2) & 0xff;
-+            *d++ = (p3 >> 12) & 0xff;
-+            *d++ = (p3 >> 22) & 0xff;
-+
-+            if (((x += 4) & mask) == 0)
-+                p += slice_inc;
-+        }
-+
-+        if (xrem1 != 0) {
-+            const uint32_t p3 = *p;
-+
-+            *d++ = (p3 >> 2) & 0xff;
-+            if (xrem1 == 2)
-+                *d++ = (p3 >> 12) & 0xff;
-+        }
-+    }
-+}
-+
-+
- 
- // w/h in pixels
- void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
-@@ -310,6 +379,16 @@ int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src)
-                                              av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-                                              x/2, y/2,  w/2, h/2);
-                     break;
-+                case AV_PIX_FMT_NV12:
-+                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
-+                                             src->data[0],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x, y, w, h);
-+                    av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1],
-+                                             src->data[1],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x/2, y/2, w, h/2);
-+                    break;
-                 default:
-                     return -1;
-             }
-@@ -344,6 +423,16 @@ int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src)
-                                              av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-                                              x/2, y/2, w/2, h/2);
-                     break;
-+                case AV_PIX_FMT_NV12:
-+                    av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0],
-+                                             src->data[0],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x, y, w, h);
-+                    av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1],
-+                                             src->data[1],
-+                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
-+                                             x/2, y/2, w, h/2);
-+                    break;
-                 default:
-                     return -1;
-             }
-diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h
-index 634b55e800dc..462ccb8abd18 100644
---- a/libavutil/rpi_sand_fns.h
-+++ b/libavutil/rpi_sand_fns.h
-@@ -85,6 +85,11 @@ void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_
-                              unsigned int _x, unsigned int y,
-                              unsigned int _w, unsigned int h);
- 
-+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
-+                             const uint8_t * src,
-+                             unsigned int stride1, unsigned int stride2,
-+                             unsigned int _x, unsigned int y,
-+                             unsigned int _w, unsigned int h);
- 
- // w/h in pixels
- void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
-
-From 474c3010278bb385614f536968681bd5043e81ae Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 1 Jun 2022 17:49:26 +0000
-Subject: [PATCH 053/186] rpi_sand: Add SAND30->NV12 asm for Armv7 & Armv8
-
-Also reworks the previous Armv8 SAND30->Y16 function in a slightly more
-efficient way that makes it look more like the Armv7 version.
----
- libavutil/aarch64/rpi_sand_neon.S | 549 ++++++++++++++++++------------
- libavutil/aarch64/rpi_sand_neon.h |   4 +
- libavutil/arm/rpi_sand_neon.S     | 239 ++++++++++---
- libavutil/arm/rpi_sand_neon.h     |  11 +
- libavutil/rpi_sand_fns.c          |   2 +-
- 5 files changed, 541 insertions(+), 264 deletions(-)
-
-diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
-index cdcf71ee6740..2f07d9674c9f 100644
---- a/libavutil/aarch64/rpi_sand_neon.S
-+++ b/libavutil/aarch64/rpi_sand_neon.S
-@@ -248,228 +248,6 @@ incomplete_block_loop_end_c8:
-     ret
- endfunc
- 
--//void ff_rpi_sand30_lines_to_planar_y16(
--//  uint8_t * dest,             // [x0]
--//  unsigned int dst_stride,    // [w1] -> assumed to be equal to _w
--//  const uint8_t * src,        // [x2]
--//  unsigned int src_stride1,   // [w3] -> 128
--//  unsigned int src_stride2,   // [w4]
--//  unsigned int _x,            // [w5]
--//  unsigned int y,             // [w6]
--//  unsigned int _w,            // [w7]
--//  unsigned int h);            // [sp, #0]
--
--function ff_rpi_sand30_lines_to_planar_y16, export=1
--    stp x19, x20, [sp, #-48]!
--    stp x21, x22, [sp, #16]
--    stp x23, x24, [sp, #32]
--
--    // w6 = argument h
--    ldr w6, [sp, #48]
--
--    // slice_inc = ((stride2 - 1) * stride1)
--    mov w5, w4
--    sub w5, w5, #1
--    lsl w5, w5, #7
--
--    // total number of bytes per row = (width / 3) * 4
--    mov w8, w7
--    mov w9, #3
--    udiv w8, w8, w9
--    lsl w8, w8, #2
--
--    // number of full 128 byte blocks to be processed
--    mov w9, #96
--    udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96
--
--    // w10 = number of full integers to process (4 bytes)
--    // w11 = remaning zero to two 10bit values still to copy over
--    mov w12, #96
--    mul w12, w9, w12
--    sub w12, w7, w12  // width - blocks*96 = remaining points per row
--    mov w11, #3
--    udiv w10, w12, w11 // full integers to process = w12 / 3 
--    mul w11, w10, w11  // #integers *3
--    sub w11, w12, w11  // remaining 0-2 points = remaining points - integers*3
--
--    // increase w9 by one if w10+w11 is not zero, and decrease the row count by one
--    // this is to efficiently copy incomplete blocks at the end of the rows
--    // the last row is handled explicitly to avoid writing out of bounds
--    add w22, w10, w11
--    cmp w22, #0
--    cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise
--    add w9, w9, w22
--    sub w6, w6, #1
--
--    // store the number of bytes in w20 which we copy too much for every row
--    // when the width of the frame is not a multiple of 96 (128bytes storing 96 10bit values)
--    mov w20, #96*2
--    mul w20, w20, w9
--    sub w20, w1, w20
--
--    mov w23, #0 // flag to check whether the last line had already been processed
--    
--    // bitmask to clear the uppper 6bits of the result values
--    mov x19, #0x03ff03ff03ff03ff
--    dup v22.2d, x19
--
--    // row counter = 0
--    eor w12, w12, w12
--row_loop_y16:
--    cmp w12, w6               // jump to row_loop_y16_fin if we processed all rows
--    bge row_loop_y16_fin
--
--    mov x13, x2               // row src
--    eor w14, w14, w14         // full block counter
--block_loop_y16:
--    cmp w14, w9
--    bge block_loop_y16_fin
--
--    // load 64 bytes
--    ld1 { v0.4s,  v1.4s, v2.4s, v3.4s }, [x13], #64
--   
--    // process v0 and v1
--    xtn v16.4h, v0.4s
--    ushr v0.4s, v0.4s, #10
--    xtn v17.4h, v0.4s
--    ushr v0.4s, v0.4s, #10
--    xtn v18.4h, v0.4s
--   
--    xtn2 v16.8h, v1.4s
--    and v16.16b, v16.16b, v22.16b
--    ushr v1.4s, v1.4s, #10
--    xtn2 v17.8h, v1.4s
--    and v17.16b, v17.16b, v22.16b
--    ushr v1.4s, v1.4s, #10
--    xtn2 v18.8h, v1.4s
--    and v18.16b, v18.16b, v22.16b
--
--    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
--
--    // process v2 and v3
--    xtn v23.4h, v2.4s
--    ushr v2.4s, v2.4s, #10
--    xtn v24.4h, v2.4s
--    ushr v2.4s, v2.4s, #10
--    xtn v25.4h, v2.4s
--    
--    xtn2 v23.8h, v3.4s
--    and v23.16b, v23.16b, v22.16b
--    ushr v3.4s, v3.4s, #10
--    xtn2 v24.8h, v3.4s
--    and v24.16b, v24.16b, v22.16b
--    ushr v3.4s, v3.4s, #10
--    xtn2 v25.8h, v3.4s
--    and v25.16b, v25.16b, v22.16b
--
--    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
--
--    // load the second half of the block -> 64 bytes into registers v4-v7
--    ld1 { v4.4s,  v5.4s,  v6.4s,  v7.4s }, [x13], #64
--    
--    // process v4 and v5
--    xtn v16.4h, v4.4s
--    ushr v4.4s, v4.4s, #10
--    xtn v17.4h, v4.4s
--    ushr v4.4s, v4.4s, #10
--    xtn v18.4h, v4.4s
--   
--    xtn2 v16.8h, v5.4s 
--    and v16.16b, v16.16b, v22.16b
--    ushr v5.4s, v5.4s, #10
--    xtn2 v17.8h, v5.4s
--    and v17.16b, v17.16b, v22.16b
--    ushr v5.4s, v5.4s, #10
--    xtn2 v18.8h, v5.4s
--    and v18.16b, v18.16b, v22.16b
--
--    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
--
--    // v6 and v7
--    xtn v23.4h, v6.4s
--    ushr v6.4s, v6.4s, #10
--    xtn v24.4h, v6.4s
--    ushr v6.4s, v6.4s, #10
--    xtn v25.4h, v6.4s
--   
--    xtn2 v23.8h, v7.4s 
--    and v23.16b, v23.16b, v22.16b
--    ushr v7.4s, v7.4s, #10
--    xtn2 v24.8h, v7.4s
--    and v24.16b, v24.16b, v22.16b
--    ushr v7.4s, v7.4s, #10
--    xtn2 v25.8h, v7.4s
--    and v25.16b, v25.16b, v22.16b
--
--    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
-- 
--    add x13, x13, x5          // row src += slice_inc
--    add w14, w14, #1
--    b block_loop_y16
--block_loop_y16_fin:
--
--    
--
--
--    add x2, x2, #128          // src += stride1 (start of the next row)
--    add x0, x0, w20, sxtw     // subtract the bytes we copied too much from dst
--    add w12, w12, #1
--    b row_loop_y16
--row_loop_y16_fin:
--
--    // check whether we have incomplete blocks at the end of every row
--    // in that case decrease row block count by one
--    // change height back to it's original value (meaning increase it by 1)
--    // and jump back to another iteration of row_loop_y16
--
--    cmp w23, #1
--    beq row_loop_y16_fin2 // don't continue here if we already processed the last row
--    add w6, w6, #1    // increase height to the original value
--    sub w9, w9, w22   // block count - 1 or 0, depending on the remaining bytes count
--    mov w23, #1
--    b row_loop_y16
--row_loop_y16_fin2:
--
--    sub x0, x0, w20, sxtw // with the last row we didn't actually move the dst ptr to far ahead, therefore readd the diference
--
--    // now we've got to handle the last block in the last row
--    eor w12, w12, w12 // w12 = 0 = counter
--integer_loop_y16:
--    cmp w12, w10
--    bge integer_loop_y16_fin
--    ldr w14, [x13], #4
--    and w15, w14, #0x3ff
--    strh w15, [x0], #2
--    lsr w14, w14, #10
--    and w15, w14, #0x3ff
--    strh w15, [x0], #2
--    lsr w14, w14, #10
--    and w15, w14, #0x3ff
--    strh w15, [x0], #2
--    add w12, w12, #1
--    b integer_loop_y16
--integer_loop_y16_fin:
--
--final_values_y16:
--    // remaining point count = w11
--    ldr w14, [x13], #4
--    cmp w11, #0
--    beq final_values_y16_fin
--    and w15, w14, #0x3ff
--    strh w15, [x0], #2
--    cmp w11, #1
--    beq final_values_y16_fin
--    lsr w14, w14, #10
--    and w15, w14, #0x3ff
--    strh w15, [x0], #2
--final_values_y16_fin:
--
--    ldp x23, x24, [sp, #32]
--    ldp x21, x22, [sp, #16]
--    ldp x19, x20, [sp], #48
--    ret
--endfunc
--
- //void ff_rpi_sand30_lines_to_planar_c16(
- //  uint8_t * dst_u,            // [x0]
- //  unsigned int dst_stride_u,  // [w1] == _w*2
-@@ -674,3 +452,330 @@ endfunc
- //  unsigned int _w,
- //  unsigned int h);
- 
-+// void ff_rpi_sand30_lines_to_planar_y8(
-+//   uint8_t * dest,            : x0
-+//   unsigned int dst_stride,   : w1
-+//   const uint8_t * src,       : x2
-+//   unsigned int src_stride1,  : w3, always 128
-+//   unsigned int src_stride2,  : w4
-+//   unsigned int _x,           : w5
-+//   unsigned int y,            : w6
-+//   unsigned int _w,           : w7
-+//   unsigned int h);           : [sp, #0]
-+//
-+// Assumes that we are starting on a stripe boundary and that overreading
-+// within the stripe is OK. However it does respect the dest size for wri
-+
-+function ff_rpi_sand30_lines_to_planar_y16, export=1
-+                lsl             w4,  w4,  #7
-+                sub             w4,  w4,  #64
-+                sub             w1,  w1,  w7, lsl #1
-+                uxtw            x6,  w6
-+                add             x8,  x2,  x6, lsl #7
-+                ldr             w6,  [sp, #0]
-+
-+10:
-+                mov             x2,  x8
-+                mov             w5,  w7
-+1:
-+                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
-+                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
-+
-+                subs            w5,  w5,  #96
-+
-+                // v0, v1
-+
-+                shrn            v18.4h,  v0.4s,   #14
-+                xtn             v16.4h,  v0.4s
-+                shrn            v17.4h,  v0.4s,   #10
-+
-+                shrn2           v18.8h,  v1.4s,   #14
-+                xtn2            v16.8h,  v1.4s
-+                shrn2           v17.8h,  v1.4s,   #10
-+
-+                ushr            v18.8h,  v18.8h,  #6
-+                bic             v16.8h,  #0xfc,   lsl #8
-+                bic             v17.8h,  #0xfc,   lsl #8
-+
-+                // v2, v3
-+
-+                shrn            v21.4h,  v2.4s,   #14
-+                xtn             v19.4h,  v2.4s
-+                shrn            v20.4h,  v2.4s,   #10
-+
-+                shrn2           v21.8h,  v3.4s,   #14
-+                xtn2            v19.8h,  v3.4s
-+                shrn2           v20.8h,  v3.4s,   #10
-+
-+                ushr            v21.8h,  v21.8h,  #6
-+                bic             v19.8h,  #0xfc,   lsl #8
-+                bic             v20.8h,  #0xfc,   lsl #8
-+
-+                // v4, v5
-+
-+                shrn            v24.4h,  v4.4s,   #14
-+                xtn             v22.4h,  v4.4s
-+                shrn            v23.4h,  v4.4s,   #10
-+
-+                shrn2           v24.8h,  v5.4s,   #14
-+                xtn2            v22.8h,  v5.4s
-+                shrn2           v23.8h,  v5.4s,   #10
-+
-+                ushr            v24.8h,  v24.8h,  #6
-+                bic             v22.8h,  #0xfc,   lsl #8
-+                bic             v23.8h,  #0xfc,   lsl #8
-+
-+                // v6, v7
-+
-+                shrn            v27.4h,  v6.4s,   #14
-+                xtn             v25.4h,  v6.4s
-+                shrn            v26.4h,  v6.4s,   #10
-+
-+                shrn2           v27.8h,  v7.4s,   #14
-+                xtn2            v25.8h,  v7.4s
-+                shrn2           v26.8h,  v7.4s,   #10
-+
-+                ushr            v27.8h,  v27.8h,  #6
-+                bic             v25.8h,  #0xfc,   lsl #8
-+                bic             v26.8h,  #0xfc,   lsl #8
-+
-+                blt             2f
-+
-+                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
-+                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
-+                st3             {v22.8h, v23.8h, v24.8h}, [x0], #48
-+                st3             {v25.8h, v26.8h, v27.8h}, [x0], #48
-+
-+                bne             1b
-+
-+11:
-+                subs            w6,  w6,  #1
-+                add             x0,  x0,  w1,  uxtw
-+                add             x8,  x8,  #128
-+                bne             10b
-+
-+                ret
-+
-+// Partial final write
-+2:
-+                cmp             w5,  #48-96
-+                blt             1f
-+                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
-+                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
-+                beq             11b
-+                mov             v16.16b, v22.16b
-+                mov             v17.16b, v23.16b
-+                sub             w5,  w5,  #48
-+                mov             v18.16b, v24.16b
-+                mov             v19.16b, v25.16b
-+                mov             v20.16b, v26.16b
-+                mov             v21.16b, v27.16b
-+1:
-+                cmp             w5,  #24-96
-+                blt             1f
-+                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
-+                beq             11b
-+                mov             v16.16b, v19.16b
-+                mov             v17.16b, v20.16b
-+                sub             w5,  w5,  #24
-+                mov             v18.16b, v21.16b
-+1:
-+                cmp             w5,  #12-96
-+                blt             1f
-+                st3             {v16.4h, v17.4h, v18.4h}, [x0], #24
-+                beq             11b
-+                mov             v16.2d[0], v16.2d[1]
-+                sub             w5,  w5,  #12
-+                mov             v17.2d[0], v17.2d[1]
-+                mov             v18.2d[0], v18.2d[1]
-+1:
-+                cmp             w5,  #6-96
-+                blt             1f
-+                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
-+                st3             {v16.h, v17.h, v18.h}[1], [x0], #6
-+                beq             11b
-+                mov             v16.2s[0], v16.2s[1]
-+                sub             w5,  w5,  #6
-+                mov             v17.2s[0], v17.2s[1]
-+                mov             v18.2s[0], v18.2s[1]
-+1:
-+                cmp             w5,  #3-96
-+                blt             1f
-+                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
-+                beq             11b
-+                mov             v16.4h[0], v16.4h[1]
-+                sub             w5,  w5,  #3
-+                mov             v17.4h[0], v17.4h[1]
-+1:
-+                cmp             w5,  #2-96
-+                blt             1f
-+                st2             {v16.h, v17.h}[0], [x0], #4
-+                b               11b
-+1:
-+                st1             {v16.h}[0], [x0], #2
-+                b               11b
-+
-+endfunc
-+
-+// void ff_rpi_sand30_lines_to_planar_y8(
-+//   uint8_t * dest,            : x0
-+//   unsigned int dst_stride,   : w1
-+//   const uint8_t * src,       : x2
-+//   unsigned int src_stride1,  : w3, always 128
-+//   unsigned int src_stride2,  : w4
-+//   unsigned int _x,           : w5
-+//   unsigned int y,            : w6
-+//   unsigned int _w,           : w7
-+//   unsigned int h);           : [sp, #0]
-+//
-+// Assumes that we are starting on a stripe boundary and that overreading
-+// within the stripe is OK. However it does respect the dest size for wri
-+
-+function ff_rpi_sand30_lines_to_planar_y8, export=1
-+                lsl             w4,  w4,  #7
-+                sub             w4,  w4,  #64
-+                sub             w1,  w1,  w7
-+                uxtw            x6,  w6
-+                add             x8,  x2,  x6, lsl #7
-+                ldr             w6,  [sp, #0]
-+
-+10:
-+                mov             x2,  x8
-+                mov             w5,  w7
-+1:
-+                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
-+                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
-+
-+                subs            w5,  w5,  #96
-+
-+                // v0, v1
-+
-+                shrn            v18.4h,  v0.4s,   #16
-+                xtn             v16.4h,  v0.4s
-+                shrn            v17.4h,  v0.4s,   #12
-+
-+                shrn2           v18.8h,  v1.4s,   #16
-+                xtn2            v16.8h,  v1.4s
-+                shrn2           v17.8h,  v1.4s,   #12
-+
-+                shrn            v18.8b,  v18.8h,  #6
-+                shrn            v16.8b,  v16.8h,  #2
-+                xtn             v17.8b,  v17.8h
-+
-+                // v2, v3
-+
-+                shrn            v21.4h,  v2.4s,   #16
-+                xtn             v19.4h,  v2.4s
-+                shrn            v20.4h,  v2.4s,   #12
-+
-+                shrn2           v21.8h,  v3.4s,   #16
-+                xtn2            v19.8h,  v3.4s
-+                shrn2           v20.8h,  v3.4s,   #12
-+
-+                shrn2           v18.16b, v21.8h,  #6
-+                shrn2           v16.16b, v19.8h,  #2
-+                xtn2            v17.16b, v20.8h
-+
-+                // v4, v5
-+
-+                shrn            v24.4h,  v4.4s,   #16
-+                xtn             v22.4h,  v4.4s
-+                shrn            v23.4h,  v4.4s,   #12
-+
-+                shrn2           v24.8h,  v5.4s,   #16
-+                xtn2            v22.8h,  v5.4s
-+                shrn2           v23.8h,  v5.4s,   #12
-+
-+                shrn            v21.8b,  v24.8h,  #6
-+                shrn            v19.8b,  v22.8h,  #2
-+                xtn             v20.8b,  v23.8h
-+
-+                // v6, v7
-+
-+                shrn            v27.4h,  v6.4s,   #16
-+                xtn             v25.4h,  v6.4s
-+                shrn            v26.4h,  v6.4s,   #12
-+
-+                shrn2           v27.8h,  v7.4s,   #16
-+                xtn2            v25.8h,  v7.4s
-+                shrn2           v26.8h,  v7.4s,   #12
-+
-+                shrn2           v21.16b, v27.8h,  #6
-+                shrn2           v19.16b, v25.8h,  #2
-+                xtn2            v20.16b, v26.8h
-+
-+                blt             2f
-+
-+                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
-+                st3             {v19.16b, v20.16b, v21.16b}, [x0], #48
-+
-+                bne             1b
-+
-+11:
-+                subs            w6,  w6,  #1
-+                add             x0,  x0,  w1,  uxtw
-+                add             x8,  x8,  #128
-+                bne             10b
-+
-+                ret
-+
-+// Partial final write
-+2:
-+                cmp             w5,  #48-96
-+                blt             1f
-+                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
-+                beq             11b
-+                mov             v16.16b, v22.16b
-+                mov             v17.16b, v23.16b
-+                sub             w5,  w5,  #48
-+                mov             v18.16b, v24.16b
-+1:
-+                cmp             w5,  #24-96
-+                blt             1f
-+                st3             {v16.8b, v17.8b, v18.8b}, [x0], #24
-+                beq             11b
-+                mov             v16.2d[0], v16.2d[1]
-+                sub             w5,  w5,  #24
-+                mov             v17.2d[0], v17.2d[1]
-+                mov             v18.2d[0], v18.2d[1]
-+1:
-+                cmp             w5,  #12-96
-+                blt             1f
-+                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
-+                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
-+                st3             {v16.b, v17.b, v18.b}[2], [x0], #3
-+                st3             {v16.b, v17.b, v18.b}[3], [x0], #3
-+                beq             11b
-+                mov             v16.2s[0], v16.2s[1]
-+                sub             w5,  w5,  #12
-+                mov             v17.2s[0], v17.2s[1]
-+                mov             v18.2s[0], v18.2s[1]
-+1:
-+                cmp             w5,  #6-96
-+                blt             1f
-+                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
-+                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
-+                beq             11b
-+                mov             v16.4h[0], v16.4h[1]
-+                sub             w5,  w5,  #6
-+                mov             v17.4h[0], v17.4h[1]
-+                mov             v18.4h[0], v18.4h[1]
-+1:
-+                cmp             w5,  #3-96
-+                blt             1f
-+                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
-+                beq             11b
-+                mov             v16.8b[0], v16.8b[1]
-+                sub             w5,  w5,  #3
-+                mov             v17.8b[0], v17.8b[1]
-+1:
-+                cmp             w5,  #2-96
-+                blt             1f
-+                st2             {v16.b, v17.b}[0], [x0], #2
-+                b               11b
-+1:
-+                st1             {v16.b}[0], [x0], #1
-+                b               11b
-+
-+endfunc
-+
-diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h
-index b3aa481ea497..2a56135bc327 100644
---- a/libavutil/aarch64/rpi_sand_neon.h
-+++ b/libavutil/aarch64/rpi_sand_neon.h
-@@ -49,6 +49,10 @@ void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_
-   uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1,
-   unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
- 
-+void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
-+  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
-+  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
-+
- #ifdef __cplusplus
- }
- #endif
-diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S
-index 80890fe9854b..60e697f6819b 100644
---- a/libavutil/arm/rpi_sand_neon.S
-+++ b/libavutil/arm/rpi_sand_neon.S
-@@ -360,7 +360,6 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1
-                 ldr             r6,  [sp, #36]
-                 ldr             r7,  [sp, #32]  @ y
-                 mov             r12, #48
--                vmov.u16        q15, #0x3ff
-                 sub             r3,  #1
-                 lsl             r3,  #7
-                 sub             r1,  r1,  r6,  lsl #1
-@@ -376,37 +375,33 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1
-                 vldm            r2!, {q10-q13}
-                 add             lr,  #64
- 
--                vshr.u32        q14, q10, #20    @ Cannot vshrn.u32 #20!
-+                vshrn.u32       d4 , q10, #14    @ Cannot vshrn.u32 #20!
-                 ands            lr,  #127
-                 vshrn.u32       d2,  q10, #10
-                 vmovn.u32       d0,  q10
--                vmovn.u32       d4,  q14
- 
--                vshr.u32        q14, q11, #20
-+                vshrn.u32       d5,  q11, #14
-                 it              eq
-                 addeq           r2,  r3
-                 vshrn.u32       d3,  q11, #10
-                 vmovn.u32       d1,  q11
--                vmovn.u32       d5,  q14
- 
-                 subs            r5,  #48
--                vand            q0,  q15
--                vand            q1,  q15
--                vand            q2,  q15
-+                vshr.u16        q2,  #6
-+                vbic.u16        q0,  #0xfc00
-+                vbic.u16        q1,  #0xfc00
- 
--                vshr.u32        q14, q12, #20
-+                vshrn.u32       d20, q12, #14
-                 vshrn.u32       d18, q12, #10
-                 vmovn.u32       d16, q12
--                vmovn.u32       d20, q14
- 
--                vshr.u32        q14, q13, #20
-+                vshrn.u32       d21, q13, #14
-                 vshrn.u32       d19, q13, #10
-                 vmovn.u32       d17, q13
--                vmovn.u32       d21, q14
- 
--                vand            q8,  q15
--                vand            q9,  q15
--                vand            q10, q15
-+                vshr.u16        q10, #6
-+                vbic.u16        q8,  #0xfc00
-+                vbic.u16        q9 , #0xfc00
-                 blt             2f
- 
-                 vst3.16         {d0,  d2,  d4},  [r0], r12
-@@ -499,7 +494,6 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1
-                 ldr             r7,  [sp, #48]
-                 ldr             r9,  [sp, #52]
-                 mov             r12, #48
--                vmov.u16        q15, #0x3ff
-                 sub             r8,  #1
-                 lsl             r8,  #7
-                 add             r5,  r5,  r7,  lsl #7
-@@ -515,48 +509,44 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1
-                 add             lr,  #64
- 
-                 @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2
--                vshr.u32        q14, q0,  #20
--                vshrn.u32       d16, q0,  #10
-+                vshrn.u32       d20, q0,  #14
-                 vmovn.u32       d18, q0
-+                vshrn.u32       d0,  q0,  #10
-                 ands            lr,  #127
--                vmovn.u32       d20, q14
- 
--                vshr.u32        q14, q1,  #20
--                vshrn.u32       d17, q1,  #10
-+                vshrn.u32       d21, q1,  #14
-                 vmovn.u32       d19, q1
--                vmovn.u32       d21, q14
-+                vshrn.u32       d1,  q1,  #10
- 
--                vshr.u32        q14, q2,  #20
-                 vshrn.u32       d22, q2,  #10
--                vmovn.u32       d24, q2
--                vmovn.u32       d26, q14
-+                vmovn.u32       d2,  q2
-+                vshrn.u32       d4,  q2,  #14
- 
--                vshr.u32        q14, q3,  #20
--                vshrn.u32       d23, q3,  #10
--                vmovn.u32       d25, q3
-                 add             r10, r0,  #24
--                vmovn.u32       d27, q14
-+                vshrn.u32       d23, q3,  #10
-+                vmovn.u32       d3,  q3
-+                vshrn.u32       d5,  q3,  #14
- 
-                 it              eq
-                 addeq           r4,  r8
--                vuzp.16         q8,  q11
--                vuzp.16         q9,  q12
--                vuzp.16         q10, q13
-+                vuzp.16         q0,  q11
-+                vuzp.16         q9,  q1
-+                vuzp.16         q10, q2
- 
--                @ q8   V0, V3,.. -> q0
-+                @ q0   V0, V3,..
-                 @ q9   U0, U3...
-                 @ q10  U1, U4...
-                 @ q11  U2, U5,..
--                @ q12  V1, V4,.. -> q1
--                @ q13  V2, V5,.. -> q2
-+                @ q1   V1, V4,
-+                @ q2   V2, V5,..
- 
-                 subs            r6,  #24
--                vand            q11, q15
--                vand            q9,  q15
--                vand            q10, q15
--                vand            q0,  q8,  q15
--                vand            q1,  q12, q15
--                vand            q2,  q13, q15
-+                vbic.u16        q11, #0xfc00
-+                vbic.u16        q9,  #0xfc00
-+                vshr.u16        q10, #6
-+                vshr.u16        q2,  #6
-+                vbic.u16        q0,  #0xfc00
-+                vbic.u16        q1,  #0xfc00
- 
-                 blt             2f
- 
-@@ -765,4 +755,171 @@ function ff_rpi_sand30_lines_to_planar_p010, export=1
- endfunc
- 
- 
-+@ void ff_rpi_sand30_lines_to_planar_y8(
-+@   uint8_t * dest,             // [r0]
-+@   unsigned int dst_stride,    // [r1]
-+@   const uint8_t * src,        // [r2]
-+@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+@   unsigned int src_stride2,   // [sp, #0]  -> r3
-+@   unsigned int _x,            // [sp, #4]  Ignored - 0
-+@   unsigned int y,             // [sp, #8]  (r7 in prefix)
-+@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+@   unsigned int h);            // [sp, #16] -> r7
-+@
-+@ Assumes that we are starting on a stripe boundary and that overreading
-+@ within the stripe is OK. However it does respect the dest size for wri
-+
-+function ff_rpi_sand30_lines_to_planar_y8, export=1
-+                push            {r4-r8, lr}     @ +24
-+                ldr             r3,  [sp, #24]
-+                ldr             r6,  [sp, #36]
-+                ldr             r7,  [sp, #32]  @ y
-+                mov             r12, #48
-+                lsl             r3,  #7
-+                sub             r1,  r1,  r6
-+                add             r8,  r2,  r7,  lsl #7
-+                ldr             r7,  [sp, #40]
-+
-+10:
-+                mov             r2,  r8
-+                add             r4,  r0,  #24
-+                mov             r5,  r6
-+1:
-+                vldm            r2,  {q8-q15}
-+
-+                subs            r5,  #96
-+
-+                vmovn.u32       d0,  q8
-+                vshrn.u32       d2,  q8,  #12
-+                vshrn.u32       d4,  q8,  #16    @ Cannot vshrn.u32 #20!
-+
-+                add             r2,  r3
-+
-+                vmovn.u32       d1,  q9
-+                vshrn.u32       d3,  q9,  #12
-+                vshrn.u32       d5,  q9,  #16
-+
-+                pld             [r2, #0]
-+
-+                vshrn.u16       d0,  q0,  #2
-+                vmovn.u16       d1,  q1
-+                vshrn.u16       d2,  q2,  #6
-+
-+                vmovn.u32       d16, q10
-+                vshrn.u32       d18, q10, #12
-+                vshrn.u32       d20, q10, #16
-+
-+                vmovn.u32       d17, q11
-+                vshrn.u32       d19, q11, #12
-+                vshrn.u32       d21, q11, #16
-+
-+                pld             [r2, #64]
-+
-+                vshrn.u16       d4,  q8,  #2
-+                vmovn.u16       d5,  q9
-+                vshrn.u16       d6,  q10, #6
-+
-+                vmovn.u32       d16, q12
-+                vshrn.u32       d18, q12, #12
-+                vshrn.u32       d20, q12, #16
-+
-+                vmovn.u32       d17, q13
-+                vshrn.u32       d19, q13, #12
-+                vshrn.u32       d21, q13, #16
-+
-+                vshrn.u16       d16, q8,  #2
-+                vmovn.u16       d17, q9
-+                vshrn.u16       d18, q10, #6
-+
-+                vmovn.u32       d20, q14
-+                vshrn.u32       d22, q14, #12
-+                vshrn.u32       d24, q14, #16
-+
-+                vmovn.u32       d21, q15
-+                vshrn.u32       d23, q15, #12
-+                vshrn.u32       d25, q15, #16
-+
-+                vshrn.u16       d20, q10, #2
-+                vmovn.u16       d21, q11
-+                vshrn.u16       d22, q12, #6
-+
-+                blt             2f
-+
-+                vst3.8          {d0,  d1,  d2},  [r0], r12
-+                vst3.8          {d4,  d5,  d6},  [r4], r12
-+                vst3.8          {d16, d17, d18}, [r0], r12
-+                vst3.8          {d20, d21, d22}, [r4], r12
-+
-+                bne             1b
-+
-+11:
-+                subs            r7,  #1
-+                add             r0,  r1
-+                add             r8,  #128
-+                bne             10b
-+
-+                pop             {r4-r8, pc}
-+
-+@ Partial final write
-+2:
-+                cmp             r5,  #48-96
-+                blt             1f
-+                vst3.8          {d0,  d1,  d2},  [r0], r12
-+                vst3.8          {d4,  d5,  d6},  [r4], r12
-+                beq             11b
-+                vmov            q0,  q8
-+                vmov            q2,  q10
-+                sub             r5,  #48
-+                vmov            d2,  d18
-+                vmov            d6,  d22
-+1:
-+                cmp             r5,  #24-96
-+                blt             1f
-+                vst3.8          {d0,  d1,  d2},  [r0]!
-+                beq             11b
-+                vmov            q0,  q2
-+                sub             r5,  #24
-+                vmov            d2,  d6
-+1:
-+                cmp             r5,  #12-96
-+                blt             1f
-+                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
-+                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
-+                vst3.8          {d0[2], d1[2], d2[2]}, [r0]!
-+                vst3.8          {d0[3], d1[3], d2[3]}, [r0]!
-+                beq             11b
-+                vmov            s0,  s1
-+                sub             r5,  #12
-+                vmov            s2,  s3
-+                vmov            s4,  s5
-+1:
-+                cmp             r5,  #6-96
-+                blt             1f
-+                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
-+                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
-+                add             r0,  #12
-+                beq             11b
-+                vshr.u32        d0,  #16
-+                sub             r5,  #6
-+                vshr.u32        d1,  #16
-+                vshr.u32        d2,  #16
-+1:
-+                cmp             r5, #3-96
-+                blt             1f
-+                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
-+                beq             11b
-+                sub             r5, #3
-+                vshr.u32        d0, #8
-+                vshr.u32        d1, #8
-+1:
-+                cmp             r5, #2-96
-+                blt             1f
-+                vst2.8          {d0[0], d1[0]}, [r0]!
-+                b               11b
-+1:
-+                vst1.8          {d0[0]}, [r0]!
-+                b               11b
-+
-+endfunc
-+
- 
-diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h
-index 447f367bea8f..d457c1087082 100644
---- a/libavutil/arm/rpi_sand_neon.h
-+++ b/libavutil/arm/rpi_sand_neon.h
-@@ -95,5 +95,16 @@ void ff_rpi_sand30_lines_to_planar_p010(
-   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-   unsigned int h);            // [sp, #16] -> r7
- 
-+void ff_rpi_sand30_lines_to_planar_y8(
-+  uint8_t * dest,             // [r0]
-+  unsigned int dst_stride,    // [r1]
-+  const uint8_t * src,        // [r2]
-+  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
-+  unsigned int src_stride2,   // [sp, #0]  -> r3
-+  unsigned int _x,            // [sp, #4]  Ignored - 0
-+  unsigned int y,             // [sp, #8]  (r7 in prefix)
-+  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
-+  unsigned int h);            // [sp, #16] -> r7
-+
- #endif // AVUTIL_ARM_SAND_NEON_H
- 
-diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c
-index 256c3d532f38..b6071e2928f7 100644
---- a/libavutil/rpi_sand_fns.c
-+++ b/libavutil/rpi_sand_fns.c
-@@ -247,7 +247,7 @@ void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
-     const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
-     const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
- 
--#if HAVE_SAND_ASM && 0
-+#if HAVE_SAND_ASM
-     if (_x == 0) {
-         ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
-         return;
-
-From 76556eb5987c43cc46decc0d642ed7d762113613 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 7 Jun 2022 14:46:12 +0000
-Subject: [PATCH 054/186] v4l2_m2m_enc: Add the ability to encode DRM_PRIME
- frames
-
----
- libavcodec/v4l2_buffers.c | 100 +++++++++++---
- libavcodec/v4l2_buffers.h |  20 ++-
- libavcodec/v4l2_context.c | 212 +++++++++++++++++++++++++---
- libavcodec/v4l2_context.h |  15 +-
- libavcodec/v4l2_m2m.c     |  37 +++--
- libavcodec/v4l2_m2m.h     |   3 +
- libavcodec/v4l2_m2m_dec.c | 171 ++++++-----------------
- libavcodec/v4l2_m2m_enc.c | 283 +++++++++++++++++++++++++++++++++++++-
- 8 files changed, 643 insertions(+), 198 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 8c4f18dbede2..9ef2f40e3991 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -29,6 +29,8 @@
- #include <fcntl.h>
- #include <poll.h>
- #include "libavcodec/avcodec.h"
-+#include "libavcodec/internal.h"
-+#include "libavutil/avassert.h"
- #include "libavutil/pixdesc.h"
- #include "libavutil/hwcontext.h"
- #include "v4l2_context.h"
-@@ -60,27 +62,39 @@ static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf)
-     return tb.num && tb.den ? tb : v4l2_timebase;
- }
- 
-+static inline struct timeval tv_from_int(const int64_t t)
-+{
-+    return (struct timeval){
-+        .tv_usec = t % USEC_PER_SEC,
-+        .tv_sec  = t / USEC_PER_SEC
-+    };
-+}
-+
-+static inline int64_t int_from_tv(const struct timeval t)
-+{
-+    return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec;
-+}
-+
- static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts)
- {
-     /* convert pts to v4l2 timebase */
-     const int64_t v4l2_pts =
--        out->context->no_pts_rescale ? pts :
-         pts == AV_NOPTS_VALUE ? 0 :
-             av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
--    out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
--    out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
-+    out->buf.timestamp = tv_from_int(v4l2_pts);
- }
- 
- static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf)
- {
-+    const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp);
-+    return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE;
-+#if 0
-     /* convert pts back to encoder timebase */
--    const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
--                        avbuf->buf.timestamp.tv_usec;
--
-     return
-         avbuf->context->no_pts_rescale ? v4l2_pts :
-         v4l2_pts == 0 ? AV_NOPTS_VALUE :
-             av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
-+#endif
- }
- 
- static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length)
-@@ -435,7 +449,7 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data)
- 
-         ff_mutex_lock(&ctx->lock);
- 
--        avbuf->status = V4L2BUF_AVAILABLE;
-+        ff_v4l2_buffer_set_avail(avbuf);
- 
-         if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) {
-             av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name);
-@@ -599,6 +613,38 @@ static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes)
-     return i != 0  && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA));
- }
- 
-+static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
-+{
-+    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
-+
-+    if (frame->format != AV_PIX_FMT_DRM_PRIME || !src)
-+        return AVERROR(EINVAL);
-+
-+    av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF);
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
-+        // Only currently cope with single buffer types
-+        if (out->buf.length != 1)
-+            return AVERROR_PATCHWELCOME;
-+        if (src->nb_objects != 1)
-+            return AVERROR(EINVAL);
-+
-+        out->planes[0].m.fd = src->objects[0].fd;
-+    }
-+    else {
-+        if (src->nb_objects != 1)
-+            return AVERROR(EINVAL);
-+
-+        out->buf.m.fd      = src->objects[0].fd;
-+    }
-+
-+    // No need to copy src AVDescriptor and if we did then we may confuse
-+    // fd close on free
-+    out->ref_buf = av_buffer_ref(frame->buf[0]);
-+
-+    return 0;
-+}
-+
- static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
- {
-     int i;
-@@ -678,7 +724,7 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
-  *
-  ******************************************************************************/
- 
--int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
-+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts)
- {
-     out->buf.flags = frame->key_frame ?
-         (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
-@@ -688,10 +734,15 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
-     v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
-     v4l2_set_color_range(out, frame->color_range);
-     // PTS & interlace are buffer vars
--    v4l2_set_pts(out, frame->pts);
-+    if (track_ts)
-+        out->buf.timestamp = tv_from_int(track_ts);
-+    else
-+        v4l2_set_pts(out, frame->pts);
-     v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first);
- 
--    return v4l2_buffer_swframe_to_buf(frame, out);
-+    return frame->format == AV_PIX_FMT_DRM_PRIME ?
-+        v4l2_buffer_primeframe_to_buf(frame, out) :
-+        v4l2_buffer_swframe_to_buf(frame, out);
- }
- 
- int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
-@@ -754,6 +805,7 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
- 
-     pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused;
-     pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset;
-+    pkt->flags = 0;
- 
-     if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
-         pkt->flags |= AV_PKT_FLAG_KEY;
-@@ -768,8 +820,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
-     return 0;
- }
- 
--int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
--                                    const void *extdata, size_t extlen)
-+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
-+                                    const void *extdata, size_t extlen,
-+                                    const int64_t timestamp)
- {
-     int ret;
- 
-@@ -783,7 +836,10 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-     if (ret && ret != AVERROR(ENOMEM))
-         return ret;
- 
--    v4l2_set_pts(out, pkt->pts);
-+    if (timestamp)
-+        out->buf.timestamp = tv_from_int(timestamp);
-+    else
-+        v4l2_set_pts(out, pkt->pts);
- 
-     out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ?
-         (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
-@@ -794,7 +850,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
- 
- int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
- {
--    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0);
-+    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0);
- }
- 
- 
-@@ -814,13 +870,15 @@ static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data)
-             close(avbuf->drm_frame.objects[i].fd);
-     }
- 
-+    av_buffer_unref(&avbuf->ref_buf);
-+
-     ff_weak_link_unref(&avbuf->context_wl);
- 
-     av_free(avbuf);
- }
- 
- 
--int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx)
-+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem)
- {
-     int ret, i;
-     V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
-@@ -837,7 +895,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-     }
- 
-     avbuf->context = ctx;
--    avbuf->buf.memory = V4L2_MEMORY_MMAP;
-+    avbuf->buf.memory = mem;
-     avbuf->buf.type = ctx->type;
-     avbuf->buf.index = index;
- 
-@@ -867,6 +925,8 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-         avbuf->num_planes = 1;
- 
-     for (i = 0; i < avbuf->num_planes; i++) {
-+        const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP &&
-+            (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm);
- 
-         avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
-             ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline :
-@@ -875,21 +935,17 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-         if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-             avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
- 
--            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
--                !buf_to_m2mctx(avbuf)->output_drm) {
-+            if (want_mmap)
-                 avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
-                                                PROT_READ | PROT_WRITE, MAP_SHARED,
-                                                buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
--            }
-         } else {
-             avbuf->plane_info[i].length = avbuf->buf.length;
- 
--            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
--                !buf_to_m2mctx(avbuf)->output_drm) {
-+            if (want_mmap)
-                 avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
-                                                PROT_READ | PROT_WRITE, MAP_SHARED,
-                                                buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
--            }
-         }
- 
-         if (avbuf->plane_info[i].mm_addr == MAP_FAILED) {
-diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
-index 3b7ca4d99e1e..1ac32c5989f1 100644
---- a/libavcodec/v4l2_buffers.h
-+++ b/libavcodec/v4l2_buffers.h
-@@ -59,6 +59,10 @@ typedef struct V4L2Buffer {
- 
-     /* DRM descriptor */
-     AVDRMFrameDescriptor drm_frame;
-+    /* For DRM_PRIME encode - need to keep a ref to the source buffer till we
-+     * are done
-+     */
-+    AVBufferRef * ref_buf;
- 
-     /* keep track of the mmap address and mmap length */
-     struct V4L2Plane_info {
-@@ -110,8 +114,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf);
-  */
- int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
- 
--int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
--                                    const void *extdata, size_t extlen);
-+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
-+                                    const void *extdata, size_t extlen,
-+                                    const int64_t timestamp);
- 
- /**
-  * Extracts the data from an AVFrame to a V4L2Buffer
-@@ -121,7 +126,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
-  *
-  * @returns 0 in case of success, a negative AVERROR code otherwise
-  */
--int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
-+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts);
- 
- /**
-  * Initializes a V4L2Buffer
-@@ -131,7 +136,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
-  *
-  * @returns 0 in case of success, a negative AVERROR code otherwise
-  */
--int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx);
-+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem);
- 
- /**
-  * Enqueues a V4L2Buffer
-@@ -142,5 +147,12 @@ int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context
-  */
- int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf);
- 
-+static inline void
-+ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf)
-+{
-+    avbuf->status = V4L2BUF_AVAILABLE;
-+    av_buffer_unref(&avbuf->ref_buf);
-+}
-+
- 
- #endif // AVCODEC_V4L2_BUFFERS_H
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index b3662aedaa9f..7a707d21fc7a 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -43,6 +43,160 @@ struct v4l2_format_update {
-     int update_avfmt;
- };
- 
-+
-+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
-+{
-+    return (int64_t)n;
-+}
-+
-+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
-+{
-+    return (unsigned int)pts;
-+}
-+
-+// FFmpeg requires us to propagate a number of vars from the coded pkt into
-+// the decoded frame. The only thing that tracks like that in V4L2 stateful
-+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
-+// guarantees about PTS being unique or specified for every frame so replace
-+// the supplied PTS with a simple incrementing number and keep a circular
-+// buffer of all the things we want preserved (including the original PTS)
-+// indexed by the tracking no.
-+static int64_t
-+xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt)
-+{
-+    int64_t track_pts;
-+
-+    // Avoid 0
-+    if (++x->track_no == 0)
-+        x->track_no = 1;
-+
-+    track_pts = track_to_pts(avctx, x->track_no);
-+
-+    av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no);
-+    x->last_pkt_dts = avpkt->dts;
-+    x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-+        .discard          = 0,
-+        .pending          = 1,
-+        .pkt_size         = avpkt->size,
-+        .pts              = avpkt->pts,
-+        .dts              = avpkt->dts,
-+        .reordered_opaque = avctx->reordered_opaque,
-+        .pkt_pos          = avpkt->pos,
-+        .pkt_duration     = avpkt->duration,
-+        .track_pts        = track_pts
-+    };
-+    return track_pts;
-+}
-+
-+static int64_t
-+xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame)
-+{
-+    int64_t track_pts;
-+
-+    // Avoid 0
-+    if (++x->track_no == 0)
-+        x->track_no = 1;
-+
-+    track_pts = track_to_pts(avctx, x->track_no);
-+
-+    av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no);
-+    x->last_pkt_dts = frame->pkt_dts;
-+    x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-+        .discard          = 0,
-+        .pending          = 1,
-+        .pkt_size         = 0,
-+        .pts              = frame->pts,
-+        .dts              = AV_NOPTS_VALUE,
-+        .reordered_opaque = frame->reordered_opaque,
-+        .pkt_pos          = frame->pkt_pos,
-+        .pkt_duration     = frame->pkt_duration,
-+        .track_pts        = track_pts
-+    };
-+    return track_pts;
-+}
-+
-+
-+// Returns -1 if we should discard the frame
-+static int
-+xlat_pts_frame_out(AVCodecContext *const avctx,
-+             xlat_track_t * const x,
-+             AVFrame *const frame)
-+{
-+    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
-+    V4L2m2mTrackEl *const t = x->track_els + n;
-+    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
-+    {
-+        av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
-+               "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-+        frame->pts              = AV_NOPTS_VALUE;
-+        frame->pkt_dts          = x->last_pkt_dts;
-+        frame->reordered_opaque = x->last_opaque;
-+        frame->pkt_pos          = -1;
-+        frame->pkt_duration     = 0;
-+        frame->pkt_size         = -1;
-+    }
-+    else if (!t->discard)
-+    {
-+        frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;
-+        frame->pkt_dts          = x->last_pkt_dts;
-+        frame->reordered_opaque = t->reordered_opaque;
-+        frame->pkt_pos          = t->pkt_pos;
-+        frame->pkt_duration     = t->pkt_duration;
-+        frame->pkt_size         = t->pkt_size;
-+
-+        x->last_opaque = x->track_els[n].reordered_opaque;
-+        if (frame->pts != AV_NOPTS_VALUE)
-+            x->last_pts = frame->pts;
-+        t->pending = 0;
-+    }
-+    else
-+    {
-+        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-+        return -1;
-+    }
-+
-+    av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n",
-+           frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n);
-+    return 0;
-+}
-+
-+// Returns -1 if we should discard the frame
-+static int
-+xlat_pts_pkt_out(AVCodecContext *const avctx,
-+             xlat_track_t * const x,
-+             AVPacket *const pkt)
-+{
-+    unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE;
-+    V4L2m2mTrackEl *const t = x->track_els + n;
-+    if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts)
-+    {
-+        av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
-+               "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
-+        pkt->pts                = AV_NOPTS_VALUE;
-+    }
-+    else if (!t->discard)
-+    {
-+        pkt->pts                = t->pending ? t->pts : AV_NOPTS_VALUE;
-+
-+        x->last_opaque = x->track_els[n].reordered_opaque;
-+        if (pkt->pts != AV_NOPTS_VALUE)
-+            x->last_pts = pkt->pts;
-+        t->pending = 0;
-+    }
-+    else
-+    {
-+        av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
-+        return -1;
-+    }
-+
-+    // * Would like something much better than this...xlat(offset + out_count)?
-+    pkt->dts = pkt->pts;
-+    av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n",
-+           pkt->pts, t->track_pts, n);
-+    return 0;
-+}
-+
-+
- static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
- {
-     return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
-@@ -353,12 +507,14 @@ dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf)
-     atomic_fetch_sub(&ctx->q_count, 1);
- 
-     avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
--    avbuf->status = V4L2BUF_AVAILABLE;
-+    ff_v4l2_buffer_set_avail(avbuf);
-     avbuf->buf = buf;
-     if (is_mp) {
-         memcpy(avbuf->planes, planes, sizeof(planes));
-         avbuf->buf.m.planes = avbuf->planes;
-     }
-+    // Done with any attached buffer
-+    av_buffer_unref(&avbuf->ref_buf);
- 
-     if (V4L2_TYPE_IS_CAPTURE(ctx->type)) {
-         // Zero length cap buffer return == EOS
-@@ -733,7 +889,7 @@ static void flush_all_buffers_status(V4L2Context* const ctx)
-     for (i = 0; i < ctx->num_buffers; ++i) {
-         struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
-         if (buf->status == V4L2BUF_IN_DRIVER)
--            buf->status = V4L2BUF_AVAILABLE;
-+            ff_v4l2_buffer_set_avail(buf);
-     }
-     atomic_store(&ctx->q_count, 0);
- }
-@@ -787,6 +943,8 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
-     {
-         if (cmd == VIDIOC_STREAMOFF)
-             flush_all_buffers_status(ctx);
-+        else
-+            ctx->first_buf = 1;
- 
-         ctx->streamon = (cmd == VIDIOC_STREAMON);
-         av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name,
-@@ -803,14 +961,16 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
- 
- int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
- {
--    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-+    V4L2m2mContext *const s = ctx_to_m2mctx(ctx);
-+    AVCodecContext *const avctx = s->avctx;
-+    int64_t track_ts;
-     V4L2Buffer* avbuf;
-     int ret;
- 
-     if (!frame) {
-         ret = v4l2_stop_encode(ctx);
-         if (ret)
--            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
-+            av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
-         s->draining= 1;
-         return 0;
-     }
-@@ -819,7 +979,9 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
-     if (!avbuf)
-         return AVERROR(EAGAIN);
- 
--    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf);
-+    track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame);
-+
-+    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts);
-     if (ret)
-         return ret;
- 
-@@ -830,14 +992,16 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
-                                    const void * extdata, size_t extlen)
- {
-     V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-+    AVCodecContext *const avctx = s->avctx;
-     V4L2Buffer* avbuf;
-     int ret;
-+    int64_t track_ts;
- 
-     if (!pkt->size) {
-         ret = v4l2_stop_decode(ctx);
-         // Log but otherwise ignore stop failure
-         if (ret)
--            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
-+            av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
-         s->draining = 1;
-         return 0;
-     }
-@@ -846,7 +1010,9 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
-     if (!avbuf)
-         return AVERROR(EAGAIN);
- 
--    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen);
-+    track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt);
-+
-+    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts);
-     if (ret == AVERROR(ENOMEM))
-         av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
-                __func__, pkt->size, avbuf->planes[0].length);
-@@ -858,24 +1024,36 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
- 
- int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
- {
-+    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-+    AVCodecContext *const avctx = s->avctx;
-     V4L2Buffer *avbuf;
-     int rv;
- 
--    if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
--        return rv;
-+    do {
-+        if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
-+            return rv;
-+        if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0)
-+            return rv;
-+    } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0);
- 
--    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
-+   return 0;
- }
- 
- int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
- {
-+    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-+    AVCodecContext *const avctx = s->avctx;
-     V4L2Buffer *avbuf;
-     int rv;
- 
--    if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0)
--        return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
-+    do {
-+        if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0)
-+            return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
-+        if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0)
-+            return rv;
-+    } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0);
- 
--    return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
-+    return 0;
- }
- 
- int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
-@@ -951,7 +1129,7 @@ void ff_v4l2_context_release(V4L2Context* ctx)
- }
- 
- 
--static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers)
-+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem)
- {
-     V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-     struct v4l2_requestbuffers req;
-@@ -962,7 +1140,7 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers
- 
-     memset(&req, 0, sizeof(req));
-     req.count = req_buffers;
--    req.memory = V4L2_MEMORY_MMAP;
-+    req.memory = mem;
-     req.type = ctx->type;
-     while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) {
-         if (errno != EINTR) {
-@@ -986,7 +1164,7 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers
-     }
- 
-     for (i = 0; i < ctx->num_buffers; i++) {
--        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx);
-+        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem);
-         if (ret) {
-             av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret));
-             goto fail_release;
-@@ -1052,7 +1230,7 @@ int ff_v4l2_context_init(V4L2Context* ctx)
-         goto fail_unref_hwframes;
-     }
- 
--    ret = create_buffers(ctx, ctx->num_buffers);
-+    ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem);
-     if (ret < 0)
-         goto fail_unref_hwframes;
- 
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 0efff58f1892..21265f1bd77b 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -91,11 +91,19 @@ typedef struct V4L2Context {
-      */
-     int num_buffers;
- 
-+    /**
-+     * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF
-+     */
-+    enum v4l2_memory buf_mem;
-+
-     /**
-      * Whether the stream has been started (VIDIOC_STREAMON has been sent).
-      */
-     int streamon;
- 
-+    /* 1st buffer after stream on */
-+    int first_buf;
-+
-     /**
-      *  Either no more buffers available or an unrecoverable error was notified
-      *  by the V4L2 kernel driver: once set the context has to be exited.
-@@ -105,11 +113,10 @@ typedef struct V4L2Context {
-     int flag_last;
- 
-     /**
--     * PTS rescale not wanted
--     * If the PTS is just a dummy frame count then rescale is
--     * actively harmful
-+     * If NZ then when Qing frame/pkt use this rather than the
-+     * "real" PTS
-      */
--    int no_pts_rescale;
-+    uint64_t track_ts;
- 
-     AVBufferRef *frames_ref;
-     atomic_int q_count;
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index 6dd01e2e0085..1e30d15fd866 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -35,6 +35,14 @@
- #include "v4l2_fmt.h"
- #include "v4l2_m2m.h"
- 
-+static void
-+xlat_init(xlat_track_t * const x)
-+{
-+    memset(x, 0, sizeof(*x));
-+    x->last_pts = AV_NOPTS_VALUE;
-+}
-+
-+
- static inline int v4l2_splane_video(struct v4l2_capability *cap)
- {
-     if (cap->capabilities & (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT) &&
-@@ -67,7 +75,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe)
- 
-     s->capture.done = s->output.done = 0;
-     s->capture.name = "capture";
-+    s->capture.buf_mem = V4L2_MEMORY_MMAP;
-     s->output.name = "output";
-+    s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
-     atomic_init(&s->refcount, 0);
-     sem_init(&s->refsync, 0, 0);
- 
-@@ -334,35 +344,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv)
-     return v4l2_configure_contexts(s);
- }
- 
--int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
-+int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps)
- {
--    *s = av_mallocz(sizeof(V4L2m2mContext));
--    if (!*s)
-+    V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext));
-+
-+    *pps = NULL;
-+    if (!s)
-         return AVERROR(ENOMEM);
- 
--    priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext),
-+    priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s),
-                                          &v4l2_m2m_destroy_context, NULL, 0);
-     if (!priv->context_ref) {
--        av_freep(s);
-+        av_free(s);
-         return AVERROR(ENOMEM);
-     }
- 
-     /* assign the context */
--    priv->context = *s;
--    (*s)->priv = priv;
-+    priv->context = s;
-+    s->priv = priv;
- 
-     /* populate it */
--    priv->context->capture.num_buffers = priv->num_capture_buffers;
--    priv->context->output.num_buffers  = priv->num_output_buffers;
--    priv->context->self_ref = priv->context_ref;
--    priv->context->fd = -1;
-+    s->capture.num_buffers = priv->num_capture_buffers;
-+    s->output.num_buffers  = priv->num_output_buffers;
-+    s->self_ref = priv->context_ref;
-+    s->fd = -1;
-+    xlat_init(&s->xlat);
- 
-     priv->context->frame = av_frame_alloc();
-     if (!priv->context->frame) {
-         av_buffer_unref(&priv->context_ref);
--        *s = NULL; /* freed when unreferencing context_ref */
-         return AVERROR(ENOMEM);
-     }
- 
-+    *pps = s;
-     return 0;
- }
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 19d618698dd7..d6cdaf65e183 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -103,6 +103,9 @@ typedef struct V4L2m2mContext {
-     /* generate DRM frames */
-     int output_drm;
- 
-+    /* input frames are drmprime */
-+    int input_drm;
-+
-     /* Frame tracking */
-     xlat_track_t xlat;
-     int pending_hw;
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 7e170447064a..fbbfc81342d5 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -169,96 +169,17 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s)
-     return 0;
- }
- 
--static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
--{
--    return (int64_t)n;
--}
--
--static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
--{
--    return (unsigned int)pts;
--}
--
--// FFmpeg requires us to propagate a number of vars from the coded pkt into
--// the decoded frame. The only thing that tracks like that in V4L2 stateful
--// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
--// guarantees about PTS being unique or specified for every frame so replace
--// the supplied PTS with a simple incrementing number and keep a circular
--// buffer of all the things we want preserved (including the original PTS)
--// indexed by the tracking no.
- static void
--xlat_pts_in(AVCodecContext *const avctx, xlat_track_t *const x, AVPacket *const avpkt)
--{
--    int64_t track_pts;
--
--    // Avoid 0
--    if (++x->track_no == 0)
--        x->track_no = 1;
--
--    track_pts = track_to_pts(avctx, x->track_no);
--
--    av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no);
--    x->last_pkt_dts = avpkt->dts;
--    x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
--        .discard          = 0,
--        .pending          = 1,
--        .pkt_size         = avpkt->size,
--        .pts              = avpkt->pts,
--        .dts              = avpkt->dts,
--        .reordered_opaque = avctx->reordered_opaque,
--        .pkt_pos          = avpkt->pos,
--        .pkt_duration     = avpkt->duration,
--        .track_pts        = track_pts
--    };
--    avpkt->pts = track_pts;
--}
--
--// Returns -1 if we should discard the frame
--static int
--xlat_pts_out(AVCodecContext *const avctx,
--             xlat_track_t * const x,
-+set_best_effort_pts(AVCodecContext *const avctx,
-              pts_stats_t * const ps,
-              AVFrame *const frame)
- {
--    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
--    V4L2m2mTrackEl *const t = x->track_els + n;
--    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
--    {
--        av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
--        frame->pts              = AV_NOPTS_VALUE;
--        frame->pkt_dts          = x->last_pkt_dts;
--        frame->reordered_opaque = x->last_opaque;
--        frame->pkt_pos          = -1;
--        frame->pkt_duration     = 0;
--        frame->pkt_size         = -1;
--    }
--    else if (!t->discard)
--    {
--        frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;
--        frame->pkt_dts          = x->last_pkt_dts;
--        frame->reordered_opaque = t->reordered_opaque;
--        frame->pkt_pos          = t->pkt_pos;
--        frame->pkt_duration     = t->pkt_duration;
--        frame->pkt_size         = t->pkt_size;
--
--        x->last_opaque = x->track_els[n].reordered_opaque;
--        if (frame->pts != AV_NOPTS_VALUE)
--            x->last_pts = frame->pts;
--        t->pending = 0;
--    }
--    else
--    {
--        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
--        return -1;
--    }
--
-     pts_stats_add(ps, frame->pts);
- 
-     frame->best_effort_timestamp = pts_stats_guess(ps);
-     frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
--    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n",
--           frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n);
--    return 0;
-+    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n",
-+           frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
- }
- 
- static void
-@@ -272,13 +193,6 @@ xlat_flush(xlat_track_t * const x)
-     x->last_pts = AV_NOPTS_VALUE;
- }
- 
--static void
--xlat_init(xlat_track_t * const x)
--{
--    memset(x, 0, sizeof(*x));
--    x->last_pts = AV_NOPTS_VALUE;
--}
--
- static int
- xlat_pending(const xlat_track_t * const x)
- {
-@@ -419,8 +333,6 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-             av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret);
-             return ret;
-         }
--
--        xlat_pts_in(avctx, &s->xlat, &s->buf_pkt);
-     }
- 
-     if (s->draining) {
-@@ -542,49 +454,47 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                 prefer_dq ? 5 :
-                 src_rv == NQ_Q_FULL ? -1 : 0;
- 
--            do {
--                // Dequeue frame will unref any previous contents of frame
--                // if it returns success so we don't need an explicit unref
--                // when discarding
--                // This returns AVERROR(EAGAIN) on timeout or if
--                // there is room in the input Q and timeout == -1
--                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
--
--                // Failure due to no buffer in Q?
--                if (dst_rv == AVERROR(ENOSPC)) {
--                    // Wait & retry
--                    if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
--                        dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
--                    }
-+            // Dequeue frame will unref any previous contents of frame
-+            // if it returns success so we don't need an explicit unref
-+            // when discarding
-+            // This returns AVERROR(EAGAIN) on timeout or if
-+            // there is room in the input Q and timeout == -1
-+            dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
-+
-+            // Failure due to no buffer in Q?
-+            if (dst_rv == AVERROR(ENOSPC)) {
-+                // Wait & retry
-+                if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
-+                    dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
-                 }
-+            }
-+
-+            // Adjust dynamic pending threshold
-+            if (dst_rv == 0) {
-+                if (--s->pending_hw < PENDING_HW_MIN)
-+                    s->pending_hw = PENDING_HW_MIN;
-+                s->pending_n = 0;
- 
--                // Adjust dynamic pending threshold
--                if (dst_rv == 0) {
--                    if (--s->pending_hw < PENDING_HW_MIN)
--                        s->pending_hw = PENDING_HW_MIN;
-+                set_best_effort_pts(avctx, &s->pts_stat, frame);
-+            }
-+            else if (dst_rv == AVERROR(EAGAIN)) {
-+                if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) {
-+                    s->pending_hw = pending * 16 + PENDING_HW_OFFSET;
-                     s->pending_n = 0;
-                 }
--                else if (dst_rv == AVERROR(EAGAIN)) {
--                    if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) {
--                        s->pending_hw = pending * 16 + PENDING_HW_OFFSET;
--                        s->pending_n = 0;
--                    }
--                }
-+            }
- 
--                if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
--                    av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF");
--                    dst_rv = AVERROR_EOF;
--                    s->capture.done = 1;
--                }
--                else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
--                    av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
--                           s->draining, s->capture.done);
--                else if (dst_rv && dst_rv != AVERROR(EAGAIN))
--                    av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
--                           s->draining, s->capture.done, dst_rv);
--
--                // Go again if we got a frame that we need to discard
--            } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame));
-+            if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
-+                av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF");
-+                dst_rv = AVERROR_EOF;
-+                s->capture.done = 1;
-+            }
-+            else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
-+                av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
-+                       s->draining, s->capture.done);
-+            else if (dst_rv && dst_rv != AVERROR(EAGAIN))
-+                av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
-+                       s->draining, s->capture.done, dst_rv);
-         }
- 
-         ++i;
-@@ -791,7 +701,6 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     if (ret < 0)
-         return ret;
- 
--    xlat_init(&s->xlat);
-     pts_stats_init(&s->pts_stat, avctx, "decoder");
-     s->pending_hw = PENDING_HW_MIN;
- 
-@@ -810,12 +719,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     output->av_codec_id = avctx->codec_id;
-     output->av_pix_fmt  = AV_PIX_FMT_NONE;
-     output->min_buf_size = max_coded_size(avctx);
--    output->no_pts_rescale = 1;
- 
-     capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
-     capture->av_pix_fmt = avctx->pix_fmt;
-     capture->min_buf_size = 0;
--    capture->no_pts_rescale = 1;
- 
-     /* the client requests the codec to generate DRM frames:
-      *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index 9a0837ecf3d7..05ff6ba72655 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -24,6 +24,8 @@
- #include <linux/videodev2.h>
- #include <sys/ioctl.h>
- #include <search.h>
-+#include <drm_fourcc.h>
-+
- #include "encode.h"
- #include "libavcodec/avcodec.h"
- #include "libavutil/pixdesc.h"
-@@ -38,6 +40,34 @@
- #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x
- #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x
- 
-+// P030 should be defined in drm_fourcc.h and hopefully will be sometime
-+// in the future but until then...
-+#ifndef DRM_FORMAT_P030
-+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
-+#endif
-+
-+#ifndef DRM_FORMAT_NV15
-+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
-+#endif
-+
-+#ifndef DRM_FORMAT_NV20
-+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
-+#endif
-+
-+#ifndef V4L2_CID_CODEC_BASE
-+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
-+#endif
-+
-+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
-+// in videodev2.h hopefully will be sometime in the future but until then...
-+#ifndef V4L2_PIX_FMT_NV12_10_COL128
-+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
-+#endif
-+
-+#ifndef V4L2_PIX_FMT_NV12_COL128
-+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
-+#endif
-+
- static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den)
- {
-     struct v4l2_streamparm parm = { 0 };
-@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p)
- static int v4l2_check_b_frame_support(V4L2m2mContext *s)
- {
-     if (s->avctx->max_b_frames)
--        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n");
-+        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames);
- 
--    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0);
-+    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1);
-     v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0);
-     if (s->avctx->max_b_frames == 0)
-         return 0;
- 
-     avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding");
--
-     return AVERROR_PATCHWELCOME;
- }
- 
-@@ -271,13 +300,184 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s)
-     return 0;
- }
- 
-+static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame)
-+{
-+    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
-+
-+    const uint32_t drm_fmt = src->layers[0].format;
-+    // Treat INVALID as LINEAR
-+    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
-+        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
-+    uint32_t pix_fmt = 0;
-+    uint32_t w = 0;
-+    uint32_t h = 0;
-+    uint32_t bpl = src->layers[0].planes[0].pitch;
-+
-+    // We really don't expect multiple layers
-+    // All formats that we currently cope with are single object
-+
-+    if (src->nb_layers != 1 || src->nb_objects != 1)
-+        return AVERROR(EINVAL);
-+
-+    switch (drm_fmt) {
-+        case DRM_FORMAT_YUV420:
-+            if (mod == DRM_FORMAT_MOD_LINEAR) {
-+                if (src->layers[0].nb_planes != 3)
-+                    break;
-+                pix_fmt = V4L2_PIX_FMT_YUV420;
-+                h = src->layers[0].planes[1].offset / bpl;
-+                w = bpl;
-+            }
-+            break;
-+
-+        case DRM_FORMAT_NV12:
-+            if (mod == DRM_FORMAT_MOD_LINEAR) {
-+                if (src->layers[0].nb_planes != 2)
-+                    break;
-+                pix_fmt = V4L2_PIX_FMT_NV12;
-+                h = src->layers[0].planes[1].offset / bpl;
-+                w = bpl;
-+            }
-+            else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
-+                if (src->layers[0].nb_planes != 2)
-+                    break;
-+                pix_fmt = V4L2_PIX_FMT_NV12_COL128;
-+                w = bpl;
-+                h = src->layers[0].planes[1].offset / 128;
-+                bpl = fourcc_mod_broadcom_param(mod);
-+            }
-+            break;
-+
-+        case DRM_FORMAT_P030:
-+            if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
-+                if (src->layers[0].nb_planes != 2)
-+                    break;
-+                pix_fmt =  V4L2_PIX_FMT_NV12_10_COL128;
-+                w = bpl / 2;  // Matching lie to how we construct this
-+                h = src->layers[0].planes[1].offset / 128;
-+                bpl = fourcc_mod_broadcom_param(mod);
-+            }
-+            break;
-+
-+        default:
-+            break;
-+    }
-+
-+    if (!pix_fmt)
-+        return AVERROR(EINVAL);
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
-+        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
-+
-+        pix->width = w;
-+        pix->height = h;
-+        pix->pixelformat = pix_fmt;
-+        pix->plane_fmt[0].bytesperline = bpl;
-+        pix->num_planes = 1;
-+    }
-+    else {
-+        struct v4l2_pix_format *const pix = &format->fmt.pix;
-+
-+        pix->width = w;
-+        pix->height = h;
-+        pix->pixelformat = pix_fmt;
-+        pix->bytesperline = bpl;
-+    }
-+
-+    return 0;
-+}
-+
-+// Do we have similar enough formats to be usable?
-+static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b)
-+{
-+    if (a->type != b->type)
-+        return 0;
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) {
-+        const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp;
-+        const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp;
-+        unsigned int i;
-+        if (pa->pixelformat != pb->pixelformat ||
-+            pa->num_planes != pb->num_planes)
-+            return 0;
-+        for (i = 0; i != pa->num_planes; ++i) {
-+            if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline)
-+                return 0;
-+        }
-+    }
-+    else {
-+        const struct v4l2_pix_format *const pa = &a->fmt.pix;
-+        const struct v4l2_pix_format *const pb = &b->fmt.pix;
-+        if (pa->pixelformat != pb->pixelformat ||
-+            pa->bytesperline != pb->bytesperline)
-+            return 0;
-+    }
-+    return 1;
-+}
-+
-+
- static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
- {
-     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-     V4L2Context *const output = &s->output;
- 
-+    // Signal EOF if needed
-+    if (!frame) {
-+        return ff_v4l2_context_enqueue_frame(output, frame);
-+    }
-+
-+    if (s->input_drm && !output->streamon) {
-+        int rv;
-+        struct v4l2_format req_format = {.type = output->format.type};
-+
-+        // Set format when we first get a buffer
-+        if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) {
-+            av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n");
-+            return rv;
-+        }
-+
-+        ff_v4l2_context_release(output);
-+
-+        output->format = req_format;
-+
-+        if ((rv = ff_v4l2_context_set_format(output)) != 0) {
-+            av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n");
-+            return rv;
-+        }
-+
-+        if (!fmt_eq(&req_format, &output->format)) {
-+            av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n");
-+            return AVERROR(EINVAL);
-+        }
-+
-+        output->selection.top = frame->crop_top;
-+        output->selection.left = frame->crop_left;
-+        output->selection.width = av_frame_cropped_width(frame);
-+        output->selection.height = av_frame_cropped_height(frame);
-+
-+        if ((rv = ff_v4l2_context_init(output)) != 0) {
-+            av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n");
-+            return rv;
-+        }
-+
-+        {
-+            struct v4l2_selection selection = {
-+                .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
-+                .target = V4L2_SEL_TGT_CROP,
-+                .r = output->selection
-+            };
-+            if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) {
-+                av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n",
-+                       selection.r.width, selection.r.height, selection.r.left, selection.r.top,
-+                       av_err2str(AVERROR(errno)));
-+            }
-+            av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n",
-+                   selection.r.width, selection.r.height, selection.r.left, selection.r.top);
-+        }
-+    }
-+
- #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME
--    if (frame && frame->pict_type == AV_PICTURE_TYPE_I)
-+    if (frame->pict_type == AV_PICTURE_TYPE_I)
-         v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1);
- #endif
- 
-@@ -328,7 +528,70 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-     }
- 
- dequeue:
--    return ff_v4l2_context_dequeue_packet(capture, avpkt);
-+    if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0)
-+        return ret;
-+
-+    if (capture->first_buf == 1) {
-+        uint8_t * data;
-+        const int len = avpkt->size;
-+
-+        // 1st buffer after streamon should be SPS/PPS
-+        capture->first_buf = 2;
-+
-+        // Clear both possible stores so there is no chance of confusion
-+        av_freep(&s->extdata_data);
-+        s->extdata_size = 0;
-+        av_freep(&avctx->extradata);
-+        avctx->extradata_size = 0;
-+
-+        if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) != NULL)
-+            memcpy(data, avpkt->data, len);
-+
-+        av_packet_unref(avpkt);
-+
-+        if (data == NULL)
-+            return AVERROR(ENOMEM);
-+
-+        // We need to copy the header, but keep local if not global
-+        if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) {
-+            avctx->extradata = data;
-+            avctx->extradata_size = len;
-+        }
-+        else {
-+            s->extdata_data = data;
-+            s->extdata_size = len;
-+        }
-+
-+        if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0)
-+            return ret;
-+    }
-+
-+    // First frame must be key so mark as such even if encoder forgot
-+    if (capture->first_buf == 2)
-+        avpkt->flags |= AV_PKT_FLAG_KEY;
-+
-+    // Add SPS/PPS to the start of every key frame if non-global headers
-+    if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) {
-+        const size_t newlen = s->extdata_size + avpkt->size;
-+        AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE);
-+
-+        if (buf == NULL) {
-+            av_packet_unref(avpkt);
-+            return AVERROR(ENOMEM);
-+        }
-+
-+        memcpy(buf->data, s->extdata_data, s->extdata_size);
-+        memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size);
-+
-+        av_buffer_unref(&avpkt->buf);
-+        avpkt->buf = buf;
-+        avpkt->data = buf->data;
-+        avpkt->size = newlen;
-+    }
-+
-+//    av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret);
-+    capture->first_buf = 0;
-+    return 0;
- }
- 
- static av_cold int v4l2_encode_init(AVCodecContext *avctx)
-@@ -340,6 +603,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
-     uint32_t v4l2_fmt_output;
-     int ret;
- 
-+    av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt);
-+
-     ret = ff_v4l2_m2m_create_context(priv, &s);
-     if (ret < 0)
-         return ret;
-@@ -347,13 +612,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
-     capture = &s->capture;
-     output  = &s->output;
- 
-+    s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME);
-+
-     /* common settings output/capture */
-     output->height = capture->height = avctx->height;
-     output->width = capture->width = avctx->width;
- 
-     /* output context */
-     output->av_codec_id = AV_CODEC_ID_RAWVIDEO;
--    output->av_pix_fmt = avctx->pix_fmt;
-+    output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt :
-+            avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt :
-+            AV_PIX_FMT_YUV420P;
- 
-     /* capture context */
-     capture->av_codec_id = avctx->codec_id;
-@@ -372,7 +641,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
-         v4l2_fmt_output = output->format.fmt.pix.pixelformat;
- 
-     pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO);
--    if (pix_fmt_output != avctx->pix_fmt) {
-+    if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) {
-         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output);
-         av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name);
-         return AVERROR(EINVAL);
-
-From e073991161bc9fc70a5ea3079d6b25c9d9008a4b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 8 Jun 2022 16:13:31 +0000
-Subject: [PATCH 055/186] v4l2_m2m_dec: Use DTS for best effort PTS if PTS is
- always NO_PTS
-
-If we do have DTS but don't have PTS then assume PTS=DTS.
-Also get rid of last_dts from tracking as its info wasn't actually
-useful in any way.
----
- libavcodec/v4l2_context.c | 6 ++----
- libavcodec/v4l2_m2m.h     | 1 -
- libavcodec/v4l2_m2m_dec.c | 8 +++++++-
- 3 files changed, 9 insertions(+), 6 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 7a707d21fc7a..6b97eab41ed7 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -73,7 +73,6 @@ xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPack
-     track_pts = track_to_pts(avctx, x->track_no);
- 
-     av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no);
--    x->last_pkt_dts = avpkt->dts;
-     x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-         .discard          = 0,
-         .pending          = 1,
-@@ -100,7 +99,6 @@ xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFr
-     track_pts = track_to_pts(avctx, x->track_no);
- 
-     av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no);
--    x->last_pkt_dts = frame->pkt_dts;
-     x->track_els[x->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
-         .discard          = 0,
-         .pending          = 1,
-@@ -129,7 +127,7 @@ xlat_pts_frame_out(AVCodecContext *const avctx,
-         av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
-                "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
-         frame->pts              = AV_NOPTS_VALUE;
--        frame->pkt_dts          = x->last_pkt_dts;
-+        frame->pkt_dts          = AV_NOPTS_VALUE;
-         frame->reordered_opaque = x->last_opaque;
-         frame->pkt_pos          = -1;
-         frame->pkt_duration     = 0;
-@@ -138,7 +136,7 @@ xlat_pts_frame_out(AVCodecContext *const avctx,
-     else if (!t->discard)
-     {
-         frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;
--        frame->pkt_dts          = x->last_pkt_dts;
-+        frame->pkt_dts          = t->dts;
-         frame->reordered_opaque = t->reordered_opaque;
-         frame->pkt_pos          = t->pkt_pos;
-         frame->pkt_duration     = t->pkt_duration;
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index d6cdaf65e183..ee72beb0522b 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -67,7 +67,6 @@ typedef struct pts_stats_s
- typedef struct xlat_track_s {
-     unsigned int track_no;
-     int64_t last_pts;
--    int64_t last_pkt_dts;
-     int64_t last_opaque;
-     V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
- } xlat_track_t;
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index fbbfc81342d5..485a96f4b487 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -177,7 +177,13 @@ set_best_effort_pts(AVCodecContext *const avctx,
-     pts_stats_add(ps, frame->pts);
- 
-     frame->best_effort_timestamp = pts_stats_guess(ps);
--    frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
-+    // If we can't guess from just PTS - try DTS
-+    if (frame->best_effort_timestamp == AV_NOPTS_VALUE)
-+        frame->best_effort_timestamp = frame->pkt_dts;
-+
-+    // We can't emulate what s/w does in a useful manner and using the
-+    // "correct" answer seems to just confuse things.
-+    frame->pkt_dts               = frame->pts;
-     av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n",
-            frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
- }
-
-From 256637631cb246663c310d0bc8c3769b4af70692 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 30 Jun 2022 15:59:23 +0000
-Subject: [PATCH 056/186] v4l2: Update H265 request for current API
-
-This works with v9 of the H265 patch set which hopefully will be the
-last one. Hevc controls extracted from patched v4l2-controls into
-hevc-ctrls-v4 - if HEVC controls found in the system v4l2-controls then
-those will be used instead.
----
- libavcodec/Makefile            |   2 +-
- libavcodec/hevc-ctrls-v4.h     | 515 +++++++++++++++++++++++++++++++++
- libavcodec/v4l2_req_hevc_v4.c  |   3 +
- libavcodec/v4l2_req_hevc_vx.c  |  81 ++++--
- libavcodec/v4l2_request_hevc.c |   6 +-
- libavcodec/v4l2_request_hevc.h |   1 +
- 6 files changed, 583 insertions(+), 25 deletions(-)
- create mode 100644 libavcodec/hevc-ctrls-v4.h
- create mode 100644 libavcodec/v4l2_req_hevc_v4.c
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 2b3c16185d75..d433a712366f 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -1000,7 +1000,7 @@ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
- OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
- OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec.o
- OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o\
--                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o
-+                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o  v4l2_req_hevc_v4.o
- OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
- OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o h265_profile_level.o
- OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
 diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h
 new file mode 100644
-index 000000000000..7e05f6e7c39b
+index 000000000000..c02fdbe5a8e9
 --- /dev/null
 +++ b/libavcodec/hevc-ctrls-v4.h
-@@ -0,0 +1,515 @@
+@@ -0,0 +1,524 @@
 +/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */
 +/*
 + *  Video for Linux Two controls header file
@@ -23513,6 +1141,15 @@ index 000000000000..7e05f6e7c39b
 +#include <linux/const.h>
 +#include <linux/types.h>
 +
++#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS
++#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000	/* Stateless codecs controls */
++#endif
++#ifndef V4L2_CID_CODEC_STATELESS_BASE
++#define V4L2_CID_CODEC_STATELESS_BASE		(V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900)
++#endif
++
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
 +#define V4L2_CID_STATELESS_HEVC_SPS		(V4L2_CID_CODEC_STATELESS_BASE + 400)
 +#define V4L2_CID_STATELESS_HEVC_PPS		(V4L2_CID_CODEC_STATELESS_BASE + 401)
 +#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 402)
@@ -23973,1282 +1610,6382 @@ index 000000000000..7e05f6e7c39b
 +};
 +
 +#endif
-diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c
-new file mode 100644
-index 000000000000..c35579d8e0ab
---- /dev/null
-+++ b/libavcodec/v4l2_req_hevc_v4.c
-@@ -0,0 +1,3 @@
-+#define HEVC_CTRLS_VERSION 4
-+#include "v4l2_req_hevc_vx.c"
+diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
+index 0dc24f82f8a4..e27f480a8e5c 100644
+--- a/libavcodec/hevc/hevcdec.c
++++ b/libavcodec/hevc/hevcdec.c
+@@ -366,6 +366,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps)
+     else
+         avctx->color_range = AVCOL_RANGE_MPEG;
+ 
++    if (sps->chroma_format_idc == 1) {
++        avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ?
++            sps->vui.common.chroma_sample_loc_type_top_field + 1 :
++            AVCHROMA_LOC_LEFT;
++    }
++    else if (sps->chroma_format_idc == 2 ||
++             sps->chroma_format_idc == 3) {
++        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;;
++    }
++    else {
++        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
++    }
 +
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index 611fa21cc319..761c5b2dc74b 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -6,8 +6,6 @@
- #include "internal.h"
- #include "thread.h"
- 
--#include "v4l2_request_hevc.h"
--
- #if HEVC_CTRLS_VERSION == 1
- #include "hevc-ctrls-v1.h"
- 
-@@ -18,10 +16,37 @@
- #include "hevc-ctrls-v2.h"
- #elif HEVC_CTRLS_VERSION == 3
- #include "hevc-ctrls-v3.h"
-+#elif HEVC_CTRLS_VERSION == 4
-+#include <linux/v4l2-controls.h>
-+#if !defined(V4L2_CID_STATELESS_HEVC_SPS)
-+#include "hevc-ctrls-v4.h"
-+#endif
- #else
- #error Unknown HEVC_CTRLS_VERSION
+     if (sps->vui.common.colour_description_present_flag) {
+         avctx->color_primaries = sps->vui.common.colour_primaries;
+         avctx->color_trc       = sps->vui.common.transfer_characteristics;
+@@ -527,6 +540,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
+                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
+                      CONFIG_HEVC_D3D12VA_HWACCEL + \
+                      CONFIG_HEVC_NVDEC_HWACCEL + \
++                     CONFIG_HEVC_V4L2REQUEST_HWACCEL + \
+                      CONFIG_HEVC_VAAPI_HWACCEL + \
+                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
+                      CONFIG_HEVC_VDPAU_HWACCEL + \
+@@ -561,6 +575,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
  #endif
- 
-+#ifndef V4L2_CID_STATELESS_HEVC_SPS
-+#define V4L2_CID_STATELESS_HEVC_SPS                     V4L2_CID_MPEG_VIDEO_HEVC_SPS
-+#define V4L2_CID_STATELESS_HEVC_PPS                     V4L2_CID_MPEG_VIDEO_HEVC_PPS
-+#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS            V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS
-+#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX          V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX
-+#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS           V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS
-+#define V4L2_CID_STATELESS_HEVC_DECODE_MODE             V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE
-+#define V4L2_CID_STATELESS_HEVC_START_CODE              V4L2_CID_MPEG_VIDEO_HEVC_START_CODE
-+
-+#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED
-+#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED
-+#define V4L2_STATELESS_HEVC_START_CODE_NONE             V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE
-+#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B          V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B
+ #if CONFIG_HEVC_VULKAN_HWACCEL
+         *fmt++ = AV_PIX_FMT_VULKAN;
 +#endif
-+
-+// Should be in videodev2 but we might not have a good enough one
-+#ifndef V4L2_PIX_FMT_HEVC_SLICE
-+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
-+#endif
-+
-+#include "v4l2_request_hevc.h"
-+
- #include "libavutil/hwcontext_drm.h"
- 
- #include <semaphore.h>
-@@ -259,9 +284,13 @@ fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++        *fmt++ = AV_PIX_FMT_DRM_PRIME;
  #endif
-             entry->field_pic = frame->frame->interlaced_frame;
- 
-+#if HEVC_CTRLS_VERSION <= 3
-             /* TODO: Interleaved: Get the POC for each field. */
-             entry->pic_order_cnt[0] = frame->poc;
-             entry->pic_order_cnt[1] = frame->poc;
-+#else
-+            entry->pic_order_cnt_val = frame->poc;
+         break;
+     case AV_PIX_FMT_YUV420P10:
+@@ -588,6 +605,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps)
+ #endif
+ #if CONFIG_HEVC_NVDEC_HWACCEL
+         *fmt++ = AV_PIX_FMT_CUDA;
 +#endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++        *fmt++ = AV_PIX_FMT_DRM_PRIME;
+ #endif
+         break;
+     case AV_PIX_FMT_YUV444P:
+@@ -682,13 +702,16 @@ static int set_sps(HEVCContext *s, HEVCLayerContext *l, const HEVCSPS *sps)
+     if (!sps)
+         return 0;
+ 
+-    ret = pic_arrays_init(l, sps);
+-    if (ret < 0)
+-        goto fail;
++    // If hwaccel then we don't need all the s/w decode helper arrays
++    if (!s->avctx->hwaccel) {
++        ret = pic_arrays_init(l, sps);
++        if (ret < 0)
++            goto fail;
+ 
+-    ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
+-    ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
+-    ff_videodsp_init (&s->vdsp,    sps->bit_depth);
++        ff_hevc_pred_init(&s->hpc,     sps->bit_depth);
++        ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth);
++        ff_videodsp_init (&s->vdsp,    sps->bit_depth);
++    }
+ 
+     l->sps    = ff_refstruct_ref_c(sps);
+     s->vps    = ff_refstruct_ref_c(sps->vps);
+@@ -3179,11 +3202,13 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l,
          }
      }
-     return n;
-@@ -287,8 +316,11 @@ static void fill_slice_params(const HEVCContext * const h,
  
-     *slice_params = (struct v4l2_ctrl_hevc_slice_params) {
-         .bit_size = bit_size,
-+#if HEVC_CTRLS_VERSION <= 3
-         .data_bit_offset = bit_offset,
--
-+#else
-+        .data_byte_offset = bit_offset / 8 + 1,
-+#endif
-         /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
-         .slice_segment_addr = sh->slice_segment_addr,
- 
-@@ -376,8 +408,10 @@ static void fill_slice_params(const HEVCContext * const h,
-         av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
-     }
- 
-+#if HEVC_CTRLS_VERSION <= 3
-     for (i = 0; i < slice_params->num_entry_point_offsets; i++)
-         slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
-+#endif
- }
- 
- #if HEVC_CTRLS_VERSION >= 2
-@@ -761,30 +795,30 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
- 
-     struct v4l2_ext_control control[] = {
-         {
--            .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
-+            .id = V4L2_CID_STATELESS_HEVC_SPS,
-             .ptr = &controls->sps,
-             .size = sizeof(controls->sps),
-         },
-         {
--            .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
-+            .id = V4L2_CID_STATELESS_HEVC_PPS,
-             .ptr = &controls->pps,
-             .size = sizeof(controls->pps),
-         },
- #if HEVC_CTRLS_VERSION >= 2
-         {
--            .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS,
-+            .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
-             .ptr = dec,
-             .size = sizeof(*dec),
-         },
- #endif
-         {
--            .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
-+            .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
-             .ptr = slices + slice_no,
-             .size = sizeof(*slices) * slice_count,
-         },
-         // Optional
-         {
--            .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
-+            .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
-             .ptr = &controls->scaling_matrix,
-             .size = sizeof(controls->scaling_matrix),
-         },
-@@ -1000,12 +1034,12 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
- 
-     // Check for var slice array
-     struct v4l2_query_ext_ctrl qc[] = {
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS },
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS },
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS },
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX },
-+        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS },
-+        { .id = V4L2_CID_STATELESS_HEVC_SPS },
-+        { .id = V4L2_CID_STATELESS_HEVC_PPS },
-+        { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX },
- #if HEVC_CTRLS_VERSION >= 2
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS },
-+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS },
- #endif
-     };
-     // Order & size must match!
-@@ -1042,12 +1076,13 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
- 
-     fill_sps(&ctrl_sps, sps);
- 
--    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
-+    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
-         av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n");
-         return AVERROR(EINVAL);
-     }
- 
-     ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0;
-+    av_log(avctx, AV_LOG_INFO, "%s SPS muti-slice\n", ctx->multi_slice ? "Has" : "No");
-     return 0;
- }
- 
-@@ -1058,29 +1093,29 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-     int ret;
- 
-     struct v4l2_query_ext_ctrl querys[] = {
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, },
-+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
-+        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
-+        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, },
-     };
- 
-     struct v4l2_ext_control ctrls[] = {
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
--        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
-+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
-+        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
-     };
- 
-     mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
- 
-     ctx->decode_mode = querys[0].default_value;
- 
--    if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED &&
--        ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) {
-+    if (ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED &&
-+        ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) {
-         av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode);
-         return AVERROR(EINVAL);
-     }
- 
-     ctx->start_code = querys[1].default_value;
--    if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE &&
--        ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
-+    if (ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_NONE &&
-+        ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) {
-         av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code);
-         return AVERROR(EINVAL);
-     }
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index 20e4e0ab1559..cd79aad5631a 100644
---- a/libavcodec/v4l2_request_hevc.c
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -210,7 +210,11 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-         goto fail4;
-     }
- 
--    if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) {
-+    if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) {
-+        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n");
-+        ctx->fns = &V2(ff_v4l2_req_hevc, 4);
+-    memset(l->horizontal_bs, 0, l->bs_width * l->bs_height);
+-    memset(l->vertical_bs,   0, l->bs_width * l->bs_height);
+-    memset(l->cbf_luma,      0, sps->min_tb_width * sps->min_tb_height);
+-    memset(l->is_pcm,        0, (sps->min_pu_width + 1) * (sps->min_pu_height + 1));
+-    memset(l->tab_slice_address, -1, pic_size_in_ctb * sizeof(*l->tab_slice_address));
++    if (l->horizontal_bs) {
++        memset(l->horizontal_bs, 0, l->bs_width * l->bs_height);
++        memset(l->vertical_bs,   0, l->bs_width * l->bs_height);
++        memset(l->cbf_luma,      0, sps->min_tb_width * sps->min_tb_height);
++        memset(l->is_pcm,        0, (sps->min_pu_width + 1) * (sps->min_pu_height + 1));
++        memset(l->tab_slice_address, -1, pic_size_in_ctb * sizeof(*l->tab_slice_address));
 +    }
-+    else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) {
-         av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n");
-         ctx->fns = &V2(ff_v4l2_req_hevc, 3);
+ 
+     if (IS_IDR(s))
+         ff_hevc_clear_refs(l);
+@@ -3750,8 +3775,13 @@ static int hevc_receive_frame(AVCodecContext *avctx, AVFrame *frame)
      }
-diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h
-index ed48d62e2d53..d4adb3f812a6 100644
---- a/libavcodec/v4l2_request_hevc.h
-+++ b/libavcodec/v4l2_request_hevc.h
-@@ -99,5 +99,6 @@ typedef struct v4l2_req_decode_fns {
- extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
- extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
- extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3);
-+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4);
  
- #endif
-
-From d6457f1d161c7430750b1cc925e370314ba9b83c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sun, 3 Jul 2022 13:40:41 +0000
-Subject: [PATCH 057/186] v4l2_req: Observe limit on size of slice_array
-
-This in fact provides some minor simplifications by combing the
-multi-slice and single-slice paths.
-
-(cherry picked from commit 7631e6d1a66fca9048605c214f3464c90d37932c)
----
- libavcodec/v4l2_req_hevc_vx.c  | 39 ++++++++++++++--------------------
- libavcodec/v4l2_request_hevc.h |  5 +----
- 2 files changed, 17 insertions(+), 27 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index 761c5b2dc74b..9d08d13d9e68 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -840,18 +840,21 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *
-     int bcount = get_bits_count(&h->HEVClc->gb);
-     uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount;
- 
-+    const unsigned int n = rd->num_slices;
-+    const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices;
+     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
+-    if (ret < 0)
++    if (ret < 0) {
++        // Ensure that hwaccel knows this frame is over
++        if (FF_HW_HAS_CB(avctx, abort_frame))
++            FF_HW_SIMPLE_CALL(avctx, abort_frame);
 +
-     int rv;
-     struct slice_info * si;
+         return ret;
++    }
  
-     if ((rv = slice_add(rd)) != 0)
-         return rv;
- 
--    si = rd->slices + rd->num_slices - 1;
-+    si = rd->slices + n;
-     si->ptr = buffer;
-     si->len = size;
- 
--    if (ctx->multi_slice && rd->num_slices > 1) {
--        struct slice_info *const si0 = rd->slices;
-+    if (n != block_start) {
-+        struct slice_info *const si0 = rd->slices + block_start;
-         const size_t offset = (buffer - si0->ptr);
-         boff += offset * 8;
-         size += offset;
-@@ -859,11 +862,11 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *
+ do_output:
+     if (ff_container_fifo_read(s->output_fifo, frame) >= 0) {
+@@ -3780,8 +3810,10 @@ static int hevc_ref_frame(HEVCFrame *dst, const HEVCFrame *src)
      }
  
- #if HEVC_CTRLS_VERSION >= 2
--    if (rd->num_slices == 1)
-+    if (n == 0)
-         fill_decode_params(h, &rd->dec);
--    fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff);
-+    fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff);
- #else
--    fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff);
-+    fill_slice_params(h, rd->slice_params + n, size * 8, boff);
+     dst->pps     = ff_refstruct_ref_c(src->pps);
+-    dst->tab_mvf = ff_refstruct_ref(src->tab_mvf);
+-    dst->rpl_tab = ff_refstruct_ref(src->rpl_tab);
++    if (src->tab_mvf)
++        dst->tab_mvf = ff_refstruct_ref(src->tab_mvf);
++    if (src->rpl_tab)
++        dst->rpl_tab = ff_refstruct_ref(src->rpl_tab);
+     dst->rpl = ff_refstruct_ref(src->rpl);
+     dst->nb_rpl_elems = src->nb_rpl_elems;
+ 
+@@ -4096,6 +4128,9 @@ const FFCodec ff_hevc_decoder = {
+ #if CONFIG_HEVC_NVDEC_HWACCEL
+                                HWACCEL_NVDEC(hevc),
  #endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++                               HWACCEL_V4L2REQUEST(hevc),
++#endif
+ #if CONFIG_HEVC_VAAPI_HWACCEL
+                                HWACCEL_VAAPI(hevc),
+ #endif
+diff --git a/libavcodec/hevc/parser.c b/libavcodec/hevc/parser.c
+index 16b40e2b10db..cf9830648a78 100644
+--- a/libavcodec/hevc/parser.c
++++ b/libavcodec/hevc/parser.c
+@@ -93,6 +93,19 @@ static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal,
+     avctx->profile  = sps->ptl.general_ptl.profile_idc;
+     avctx->level    = sps->ptl.general_ptl.level_idc;
  
-     return 0;
-@@ -997,18 +1000,11 @@ static int v4l2_request_hevc_end_frame(AVCodecContext *avctx)
-     }
- 
-     // Send as slices
--    if (ctx->multi_slice)
--    {
--        if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0)
-+    for (i = 0; i < rd->num_slices; i += ctx->max_slices) {
-+        const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices);
-+        if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0)
++    if (sps->chroma_format_idc == 1) {
++        avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ?
++            sps->vui.common.chroma_sample_loc_type_top_field + 1 :
++            AVCHROMA_LOC_LEFT;
++    }
++    else if (sps->chroma_format_idc == 2 ||
++             sps->chroma_format_idc == 3) {
++        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;;
++    }
++    else {
++        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
++    }
++
+     if (sps->vps->vps_timing_info_present_flag) {
+         num = sps->vps->vps_num_units_in_tick;
+         den = sps->vps->vps_time_scale;
+diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
+index 6ba667e9f520..fc7197ff2bcb 100644
+--- a/libavcodec/hevc/refs.c
++++ b/libavcodec/hevc/refs.c
+@@ -140,16 +140,19 @@ static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l)
              goto fail;
-     }
--    else
--    {
--        for (i = 0; i != rd->num_slices; ++i) {
--            if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0)
--                goto fail;
--        }
--    }
+         frame->nb_rpl_elems = s->pkt.nb_nals;
  
-     // Set the drm_prime desriptor
-     drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
-@@ -1081,8 +1077,6 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-         return AVERROR(EINVAL);
-     }
+-        frame->tab_mvf = ff_refstruct_pool_get(l->tab_mvf_pool);
+-        if (!frame->tab_mvf)
+-            goto fail;
+-
+-        frame->rpl_tab = ff_refstruct_pool_get(l->rpl_tab_pool);
+-        if (!frame->rpl_tab)
+-            goto fail;
+-        frame->ctb_count = l->sps->ctb_width * l->sps->ctb_height;
+-        for (j = 0; j < frame->ctb_count; j++)
+-            frame->rpl_tab[j] = frame->rpl;
++        if (l->tab_mvf_pool) {
++            frame->tab_mvf = ff_refstruct_pool_get(l->tab_mvf_pool);
++            if (!frame->tab_mvf)
++                goto fail;
++        }
++        if (l->rpl_tab_pool) {
++            frame->rpl_tab = ff_refstruct_pool_get(l->rpl_tab_pool);
++            if (!frame->rpl_tab)
++                goto fail;
++            frame->ctb_count = l->sps->ctb_width * l->sps->ctb_height;
++            for (j = 0; j < frame->ctb_count; j++)
++                frame->rpl_tab[j] = frame->rpl;
++        }
+ 
+         if (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD)
+             frame->f->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST;
+@@ -287,14 +290,17 @@ static int init_slice_rpl(HEVCContext *s)
+     int ctb_count    = frame->ctb_count;
+     int ctb_addr_ts  = s->pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
+     int i;
++    RefPicListTab * const rpl = frame->rpl + s->slice_idx;
+ 
+     if (s->slice_idx >= frame->nb_rpl_elems)
+         return AVERROR_INVALIDDATA;
+ 
+-    for (i = ctb_addr_ts; i < ctb_count; i++)
+-        frame->rpl_tab[i] = frame->rpl + s->slice_idx;
++    if (frame->rpl_tab) {
++        for (i = ctb_addr_ts; i < ctb_count; i++)
++            frame->rpl_tab[i] = rpl;
++    }
+ 
+-    frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts];
++    frame->refPicList = (RefPicList *)rpl;
  
--    ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0;
--    av_log(avctx, AV_LOG_INFO, "%s SPS muti-slice\n", ctx->multi_slice ? "Has" : "No");
      return 0;
  }
- 
-@@ -1120,11 +1114,10 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-         return AVERROR(EINVAL);
-     }
- 
--    ctx->max_slices = querys[2].elems;
--    if (ctx->max_slices > MAX_SLICES) {
--        av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices);
--        return AVERROR(EINVAL);
--    }
-+    ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) ||
-+                       querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ?
-+        1 : querys[2].dims[0];
-+    av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices);
- 
-     ctrls[0].value = ctx->decode_mode;
-     ctrls[1].value = ctx->start_code;
-diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h
-index d4adb3f812a6..0029e2330977 100644
---- a/libavcodec/v4l2_request_hevc.h
-+++ b/libavcodec/v4l2_request_hevc.h
-@@ -46,8 +46,6 @@
- #define V4L2_CTRL_FLAG_DYNAMIC_ARRAY	0x0800
- #endif
- 
--#define MAX_SLICES 128
--
- #define VCAT(name, version) name##_v##version
- #define V2(n,v) VCAT(n, v)
- #define V(n) V2(n, HEVC_CTRLS_VERSION)
-@@ -64,10 +62,9 @@ typedef struct V4L2RequestContextHEVC {
- 
-     unsigned int timestamp;  // ?? maybe uint64_t
- 
--    int multi_slice;
-     int decode_mode;
-     int start_code;
--    int max_slices;
-+    unsigned int max_slices;
- 
-     req_decode_q decode_q;
- 
-
-From d4794c95cd25fb87a3dcc7585eb7b057211a87dd Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 4 Jul 2022 14:43:20 +0100
-Subject: [PATCH 058/186] v4l2_req: Add entry point offsets array control
-
----
- libavcodec/v4l2_req_hevc_vx.c  | 88 +++++++++++++++++++++++++++-------
- libavcodec/v4l2_request_hevc.h |  3 +-
- 2 files changed, 72 insertions(+), 19 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index 9d08d13d9e68..43ef6631edc1 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -82,11 +82,16 @@ typedef struct V4L2MediaReqDescriptor {
-     struct v4l2_ctrl_hevc_slice_params * slice_params;
-     struct slice_info * slices;
- 
-+    size_t num_offsets;
-+    size_t alloced_offsets;
-+    uint32_t *offsets;
+diff --git a/libavcodec/hwaccel_internal.h b/libavcodec/hwaccel_internal.h
+index b0cc22bb6899..d319f2abe1f5 100644
+--- a/libavcodec/hwaccel_internal.h
++++ b/libavcodec/hwaccel_internal.h
+@@ -161,6 +161,17 @@ typedef struct FFHWAccel {
+      * Callback to flush the hwaccel state.
+      */
+     void (*flush)(AVCodecContext *avctx);
 +
- } V4L2MediaReqDescriptor;
++    /**
++     * Called if parsing fails
++     *
++     * An error has occurred, end_frame will not be called
++     * start_frame & decode_slice may or may not have been called
++     * Optional
++     *
++     * @param avctx the codec context
++     */
++    void (*abort_frame)(AVCodecContext *avctx);
+ } FFHWAccel;
  
- struct slice_info {
-     const uint8_t * ptr;
-     size_t len; // bytes
-+    size_t n_offsets;
+ static inline const FFHWAccel *ffhwaccel(const AVHWAccel *codec)
+diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
+index 5171e4c7d735..0b586f760d11 100644
+--- a/libavcodec/hwaccels.h
++++ b/libavcodec/hwaccels.h
+@@ -43,6 +43,7 @@ extern const struct FFHWAccel ff_hevc_d3d11va2_hwaccel;
+ extern const struct FFHWAccel ff_hevc_d3d12va_hwaccel;
+ extern const struct FFHWAccel ff_hevc_dxva2_hwaccel;
+ extern const struct FFHWAccel ff_hevc_nvdec_hwaccel;
++extern const struct FFHWAccel ff_hevc_v4l2request_hwaccel;
+ extern const struct FFHWAccel ff_hevc_vaapi_hwaccel;
+ extern const struct FFHWAccel ff_hevc_vdpau_hwaccel;
+ extern const struct FFHWAccel ff_hevc_videotoolbox_hwaccel;
+diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h
+index ee29ca631df0..a7cb81aa044f 100644
+--- a/libavcodec/hwconfig.h
++++ b/libavcodec/hwconfig.h
+@@ -67,6 +67,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx);
+     HW_CONFIG_HWACCEL(1, 1, 0, D3D11,        D3D11VA,      ff_ ## codec ## _d3d11va2_hwaccel)
+ #define HWACCEL_NVDEC(codec) \
+     HW_CONFIG_HWACCEL(1, 1, 0, CUDA,         CUDA,         ff_ ## codec ## _nvdec_hwaccel)
++#define HWACCEL_V4L2REQUEST(codec) \
++    HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME,    DRM,          ff_ ## codec ## _v4l2request_hwaccel)
+ #define HWACCEL_VAAPI(codec) \
+     HW_CONFIG_HWACCEL(1, 1, 1, VAAPI,        VAAPI,        ff_ ## codec ## _vaapi_hwaccel)
+ #define HWACCEL_VDPAU(codec) \
+diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
+index e42591110fec..70cc0ecabcb9 100644
+--- a/libavcodec/mmaldec.c
++++ b/libavcodec/mmaldec.c
+@@ -24,6 +24,9 @@
+  * MMAL Video Decoder
+  */
+ 
++#pragma GCC diagnostic push
++// Many many redundant decls in the header files
++#pragma GCC diagnostic ignored "-Wredundant-decls"
+ #include <bcm_host.h>
+ #include <interface/mmal/mmal.h>
+ #include <interface/mmal/mmal_parameters_video.h>
+@@ -31,6 +34,7 @@
+ #include <interface/mmal/util/mmal_util_params.h>
+ #include <interface/mmal/util/mmal_default_components.h>
+ #include <interface/mmal/vc/mmal_vc_api.h>
++#pragma GCC diagnostic pop
+ #include <stdatomic.h>
+ 
+ #include "avcodec.h"
+diff --git a/libavcodec/raw.c b/libavcodec/raw.c
+index b73b80e5fdb1..7ca36ac84975 100644
+--- a/libavcodec/raw.c
++++ b/libavcodec/raw.c
+@@ -297,6 +297,12 @@ static const PixelFormatTag raw_pix_fmt_tags[] = {
+     { AV_PIX_FMT_RGB565LE,MKTAG( 3 ,  0 ,  0 ,  0 ) }, /* flipped RGB565LE */
+     { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
+ 
++    /* RPI (Might as well define for everything) */
++    { AV_PIX_FMT_SAND128,     MKTAG('S', 'A', 'N', 'D') },
++    { AV_PIX_FMT_RPI4_8,      MKTAG('S', 'A', 'N', 'D') },
++    { AV_PIX_FMT_SAND64_10,   MKTAG('S', 'N', 'D', 'A') },
++    { AV_PIX_FMT_RPI4_10,     MKTAG('S', 'N', 'D', 'B') },
++
+     { AV_PIX_FMT_NONE, 0 },
  };
  
- // Handy container for accumulating controls before setting
-@@ -245,7 +250,7 @@ static int slice_add(V4L2MediaReqDescriptor * const rd)
-     if (rd->num_slices >= rd->alloced_slices) {
-         struct v4l2_ctrl_hevc_slice_params * p2;
-         struct slice_info * s2;
--        size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2;
-+        size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2;
+diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c
+index 8c577006d922..8ca0379e1219 100644
+--- a/libavcodec/rawenc.c
++++ b/libavcodec/rawenc.c
+@@ -24,6 +24,7 @@
+  * Raw Video Encoder
+  */
  
-         p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2));
-         if (p2 == NULL)
-@@ -263,6 +268,23 @@ static int slice_add(V4L2MediaReqDescriptor * const rd)
++#include "config.h"
+ #include "avcodec.h"
+ #include "codec_internal.h"
+ #include "encode.h"
+@@ -33,6 +34,10 @@
+ #include "libavutil/intreadwrite.h"
+ #include "libavutil/imgutils.h"
+ #include "libavutil/internal.h"
++#include "libavutil/avassert.h"
++#if CONFIG_SAND
++#include "libavutil/rpi_sand_fns.h"
++#endif
+ 
+ static av_cold int raw_encode_init(AVCodecContext *avctx)
+ {
+@@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCodecContext *avctx)
      return 0;
  }
  
-+static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets)
++#if CONFIG_SAND
++static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++                      const AVFrame *frame)
 +{
-+    if (rd->num_offsets + n > rd->alloced_offsets) {
-+        size_t n2 = rd->alloced_slices == 0 ? 128 : rd->alloced_slices * 2;
-+        void * p2;
-+        while (rd->num_offsets + n > n2)
-+            n2 *= 2;
-+        if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL)
-+            return AVERROR(ENOMEM);
-+        rd->offsets = p2;
-+        rd->alloced_offsets = n2;
-+    }
-+    for (size_t i = 0; i != n; ++i)
-+        rd->offsets[rd->num_offsets++] = offsets[i] - 1;
++    const int width = av_frame_cropped_width(frame);
++    const int height = av_frame_cropped_height(frame);
++    const int x0 = frame->crop_left;
++    const int y0 = frame->crop_top;
++    const int size = width * height * 3 / 2;
++    uint8_t * dst;
++    int ret;
++
++    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
++        return ret;
++
++    dst = pkt->data;
++
++    av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
++    dst += width * height;
++    av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2,
++                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2);
 +    return 0;
 +}
 +
- static unsigned int
- fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries)
- {
-@@ -403,12 +425,12 @@ static void fill_slice_params(const HEVCContext * const h,
-     fill_pred_table(h, &slice_params->pred_weight_table);
- 
-     slice_params->num_entry_point_offsets = sh->num_entry_point_offsets;
-+#if HEVC_CTRLS_VERSION <= 3
-     if (slice_params->num_entry_point_offsets > 256) {
-         slice_params->num_entry_point_offsets = 256;
-         av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
-     }
- 
--#if HEVC_CTRLS_VERSION <= 3
-     for (i = 0; i < slice_params->num_entry_point_offsets; i++)
-         slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
- #endif
-@@ -787,13 +809,17 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
- #if HEVC_CTRLS_VERSION >= 2
-     struct v4l2_ctrl_hevc_decode_params * const dec,
- #endif
--    struct v4l2_ctrl_hevc_slice_params * const slices,
--    const unsigned int slice_no,
--    const unsigned int slice_count)
-+    struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count,
-+    void * const offsets, const size_t offset_count)
- {
-     int rv;
-+#if HEVC_CTRLS_VERSION >= 2
-+    unsigned int n = 4;
-+#else
-+    unsigned int n = 3;
-+#endif
- 
--    struct v4l2_ext_control control[] = {
-+    struct v4l2_ext_control control[6] = {
-         {
-             .id = V4L2_CID_STATELESS_HEVC_SPS,
-             .ptr = &controls->sps,
-@@ -813,21 +839,28 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
- #endif
-         {
-             .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
--            .ptr = slices + slice_no,
-+            .ptr = slices,
-             .size = sizeof(*slices) * slice_count,
-         },
--        // Optional
--        {
-+    };
-+
-+    if (controls->has_scaling)
-+        control[n++] = (struct v4l2_ext_control) {
-             .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
-             .ptr = &controls->scaling_matrix,
-             .size = sizeof(controls->scaling_matrix),
--        },
--    };
-+        };
-+
-+#if HEVC_CTRLS_VERSION >= 4
-+    if (offsets)
-+        control[n++] = (struct v4l2_ext_control) {
-+            .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS,
-+            .ptr = offsets,
-+            .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count,
-+        };
-+#endif
- 
--    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control,
--            controls->has_scaling ?
--                FF_ARRAY_ELEMS(control) :
--                FF_ARRAY_ELEMS(control) - 1);
-+    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n);
- 
-     return rv;
- }
-@@ -852,6 +885,7 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *
-     si = rd->slices + n;
-     si->ptr = buffer;
-     si->len = size;
-+    si->n_offsets = rd->num_offsets;
- 
-     if (n != block_start) {
-         struct slice_info *const si0 = rd->slices + block_start;
-@@ -868,6 +902,9 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *
- #else
-     fill_slice_params(h, rd->slice_params + n, size * 8, boff);
- #endif
-+    if (ctx->max_offsets != 0 &&
-+        (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0)
-+        return rv;
- 
-     return 0;
- }
-@@ -893,10 +930,13 @@ static int send_slice(AVCodecContext * const avctx,
- {
-     V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
- 
-+    const int is_last = (j == rd->num_slices);
-     struct slice_info *const si = rd->slices + i;
-     struct media_request * req = NULL;
-     struct qent_src * src = NULL;
-     MediaBufsStatus stat;
-+    void * offsets = rd->offsets + rd->slices[i].n_offsets;
-+    size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets;
- 
-     if ((req = media_request_get(ctx->mpool)) == NULL) {
-         av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__);
-@@ -908,8 +948,8 @@ static int send_slice(AVCodecContext * const avctx,
- #if HEVC_CTRLS_VERSION >= 2
-                      &rd->dec,
- #endif
--                     rd->slice_params,
--                     i, j - i)) {
-+                     rd->slice_params + i, j - i,
-+                     offsets, n_offsets)) {
-         av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__);
-         goto fail1;
-     }
-@@ -935,7 +975,7 @@ static int send_slice(AVCodecContext * const avctx,
- 
-     stat = mediabufs_start_request(ctx->mbufs, &req, &src,
-                                    i == 0 ? rd->qe_dst : NULL,
--                                   j == rd->num_slices);
-+                                   is_last);
- 
-     if (stat != MEDIABUFS_STATUS_SUCCESS) {
-         av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__);
-@@ -1090,6 +1130,9 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-         { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
-         { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
-         { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, },
-+#if HEVC_CTRLS_VERSION >= 4
-+        { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, },
-+#endif
-     };
- 
-     struct v4l2_ext_control ctrls[] = {
-@@ -1119,6 +1162,14 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-         1 : querys[2].dims[0];
-     av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices);
- 
-+#if HEVC_CTRLS_VERSION >= 4
-+    ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ?
-+        0 : querys[3].dims[0];
-+    av_log(avctx, AV_LOG_INFO, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets);
-+#else
-+    ctx->max_offsets = 0;
-+#endif
-+
-     ctrls[0].value = ctx->decode_mode;
-     ctrls[1].value = ctx->start_code;
- 
-@@ -1141,6 +1192,7 @@ static void v4l2_req_frame_free(void *opaque, uint8_t *data)
- 
-     av_freep(&rd->slices);
-     av_freep(&rd->slice_params);
-+    av_freep(&rd->offsets);
- 
-     av_free(rd);
- }
-diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h
-index 0029e2330977..99c90064eab8 100644
---- a/libavcodec/v4l2_request_hevc.h
-+++ b/libavcodec/v4l2_request_hevc.h
-@@ -64,7 +64,8 @@ typedef struct V4L2RequestContextHEVC {
- 
-     int decode_mode;
-     int start_code;
--    unsigned int max_slices;
-+    unsigned int max_slices;    // 0 => not wanted (frame mode)
-+    unsigned int max_offsets;   // 0 => not wanted
- 
-     req_decode_q decode_q;
- 
-
-From b6e9c32d7a2cab41f9fbce46989173f32e2135d8 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 4 Jul 2022 16:22:54 +0100
-Subject: [PATCH 059/186] v4l2_req: Support Annex B
-
----
- libavcodec/v4l2_req_hevc_vx.c | 61 +++++++++++++++++++++++------------
- 1 file changed, 41 insertions(+), 20 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index 43ef6631edc1..5e0db9850aad 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -879,6 +879,18 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *
-     int rv;
-     struct slice_info * si;
- 
-+    // This looks dodgy but we know that FFmpeg has parsed this from a buffer
-+    // that contains the entire frame including the start code
-+    if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) {
-+        buffer -= 3;
-+        size += 3;
-+        boff += 24;
-+        if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) {
-+            av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n",
-+                   buffer[0], buffer[1], buffer[2]);
-+        }
-+    }
-+
-     if ((rv = slice_add(rd)) != 0)
-         return rv;
- 
-@@ -969,10 +981,6 @@ static int send_slice(AVCodecContext * const avctx,
-         goto fail2;
-     }
- 
--#warning ANNEX_B start code
--//        if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
--//        }
--
-     stat = mediabufs_start_request(ctx->mbufs, &req, &src,
-                                    i == 0 ? rd->qe_dst : NULL,
-                                    is_last);
-@@ -1120,6 +1128,12 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-     return 0;
- }
- 
-+static inline int
-+ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
++static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++                      const AVFrame *frame)
 +{
-+    return v >= c->minimum && v <= c->maximum;
-+}
++    const int width = av_frame_cropped_width(frame);
++    const int height = av_frame_cropped_height(frame);
++    const int x0 = frame->crop_left;
++    const int y0 = frame->crop_top;
++    const int size = width * height * 3;
++    uint8_t * dst;
++    int ret;
 +
- // Final init
- static int
- set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-@@ -1142,21 +1156,6 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
- 
-     mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
- 
--    ctx->decode_mode = querys[0].default_value;
--
--    if (ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED &&
--        ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) {
--        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode);
--        return AVERROR(EINVAL);
--    }
--
--    ctx->start_code = querys[1].default_value;
--    if (ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_NONE &&
--        ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) {
--        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code);
--        return AVERROR(EINVAL);
--    }
--
-     ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) ||
-                        querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ?
-         1 : querys[2].dims[0];
-@@ -1165,11 +1164,33 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
- #if HEVC_CTRLS_VERSION >= 4
-     ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ?
-         0 : querys[3].dims[0];
--    av_log(avctx, AV_LOG_INFO, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets);
-+    av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets);
- #else
-     ctx->max_offsets = 0;
- #endif
- 
-+    ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
-+
-+    if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
-+    {
-+        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED;
-+
-+        // Prefer NONE as it doesn't require the slightly dodgy look
-+        // backwards in our raw buffer
-+        if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
-+            ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
-+        else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B))
-+            ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
-+        else {
-+            av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__);
-+            return AVERROR(EINVAL);
-+        }
-+    }
-+    else
-+    {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__);
-+    }
-+
-     ctrls[0].value = ctx->decode_mode;
-     ctrls[1].value = ctx->start_code;
- 
-
-From ed654f6e2703c556582de06129a8052c95e1d934 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 4 Jul 2022 18:24:03 +0100
-Subject: [PATCH 060/186] v4l2_req: Add frame mode decode
-
----
- libavcodec/v4l2_req_hevc_vx.c | 69 +++++++++++++++++++++++------------
- 1 file changed, 46 insertions(+), 23 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index 5e0db9850aad..ada53d0d44df 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -814,9 +814,9 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
- {
-     int rv;
- #if HEVC_CTRLS_VERSION >= 2
--    unsigned int n = 4;
--#else
-     unsigned int n = 3;
-+#else
-+    unsigned int n = 2;
- #endif
- 
-     struct v4l2_ext_control control[6] = {
-@@ -837,12 +837,14 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
-             .size = sizeof(*dec),
-         },
- #endif
--        {
-+    };
-+
-+    if (slices)
-+        control[n++] = (struct v4l2_ext_control) {
-             .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
-             .ptr = slices,
-             .size = sizeof(*slices) * slice_count,
--        },
--    };
-+        };
- 
-     if (controls->has_scaling)
-         control[n++] = (struct v4l2_ext_control) {
-@@ -865,6 +867,8 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
-     return rv;
- }
- 
-+// This only works because we started out from a single coded frame buffer
-+// that will remain intact until after end_frame
- static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
- {
-     const HEVCContext * const h = avctx->priv_data;
-@@ -891,6 +895,17 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *
-         }
-     }
- 
-+    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) {
-+        if (rd->slices == NULL) {
-+            if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL)
-+                return AVERROR(ENOMEM);
-+            rd->slices->ptr = buffer;
-+            rd->num_slices = 1;
-+        }
-+        rd->slices->len = buffer - rd->slices->ptr + size;
-+        return 0;
-+    }
-+
-     if ((rv = slice_add(rd)) != 0)
-         return rv;
- 
-@@ -1169,28 +1184,36 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-     ctx->max_offsets = 0;
- #endif
- 
--    ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
--
--    if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
--    {
-+    if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED ||
-+        querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)
-+        ctx->decode_mode = querys[0].default_value;
-+    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED))
-+        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED;
-+    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
-         ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED;
--
--        // Prefer NONE as it doesn't require the slightly dodgy look
--        // backwards in our raw buffer
--        if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
--            ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
--        else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B))
--            ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
--        else {
--            av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__);
--            return AVERROR(EINVAL);
--        }
--    }
--    else
--    {
-+    else {
-         av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__);
-+        return AVERROR(EINVAL);
-     }
- 
-+    if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE ||
-+        querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)
-+        ctx->start_code = querys[1].default_value;
-+    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B))
-+        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
-+    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
-+        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
-+    else {
-+        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    // If we are in slice mode & START_CODE_NONE supported then pick that
-+    // as it doesn't require the slightly dodgy look backwards in our raw buffer
-+    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED &&
-+        ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
-+        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
-+
-     ctrls[0].value = ctx->decode_mode;
-     ctrls[1].value = ctx->start_code;
- 
-
-From 765b4048cbe852cb857b64cab54afc2c4aed92cc Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 5 Jul 2022 12:54:22 +0000
-Subject: [PATCH 061/186] v4l2_req: Fix probe for frame based decode
-
----
- libavcodec/v4l2_req_hevc_vx.c | 33 +++++++++++++++++++++++----------
- 1 file changed, 23 insertions(+), 10 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index ada53d0d44df..5d083016f89a 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -1082,6 +1082,12 @@ fail:
-     return rv;
- }
- 
-+static inline int
-+ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
-+{
-+    return v >= c->minimum && v <= c->maximum;
-+}
-+
- // Initial check & init
- static int
- probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-@@ -1094,6 +1100,7 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-     // Check for var slice array
-     struct v4l2_query_ext_ctrl qc[] = {
-         { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS },
-+        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
-         { .id = V4L2_CID_STATELESS_HEVC_SPS },
-         { .id = V4L2_CID_STATELESS_HEVC_PPS },
-         { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX },
-@@ -1104,6 +1111,7 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-     // Order & size must match!
-     static const size_t ctrl_sizes[] = {
-         sizeof(struct v4l2_ctrl_hevc_slice_params),
-+        sizeof(int32_t),
-         sizeof(struct v4l2_ctrl_hevc_sps),
-         sizeof(struct v4l2_ctrl_hevc_pps),
-         sizeof(struct v4l2_ctrl_hevc_scaling_matrix),
-@@ -1121,11 +1129,22 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-         return AVERROR(EINVAL);
- #endif
- 
--    if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) {
--        av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION);
-+    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls);
-+    i = 0;
-+#if HEVC_CTRLS_VERSION >= 4
-+    // Skip slice check if no slice mode
-+    if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
-+        i = 1;
-+#else
-+    // Fail frame mode silently for anything prior to V4
-+    if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
-         return AVERROR(EINVAL);
--    }
--    for (i = 0; i != noof_ctrls; ++i) {
-+#endif
-+    for (; i != noof_ctrls; ++i) {
-+        if (qc[i].type == 0) {
-+            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id);
-+            return AVERROR(EINVAL);
-+        }
-         if (ctrl_sizes[i] != (size_t)qc[i].elem_size) {
-             av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n",
-                    HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size);
-@@ -1143,12 +1162,6 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-     return 0;
- }
- 
--static inline int
--ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
--{
--    return v >= c->minimum && v <= c->maximum;
--}
--
- // Final init
- static int
- set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
-
-From 64b8c3987f022ed871ba9ed835de13e18c7e38ee Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 26 Jul 2022 15:46:14 +0000
-Subject: [PATCH 062/186] vf_deinterlace_v4l2m2m: Support NV12 through
- deinterlace
-
-Supports NV12 (though not yet NV12M) through deinterlace.
-Also improves error handling such that attempting to deinterlace an
-unsupported drm format causes an error.
-No longer leaks frame structures.
----
- libavfilter/vf_deinterlace_v4l2m2m.c | 160 ++++++++++++++++++---------
- 1 file changed, 107 insertions(+), 53 deletions(-)
-
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index 1a933b7e0a5f..1a3bef5bcba6 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -373,14 +373,16 @@ static int deint_v4l2m2m_try_format(V4L2Queue *queue)
- 		 fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline);
- 
-     if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
--        if (fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 ||
-+        if ((fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 &&
-+             fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_NV12) ||
-             fmt->fmt.pix_mp.field != field) {
-             av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
- 
-             return AVERROR(EINVAL);
-         }
-     } else {
--        if (fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 ||
-+        if ((fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 &&
-+             fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_NV12) ||
-             fmt->fmt.pix.field != field) {
-             av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
- 
-@@ -391,7 +393,7 @@ static int deint_v4l2m2m_try_format(V4L2Queue *queue)
-     return 0;
- }
- 
--static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, int height, int pitch, int ysize)
-+static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height, int pitch, int ysize)
- {
-     struct v4l2_format *fmt        = &queue->format;
-     DeintV4L2M2MContextShared *ctx = queue->ctx;
-@@ -402,13 +404,16 @@ static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width,
-         .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS,
-     };
- 
-+    // This works for most single object 4:2:0 types
-     if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
-+        fmt->fmt.pix_mp.pixelformat = pixelformat;
-         fmt->fmt.pix_mp.field = field;
-         fmt->fmt.pix_mp.width = width;
-         fmt->fmt.pix_mp.height = ysize / pitch;
-         fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch;
-         fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1);
-     } else {
-+        fmt->fmt.pix.pixelformat = pixelformat;
-         fmt->fmt.pix.field = field;
-         fmt->fmt.pix.width = width;
-         fmt->fmt.pix.height = height;
-@@ -417,12 +422,22 @@ static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width,
-     }
- 
-     ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt);
--    if (ret)
-+    if (ret) {
-+        ret = AVERROR(errno);
-         av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret);
++    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
 +        return ret;
++
++    dst = pkt->data;
++
++    av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height);
++    dst += width * height * 2;
++    av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width,
++                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2);
++    return 0;
++}
++
++static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++                      const AVFrame *frame)
++{
++    const int width = av_frame_cropped_width(frame);
++    const int height = av_frame_cropped_height(frame);
++    const int x0 = frame->crop_left;
++    const int y0 = frame->crop_top;
++    const int size = width * height * 3;
++    uint8_t * dst;
++    int ret;
++
++    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
++        return ret;
++
++    dst = pkt->data;
++
++    av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
++    dst += width * height * 2;
++    av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width,
++                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2);
++    return 0;
++}
++#endif
++
++
+ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
+-                      const AVFrame *frame, int *got_packet)
++                      const AVFrame *src_frame, int *got_packet)
+ {
+-    int ret = av_image_get_buffer_size(frame->format,
+-                                       frame->width, frame->height, 1);
++    int ret;
++    AVFrame * frame = NULL;
+ 
+-    if (ret < 0)
++#if CONFIG_SAND
++    if (av_rpi_is_sand_frame(src_frame)) {
++        ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) :
++            av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) :
++            av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1;
++        *got_packet = (ret == 0);
+         return ret;
++    }
++#endif
++
++    if ((frame = av_frame_clone(src_frame)) == NULL) {
++        ret = AVERROR(ENOMEM);
++        goto fail;
 +    }
 +
-+    if (pixelformat != fmt->fmt.pix.pixelformat) {
-+        av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt->fmt.pix.pixelformat));
-+        return AVERROR(EINVAL);
-+    }
++    if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0)
++        goto fail;
++
++    ret = av_image_get_buffer_size(frame->format,
++                                       frame->width, frame->height, 1);
++    if (ret < 0)
++        goto fail;
  
-     ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel);
+     if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0)
+-        return ret;
++        goto fail;
+     if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size,
+                                        (const uint8_t **)frame->data, frame->linesize,
+                                        frame->format,
+                                        frame->width, frame->height, 1)) < 0)
+-        return ret;
++        goto fail;
+ 
+     if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 &&
+        frame->format   == AV_PIX_FMT_YUYV422) {
+@@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
+             AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16);
+         }
+     }
++    pkt->flags |= AV_PKT_FLAG_KEY;
++    av_frame_free(&frame);
+     *got_packet = 1;
+     return 0;
++
++fail:
++    av_frame_free(&frame);
++    *got_packet = 0;
++    return ret;
+ }
+ 
+ const FFCodec ff_rawvideo_encoder = {
+diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
+index 23474ee1439e..5920463d0a65 100644
+--- a/libavcodec/v4l2_buffers.c
++++ b/libavcodec/v4l2_buffers.c
+@@ -21,6 +21,7 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
++#include "config.h"
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
+ #include <sys/mman.h>
+@@ -28,58 +29,94 @@
+ #include <fcntl.h>
+ #include <poll.h>
+ #include "libavcodec/avcodec.h"
++#include "libavcodec/internal.h"
++#include "libavutil/avassert.h"
++#include "libavutil/mem.h"
+ #include "libavutil/pixdesc.h"
+-#include "refstruct.h"
++#include "libavutil/hwcontext.h"
+ #include "v4l2_context.h"
+ #include "v4l2_buffers.h"
+ #include "v4l2_m2m.h"
++#include "v4l2_req_dmabufs.h"
++#include "weak_link.h"
++
++#if CONFIG_LIBDRM
++#include <drm_fourcc.h>
++#endif
+ 
+ #define USEC_PER_SEC 1000000
+-static AVRational v4l2_timebase = { 1, USEC_PER_SEC };
++static const AVRational v4l2_timebase = { 1, USEC_PER_SEC };
+ 
+-static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf)
++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
+ {
+-    return V4L2_TYPE_IS_OUTPUT(buf->context->type) ?
+-        container_of(buf->context, V4L2m2mContext, output) :
+-        container_of(buf->context, V4L2m2mContext, capture);
++    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
++        container_of(ctx, V4L2m2mContext, output) :
++        container_of(ctx, V4L2m2mContext, capture);
+ }
+ 
+-static inline AVCodecContext *logger(V4L2Buffer *buf)
++static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf)
+ {
+-    return buf_to_m2mctx(buf)->avctx;
++    return ctx_to_m2mctx(buf->context);
+ }
+ 
+-static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf)
++static inline AVCodecContext *logger(const V4L2Buffer * const buf)
+ {
+-    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
++    return buf_to_m2mctx(buf)->avctx;
++}
+ 
+-    if (s->avctx->pkt_timebase.num)
+-        return s->avctx->pkt_timebase;
+-    return s->avctx->time_base;
++static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf)
++{ /* Timebase used for this buffer's codec context, with v4l2_timebase as the last-resort fallback. */
++    const V4L2m2mContext *s = buf_to_m2mctx(avbuf);
++    const AVRational tb = s->avctx->pkt_timebase.num ? /* prefer pkt_timebase when its numerator is set */
++        s->avctx->pkt_timebase :
++        s->avctx->time_base;
++    return tb.num && tb.den ? tb : v4l2_timebase; /* never hand back a rational with a zero num or den */
+ }
+ 
+-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts)
++static inline struct timeval tv_from_int(const int64_t t)
+ {
+-    int64_t v4l2_pts;
++    return (struct timeval){
++        .tv_usec = t % USEC_PER_SEC,
++        .tv_sec  = t / USEC_PER_SEC
++    };
++}
+ 
+-    if (pts == AV_NOPTS_VALUE)
+-        pts = 0;
++static inline int64_t int_from_tv(const struct timeval t)
++{ /* Flatten a timeval into one microsecond count: tv_sec * USEC_PER_SEC + tv_usec. */
++    return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; /* tv_sec is cast to int64_t before the multiply */
++}
+ 
++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts)
++{
+     /* convert pts to v4l2 timebase */
+-    v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
+-    out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
+-    out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
++    const int64_t v4l2_pts =
++        pts == AV_NOPTS_VALUE ? 0 :
++            av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
++    out->buf.timestamp = tv_from_int(v4l2_pts);
+ }
+ 
+-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf)
++static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf)
+ {
+-    int64_t v4l2_pts;
+-
++    const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp);
++    return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE;
++#if 0
+     /* convert pts back to encoder timebase */
+-    v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
+-                        avbuf->buf.timestamp.tv_usec;
++    return
++        avbuf->context->no_pts_rescale ? v4l2_pts :
++        v4l2_pts == 0 ? AV_NOPTS_VALUE :
++            av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
++#endif
++}
+ 
+-    return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
++static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length)
++{ /* Store bytesused/length where the buffer type keeps them: per plane if multiplanar, else on the buffer. */
++    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
++        out->planes[plane].bytesused = bytesused;
++        out->planes[plane].length = length;
++    } else { /* single-planar: 'plane' is not used on this path */
++        out->buf.bytesused = bytesused;
++        out->buf.length = length;
++    }
+ }
+ 
+ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
+@@ -116,6 +153,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
+     return AVCOL_PRI_UNSPECIFIED;
+ }
+ 
++static void v4l2_set_color(V4L2Buffer *buf,
++                           const enum AVColorPrimaries avcp, /* primaries: first guess at colorspace */
++                           const enum AVColorSpace avcs, /* matrix: may override colorspace */
++                           const enum AVColorTransferCharacteristic avxc) /* transfer: selects xfer_func */
++{ /* Translate libav colour metadata into V4L2 enums and store them in buf->context->format. */
++    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
++    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
++    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
++    /* Anything without an explicit mapping below keeps the V4L2 *_DEFAULT value. */
++    switch (avcp) {
++    case AVCOL_PRI_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        ycbcr = V4L2_YCBCR_ENC_709;
++        break;
++    case AVCOL_PRI_BT470M:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        ycbcr = V4L2_YCBCR_ENC_601;
++        break;
++    case AVCOL_PRI_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_PRI_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_PRI_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_PRI_BT2020:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    case AVCOL_PRI_SMPTE428:
++    case AVCOL_PRI_SMPTE431:
++    case AVCOL_PRI_SMPTE432:
++    case AVCOL_PRI_EBU3213:
++    case AVCOL_PRI_RESERVED:
++    case AVCOL_PRI_FILM:
++    case AVCOL_PRI_UNSPECIFIED:
++    default:
++        break;
++    }
++    /* The colour space, when it has a mapping, takes precedence over the primaries for cs. */
++    switch (avcs) {
++    case AVCOL_SPC_RGB:
++        cs = V4L2_COLORSPACE_SRGB;
++        break;
++    case AVCOL_SPC_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        break;
++    case AVCOL_SPC_FCC:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        break;
++    case AVCOL_SPC_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_SPC_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_SPC_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_SPC_BT2020_CL:
++        cs = V4L2_COLORSPACE_BT2020;
++        ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
++        break;
++    case AVCOL_SPC_BT2020_NCL:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    default:
++        break;
++    }
++    /* Switch on the avxc argument: 'xfer' is only the output and still holds the DEFAULT placeholder. */
++    switch (avxc) {
++    case AVCOL_TRC_BT709:
++        xfer = V4L2_XFER_FUNC_709;
++        break;
++    case AVCOL_TRC_IEC61966_2_1:
++        xfer = V4L2_XFER_FUNC_SRGB;
++        break;
++    case AVCOL_TRC_SMPTE240M:
++        xfer = V4L2_XFER_FUNC_SMPTE240M;
++        break;
++    case AVCOL_TRC_SMPTE2084:
++        xfer = V4L2_XFER_FUNC_SMPTE2084;
++        break;
++    default:
++        break;
++    }
++    /* The values live in the context's format; pick pix_mp or pix to match the buffer type. */
++    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
++        buf->context->format.fmt.pix_mp.colorspace = cs;
++        buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr;
++        buf->context->format.fmt.pix_mp.xfer_func = xfer;
++    } else {
++        buf->context->format.fmt.pix.colorspace = cs;
++        buf->context->format.fmt.pix.ycbcr_enc = ycbcr;
++        buf->context->format.fmt.pix.xfer_func = xfer;
++    }
++}
++
+ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
+ {
+     enum v4l2_quantization qt;
+@@ -134,6 +270,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
+      return AVCOL_RANGE_UNSPECIFIED;
+ }
+ 
++static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr)
++{ /* Map an AVColorRange onto a V4L2 quantization and store it in buf->context->format. */
++    const enum v4l2_quantization q =
++        avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE :
++        avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE :
++            V4L2_QUANTIZATION_DEFAULT; /* any other range value */
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
++        buf->context->format.fmt.pix_mp.quantization = q;
++    } else {
++        buf->context->format.fmt.pix.quantization = q;
++    }
++}
++
+ static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
+ {
+     enum v4l2_ycbcr_encoding ycbcr;
+@@ -210,71 +360,294 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf)
+     return AVCOL_TRC_UNSPECIFIED;
+ }
+ 
+-static void v4l2_free_buffer(void *opaque, uint8_t *unused)
++static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf)
+ {
+-    V4L2Buffer* avbuf = opaque;
+-    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
++    return V4L2_FIELD_IS_INTERLACED(buf->buf.field);
++}
+ 
+-    if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
+-        atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
++{ /* Non-zero only when the buffer's field is exactly V4L2_FIELD_INTERLACED_TB. */
++    return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
++}
+ 
+-        if (s->reinit) {
+-            if (!atomic_load(&s->refcount))
+-                sem_post(&s->refsync);
+-        } else {
+-            if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) {
+-                /* no need to queue more buffers to the driver */
+-                avbuf->status = V4L2BUF_AVAILABLE;
+-            }
+-            else if (avbuf->context->streamon)
+-                ff_v4l2_buffer_enqueue(avbuf);
+-        }
++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff)
++{ /* Set buf->buf.field: NONE when not interlaced, else INTERLACED_TB or INTERLACED_BT. */
++    buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : /* is_tff is ignored for progressive */
++        is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT;
++}
+ 
+-        ff_refstruct_unref(&avbuf->context_ref);
++static inline void frame_set_interlace(AVFrame* frame, const int is_interlaced, const int is_tff)
++{ /* Set interlace state on the frame flags and, while FF_API_INTERLACED_FRAME is on, the old fields too. */
++    if (!is_interlaced) { /* progressive: clear both flags, is_tff is ignored */
++#if FF_API_INTERLACED_FRAME
++FF_DISABLE_DEPRECATION_WARNINGS
++        frame->interlaced_frame = 0;
++        frame->top_field_first =  0;
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++        frame->flags &= ~(AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED);
++    }
++    else { /* interlaced: always set INTERLACED, set or clear TOP_FIELD_FIRST per is_tff */
++#if FF_API_INTERLACED_FRAME
++FF_DISABLE_DEPRECATION_WARNINGS
++        frame->interlaced_frame = 1;
++        frame->top_field_first =  !!is_tff; /* normalise to 0/1 */
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++        if (is_tff)
++            frame->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED;
++        else
++            frame->flags = (frame->flags & ~AV_FRAME_FLAG_TOP_FIELD_FIRST) | AV_FRAME_FLAG_INTERLACED;
+     }
+ }
+ 
+-static int v4l2_buf_increase_ref(V4L2Buffer *in)
++static inline int frame_is_interlaced(const AVFrame* const frame)
+ {
+-    V4L2m2mContext *s = buf_to_m2mctx(in);
++#if FF_API_INTERLACED_FRAME
++FF_DISABLE_DEPRECATION_WARNINGS
++    return frame->interlaced_frame || (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0;
++FF_ENABLE_DEPRECATION_WARNINGS
++#else
++    return (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0;
++#endif
++}
+ 
+-    if (in->context_ref)
+-        atomic_fetch_add(&in->context_refcount, 1);
+-    else {
+-        in->context_ref = ff_refstruct_ref(s->self_ref);
++static inline int frame_is_tff(const AVFrame* const frame)
++{ /* Non-zero if the frame is marked top-field-first by the flag or, when still compiled in, the old field. */
++#if FF_API_INTERLACED_FRAME
++FF_DISABLE_DEPRECATION_WARNINGS
++    return frame->top_field_first || (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0; /* either source counts */
++FF_ENABLE_DEPRECATION_WARNINGS
++#else
++    return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0;
++#endif
++}
++
++static inline int frame_is_key(const AVFrame* const frame)
++{ /* Non-zero if the frame is marked as a keyframe by AV_FRAME_FLAG_KEY or, when still compiled in, key_frame. */
++#if FF_API_FRAME_KEY
++FF_DISABLE_DEPRECATION_WARNINGS
++    return frame->key_frame || (frame->flags & AV_FRAME_FLAG_KEY) != 0; /* either source counts */
++FF_ENABLE_DEPRECATION_WARNINGS
++#else
++    return (frame->flags & AV_FRAME_FLAG_KEY) != 0;
++#endif
++}
+ 
+-        in->context_refcount = 1;
++static inline void frame_set_key(AVFrame* const frame, const int is_key)
++{ /* Set or clear the keyframe mark in frame->flags and, while FF_API_FRAME_KEY is on, in key_frame too. */
++#if FF_API_FRAME_KEY
++FF_DISABLE_DEPRECATION_WARNINGS
++    frame->key_frame = !!is_key; /* normalise to 0/1 */
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++    frame->flags = is_key ?
++        frame->flags | AV_FRAME_FLAG_KEY :
++        frame->flags & ~AV_FRAME_FLAG_KEY;
++}
++
++#if CONFIG_LIBDRM
++static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
++{
++    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
++    AVDRMLayerDescriptor *layer;
++
++    /* fill the DRM frame descriptor */
++    drm_desc->nb_objects = avbuf->num_planes;
++    drm_desc->nb_layers = 1;
++
++    layer = &drm_desc->layers[0];
++    layer->nb_planes = avbuf->num_planes;
++
++    for (int i = 0; i < avbuf->num_planes; i++) {
++        layer->planes[i].object_index = i;
++        layer->planes[i].offset = avbuf->plane_info[i].offset;
++        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
+     }
+ 
+-    in->status = V4L2BUF_RET_USER;
+-    atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
++    switch (avbuf->context->av_pix_fmt) {
++    case AV_PIX_FMT_0BGR:
++        layer->format = DRM_FORMAT_RGBX8888;
++        break;
++    case AV_PIX_FMT_RGB0:
++        layer->format = DRM_FORMAT_XBGR8888;
++        break;
++    case AV_PIX_FMT_0RGB:
++        layer->format = DRM_FORMAT_BGRX8888;
++        break;
++    case AV_PIX_FMT_BGR0:
++        layer->format = DRM_FORMAT_XRGB8888;
++        break;
+ 
+-    return 0;
++    case AV_PIX_FMT_ABGR:
++        layer->format = DRM_FORMAT_RGBA8888;
++        break;
++    case AV_PIX_FMT_RGBA:
++        layer->format = DRM_FORMAT_ABGR8888;
++        break;
++    case AV_PIX_FMT_ARGB:
++        layer->format = DRM_FORMAT_BGRA8888;
++        break;
++    case AV_PIX_FMT_BGRA:
++        layer->format = DRM_FORMAT_ARGB8888;
++        break;
++
++    case AV_PIX_FMT_BGR24:
++        layer->format = DRM_FORMAT_BGR888;
++        break;
++    case AV_PIX_FMT_RGB24:
++        layer->format = DRM_FORMAT_RGB888;
++        break;
++
++    case AV_PIX_FMT_YUYV422:
++
++        layer->format = DRM_FORMAT_YUYV;
++        layer->nb_planes = 1;
++
++        break;
++
++    case AV_PIX_FMT_NV12:
++    case AV_PIX_FMT_NV21:
++
++        layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ?
++            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
++
++        if (avbuf->num_planes > 1)
++            break;
++
++        layer->nb_planes = 2;
++
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
++            avbuf->context->format.fmt.pix.height;
++        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
++        break;
++
++    case AV_PIX_FMT_YUV420P:
++
++        layer->format = DRM_FORMAT_YUV420;
++
++        if (avbuf->num_planes > 1)
++            break;
++
++        layer->nb_planes = 3;
++
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
++            avbuf->context->format.fmt.pix.height;
++        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
++
++        layer->planes[2].object_index = 0;
++        layer->planes[2].offset = layer->planes[1].offset +
++            ((avbuf->plane_info[0].bytesperline *
++              avbuf->context->format.fmt.pix.height) >> 2);
++        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
++        break;
++
++    default:
++        drm_desc->nb_layers = 0;
++        break;
++    }
++
++    return (uint8_t *) drm_desc;
+ }
++#endif
+ 
+-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
++static void v4l2_free_bufref(void *opaque, uint8_t *data)
+ {
+-    int ret;
++    AVBufferRef * bufref = (AVBufferRef *)data;
++    V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data;
++    struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl);
+ 
+-    if (plane >= in->num_planes)
+-        return AVERROR(EINVAL);
++    if (ctx != NULL) {
++        // Buffer still attached to context
++        V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
+ 
+-    /* even though most encoders return 0 in data_offset encoding vp8 does require this value */
+-    *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset,
+-                            in->plane_info[plane].length, v4l2_free_buffer, in, 0);
+-    if (!*buf)
+-        return AVERROR(ENOMEM);
++        if (!s->output_drm && avbuf->dmabuf[0] != NULL) {
++            for (unsigned int i = 0; i != avbuf->num_planes; ++i)
++                dmabuf_read_end(avbuf->dmabuf[i]);
++        }
+ 
+-    ret = v4l2_buf_increase_ref(in);
 -    if (ret)
--        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_SELECTION failed: %d\n", ret);
-+    if (ret) {
-+        ret = AVERROR(errno);
-+        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION failed: %d\n", ret);
-+    }
- 
-     sel.r.width = width;
-     sel.r.height = height;
-@@ -432,10 +447,12 @@ static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width,
-     sel.flags = V4L2_SEL_FLAG_LE;
- 
-     ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel);
--    if (ret)
--        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_SELECTION failed: %d\n", ret);
-+    if (ret) {
-+        ret = AVERROR(errno);
-+        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %d\n", ret);
-+    }
+-        av_buffer_unref(buf);
++        ff_mutex_lock(&ctx->lock);
  
 -    return ret;
-+    return 0;
- }
- 
- static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node)
-@@ -517,10 +534,25 @@ static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf)
-     return 0;
- }
- 
--static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
-+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf, const uint32_t pixelformat)
- {
-     struct v4l2_exportbuffer expbuf;
-     int i, ret;
-+    uint64_t mod = DRM_FORMAT_MOD_LINEAR;
-+    uint32_t fmt = 0;
++        ff_v4l2_buffer_set_avail(avbuf);
++        avbuf->buf.timestamp.tv_sec = 0;
++        avbuf->buf.timestamp.tv_usec = 0;
 +
-+    switch (pixelformat) {
-+    case V4L2_PIX_FMT_NV12:
-+        fmt = DRM_FORMAT_NV12;
-+        break;
-+    case V4L2_PIX_FMT_YUV420:
-+        fmt = DRM_FORMAT_YUV420;
-+        break;
-+    default:
-+        return AVERROR(EINVAL);
++        if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
++            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name);
++        }
++        else if (ctx->streamon) {
++            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name);
++            ff_v4l2_buffer_enqueue(avbuf);  // will set to IN_DRIVER
++        }
++        else {
++            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name);
++        }
++
++        ff_mutex_unlock(&ctx->lock);
 +    }
 +
-+    avbuf->drm_frame.layers[0].format = fmt;
- 
-     for (i = 0; i < avbuf->num_planes; i++) {
-         memset(&expbuf, 0, sizeof(expbuf));
-@@ -539,12 +571,12 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
-             /* drm frame */
-             avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length;
-             avbuf->drm_frame.objects[i].fd = expbuf.fd;
--            avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+            avbuf->drm_frame.objects[i].format_modifier = mod;
-         } else {
-             /* drm frame */
-             avbuf->drm_frame.objects[0].size = avbuf->buffer.length;
-             avbuf->drm_frame.objects[0].fd = expbuf.fd;
--            avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+            avbuf->drm_frame.objects[0].format_modifier = mod;
-         }
-     }
- 
-@@ -629,7 +661,7 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
-             if (ret)
-                 goto fail;
- 
--            ret = v4l2_buffer_export_drm(buf);
-+            ret = v4l2_buffer_export_drm(buf, multiplanar ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat);
-             if (ret)
-                 goto fail;
-         }
-@@ -878,7 +910,6 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused)
- 
- static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height)
- {
--    int av_pix_fmt = AV_PIX_FMT_YUV420P;
-     AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
-     AVDRMLayerDescriptor *layer;
- 
-@@ -895,20 +926,13 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height)
-         layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
-     }
- 
--    switch (av_pix_fmt) {
--    case AV_PIX_FMT_YUYV422:
--
--        layer->format = DRM_FORMAT_YUYV;
-+    switch (layer->format) {
-+    case DRM_FORMAT_YUYV:
-         layer->nb_planes = 1;
--
-         break;
- 
--    case AV_PIX_FMT_NV12:
--    case AV_PIX_FMT_NV21:
--
--        layer->format = av_pix_fmt == AV_PIX_FMT_NV12 ?
--            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
--
-+    case DRM_FORMAT_NV12:
-+    case DRM_FORMAT_NV21:
-         if (avbuf->num_planes > 1)
-             break;
- 
-@@ -920,10 +944,7 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height)
-         layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
-         break;
- 
--    case AV_PIX_FMT_YUV420P:
--
--        layer->format = DRM_FORMAT_YUV420;
--
-+    case DRM_FORMAT_YUV420:
-         if (avbuf->num_planes > 1)
-             break;
- 
-@@ -1032,6 +1053,26 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
-     return 0;
- }
- 
-+static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc)
-+{
-+    const int is_linear = (drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ||
-+            drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID);
-+
-+    switch (drm_desc->layers[0].format) {
-+    case DRM_FORMAT_YUV420:
-+        if (is_linear)
-+            return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_YUV420 : 0;
-+        break;
-+    case DRM_FORMAT_NV12:
-+        if (is_linear)
-+            return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_NV12 : 0;
-+        break;
-+    default:
-+        break;
-+    }
-+    return 0;
++    ff_weak_link_unlock(avbuf->context_wl);
++    av_buffer_unref(&bufref);
 +}
 +
- static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
++static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i)
++{ /* Length of plane i for multiplanar buffer types; otherwise the buffer's own length (i is unused). */
++    return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? b->m.planes[i].length : b->length;
++}
++
++static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
++{ /* Fill avbuf->drm_frame.objects[] with one fd + size per plane; returns 0 or a negative AVERROR. */
++    int i, ret;
++    const V4L2m2mContext * const s = buf_to_m2mctx(avbuf);
++
++    for (i = 0; i < avbuf->num_planes; i++) {
++        int dma_fd = -1;
++        const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i);
++
++        if (s->db_ctl != NULL) { /* a dmabuf allocator is configured: allocate the plane and give V4L2 its fd */
++            if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL)
++                return AVERROR(ENOMEM); /* NOTE(review): planes set up on earlier iterations are not released here - presumably the caller cleans up; confirm */
++            dma_fd = dmabuf_fd(avbuf->dmabuf[i]);
++            if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type))
++                avbuf->buf.m.planes[i].m.fd = dma_fd;
++            else
++                avbuf->buf.m.fd = dma_fd;
++
++            if (!s->output_drm) /* NOTE(review): dmabuf_map() result is stored unchecked - confirm it cannot fail */
++                avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]);
++        }
++        else { /* no allocator: ask the driver to export its own buffer via VIDIOC_EXPBUF */
++            struct v4l2_exportbuffer expbuf;
++            memset(&expbuf, 0, sizeof(expbuf));
++
++            expbuf.index = avbuf->buf.index;
++            expbuf.type = avbuf->buf.type;
++            expbuf.plane = i;
++
++            ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf);
++            if (ret < 0)
++                return AVERROR(errno);
++            dma_fd = expbuf.fd;
++        }
++
++        avbuf->drm_frame.objects[i].size = blen;
++        avbuf->drm_frame.objects[i].fd = dma_fd;
++#if !CONFIG_LIBDRM
++        avbuf->drm_frame.objects[i].format_modifier = 0; /* drm_fourcc.h is only included when CONFIG_LIBDRM is set */
++#else
++        avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
++#endif
++    }
++
++    return 0;
+ }
+ 
+ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset)
  {
-     AVFilterContext *avctx         = link->dst;
-@@ -1047,23 +1088,27 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
-            avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out);
+     unsigned int bytesused, length;
++    int rv = 0;
  
-     if (ctx->field_order == V4L2_FIELD_ANY) {
--        AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)in->data[0];
-+        const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0];
-+        const uint32_t pixelformat = desc_pixelformat(drm_desc);
-+
-+        if (pixelformat == 0) {
-+            av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n",
-+                   av_fourcc2str(drm_desc->layers[0].format),
-+                   drm_desc->nb_objects, drm_desc->objects[0].format_modifier);
-+            return AVERROR(EINVAL);
-+        }
-+
-         ctx->orig_width = drm_desc->layers[0].planes[0].pitch;
-         ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width;
+     if (plane >= out->num_planes)
+         return AVERROR(EINVAL);
+@@ -282,32 +655,65 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i
+     length = out->plane_info[plane].length;
+     bytesused = FFMIN(size+offset, length);
  
-         av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height,
-            drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset);
- 
--        if (in->top_field_first)
--            ctx->field_order = V4L2_FIELD_INTERLACED_TB;
--        else
--            ctx->field_order = V4L2_FIELD_INTERLACED_BT;
+-    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset));
 -
--        ret = deint_v4l2m2m_set_format(output, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-+        ret = deint_v4l2m2m_set_format(output, pixelformat, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-         if (ret)
-             return ret;
- 
--        ret = deint_v4l2m2m_set_format(capture, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-+        ret = deint_v4l2m2m_set_format(capture, pixelformat, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
-         if (ret)
-             return ret;
- 
-@@ -1082,6 +1127,12 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
-         ret = deint_v4l2m2m_streamon(output);
-         if (ret)
-             return ret;
-+
-+        if (in->top_field_first)
-+            ctx->field_order = V4L2_FIELD_INTERLACED_TB;
-+        else
-+            ctx->field_order = V4L2_FIELD_INTERLACED_BT;
-+
+-    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
+-        out->planes[plane].bytesused = bytesused;
+-        out->planes[plane].length = length;
+-    } else {
+-        out->buf.bytesused = bytesused;
+-        out->buf.length = length;
++    if (size > length - offset) {
++        size = length - offset;
++        rv = AVERROR(ENOMEM);
      }
  
-     ret = deint_v4l2m2m_enqueue_frame(output, in);
-@@ -1157,28 +1208,31 @@ again:
-         return 0;
-     }
- 
--    {
-+    recycle_q(&s->output);
-+    n = count_enqueued(&s->output);
+-    return 0;
++    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size);
 +
-+    while (n < 6) {
-         AVFrame * frame;
-         int rv;
++    set_buf_length(out, plane, bytesused, length);
++
++    return rv;
++}
++
++static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf)
++{ /* Returns a new AVBufferRef wrapping a ref to the context's bufref for avbuf, or NULL on failure. */
++    AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]);
++    AVBufferRef * newbuf;
++
++    if (!bufref)
++        return NULL;
++    /* The wrapper's data pointer is the inner bufref; v4l2_free_bufref is its free callback. */
++    newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0);
++    if (newbuf == NULL)
++        av_buffer_unref(&bufref);
++    else /* only mark the buffer as held by the user when a wrapper was actually handed out */
++        avbuf->status = V4L2BUF_RET_USER;
++    return newbuf;
+ }
  
--        recycle_q(&s->output);
--        n = count_enqueued(&s->output);
-+        if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
-+            av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
-+            return rv;
-+        }
+ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
+ {
+-    int i, ret;
++    int i;
  
--        while (n < 6) {
--            if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
--                av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
--                return rv;
--            }
-+        if (frame == NULL) {
-+            av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
-+            break;
-+        }
+     frame->format = avbuf->context->av_pix_fmt;
  
--            if (frame == NULL) {
--                av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
--                break;
--            }
-+        rv = deint_v4l2m2m_filter_frame(inlink, frame);
-+        av_frame_free(&frame);
- 
--            deint_v4l2m2m_filter_frame(inlink, frame);
--            av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
--            ++n;
--        }
+-    for (i = 0; i < avbuf->num_planes; i++) {
+-        ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
+-        if (ret)
+-            return ret;
++    frame->buf[0] = wrap_avbuf(avbuf);
++    if (frame->buf[0] == NULL)
++        return AVERROR(ENOMEM);
++
++    if (buf_to_m2mctx(avbuf)->output_drm) {
++#if !CONFIG_LIBDRM
++        return AVERROR_OPTION_NOT_FOUND;
++#else
++        /* 1. get references to the actual data */
++        const int rv = ff_v4l2_context_frames_set(avbuf->context);
 +        if (rv != 0)
 +            return rv;
 +
-+        av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
-+        ++n;
++        frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
++        frame->format = AV_PIX_FMT_DRM_PRIME;
++        frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref);
++        return 0;
++#endif
++    }
++
+ 
++    /* 1. get references to the actual data */
++    for (i = 0; i < avbuf->num_planes; i++) {
++        frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset;
+         frame->linesize[i] = avbuf->plane_info[i].bytesperline;
+-        frame->data[i] = frame->buf[i]->data;
      }
  
-     if (n < 6) {
-
-From b24e23b1a87bc51508e8fb2cf4ea31385059e150 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 19 Aug 2022 15:29:11 +0000
-Subject: [PATCH 063/186] v4l2_req: Enable use of MMAP for buffer alloc
-
-Use MMAP rather than DMABUF if either the dmabuf device can't be opened
-or create_buf doesn't set the capability.
----
- libavcodec/v4l2_req_dmabufs.c  |  22 +++
- libavcodec/v4l2_req_dmabufs.h  |   3 +
- libavcodec/v4l2_req_media.c    | 263 ++++++++++++++++++++++++++++-----
- libavcodec/v4l2_req_media.h    |  21 ++-
- libavcodec/v4l2_request_hevc.c |  42 +++++-
- 5 files changed, 307 insertions(+), 44 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c
-index ae6c64836972..c4bbed18c680 100644
---- a/libavcodec/v4l2_req_dmabufs.c
-+++ b/libavcodec/v4l2_req_dmabufs.c
-@@ -36,6 +36,26 @@ static unsigned int total_bufs = 0;
- static size_t total_size = 0;
+     /* fixup special cases */
+@@ -316,88 +722,152 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
+     case AV_PIX_FMT_NV21:
+         if (avbuf->num_planes > 1)
+             break;
+-        frame->linesize[1] = avbuf->plane_info[0].bytesperline;
+-        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
++        frame->linesize[1] = frame->linesize[0];
++        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
+         break;
+ 
+     case AV_PIX_FMT_YUV420P:
+         if (avbuf->num_planes > 1)
+             break;
+-        frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1;
+-        frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1;
+-        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
+-        frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2);
++        frame->linesize[1] = frame->linesize[0] / 2;
++        frame->linesize[2] = frame->linesize[1];
++        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
++        frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2;
+         break;
+ 
+     default:
+         break;
+     }
+ 
++    if (avbuf->dmabuf[0] != NULL) {
++        for (unsigned int i = 0; i != avbuf->num_planes; ++i)
++            dmabuf_read_start(avbuf->dmabuf[i]);
++    }
++
++    return 0;
++}
++
++static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h)
++{
++    // Matching strides with at most 32 bytes of row padding: one bulk copy
++    if (src_stride == dst_stride && dst_stride <= w + 32) {
++        memcpy(dst, src, dst_stride * h);
++        return;
++    }
++    for (; h > 0; --h) {  // else copy only the w payload bytes of each row
++        memcpy(dst, src, w);
++        dst += dst_stride;
++        src += src_stride;
++    }
++}
++
++static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes)
++{   // Plane 0 and, when the format carries alpha, the last plane are not chroma
++    return i != 0 && (i != num_planes - 1 || (desc->flags & AV_PIX_FMT_FLAG_ALPHA) == 0);
++}
++
++// Point a V4L2 DMABUF buffer at the single dmabuf of a DRM_PRIME frame (no copy)
++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
++{
++    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++
++    if (frame->format != AV_PIX_FMT_DRM_PRIME || !src)
++        return AVERROR(EINVAL);
++
++    av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF);
++
++    // Only currently cope with single buffer types
++    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type) && out->buf.length != 1)
++        return AVERROR_PATCHWELCOME;
++    if (src->nb_objects != 1 || frame->buf[0] == NULL)
++        return AVERROR(EINVAL);
++
++    // Keep a ref to the source buffer until we are done with its fd; fail
++    // rather than queue an fd nothing is keeping alive.
++    // No need to copy src AVDescriptor and if we did then we may confuse
++    // fd close on free
++    out->ref_buf = av_buffer_ref(frame->buf[0]);
++    if (out->ref_buf == NULL)
++        return AVERROR(ENOMEM);
++
++    // Hand the fd to V4L2 only once the ref is held
++    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type))
++        out->planes[0].m.fd = src->objects[0].fd;
++    else
++        out->buf.m.fd = src->objects[0].fd;
++
+     return 0;
+ }
+ 
+ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+ {
+-    int i, ret;
+-    struct v4l2_format fmt = out->context->format;
+-    int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
+-                       fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat;
+-    int height       = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
+-                       fmt.fmt.pix_mp.height : fmt.fmt.pix.height;
+-    int is_planar_format = 0;
+-
+-    switch (pixel_format) {
+-    case V4L2_PIX_FMT_YUV420M:
+-    case V4L2_PIX_FMT_YVU420M:
+-#ifdef V4L2_PIX_FMT_YUV422M
+-    case V4L2_PIX_FMT_YUV422M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YVU422M
+-    case V4L2_PIX_FMT_YVU422M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YUV444M
+-    case V4L2_PIX_FMT_YUV444M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YVU444M
+-    case V4L2_PIX_FMT_YVU444M:
+-#endif
+-    case V4L2_PIX_FMT_NV12M:
+-    case V4L2_PIX_FMT_NV21M:
+-    case V4L2_PIX_FMT_NV12MT_16X16:
+-    case V4L2_PIX_FMT_NV12MT:
+-    case V4L2_PIX_FMT_NV16M:
+-    case V4L2_PIX_FMT_NV61M:
+-        is_planar_format = 1;
+-    }
+-
+-    if (!is_planar_format) {
+-        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+-        int planes_nb = 0;
+-        int offset = 0;
+-
+-        for (i = 0; i < desc->nb_components; i++)
+-            planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+-
+-        for (i = 0; i < planes_nb; i++) {
+-            int size, h = height;
+-            if (i == 1 || i == 2) {
++    int i;
++    int num_planes = 0;
++    int pel_strides[4] = {0};
++
++    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
++
++    if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) {
++        av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__);
++        return -1;
++    }
++
++    for (i = 0; i != desc->nb_components; ++i) {
++        if (desc->comp[i].plane >= num_planes)
++            num_planes = desc->comp[i].plane + 1;
++        pel_strides[desc->comp[i].plane] = desc->comp[i].step;
++    }
++
++    if (out->num_planes > 1) {
++        if (num_planes != out->num_planes) {
++            av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes);
++            return -1;
++        }
++        for (i = 0; i != num_planes; ++i) {
++            int w = frame->width;
++            int h = frame->height;
++            if (is_chroma(desc, i, num_planes)) {
++                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
+                 h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
+             }
+-            size = frame->linesize[i] * h;
+-            ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset);
+-            if (ret)
+-                return ret;
+-            offset += size;
++
++            cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline,
++                   frame->data[i], frame->linesize[i],
++                   w * pel_strides[i], h);
++            set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length);
+         }
+-        return 0;
+     }
++    else
++    {
++        unsigned int offset = 0;
++
++        for (i = 0; i != num_planes; ++i) {
++            int w = frame->width;
++            int h = frame->height;
++            int dst_stride = out->plane_info[0].bytesperline;
++            uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset;
++
++            if (is_chroma(desc, i, num_planes)) {
++                // Is chroma
++                dst_stride >>= desc->log2_chroma_w;
++                offset += dst_stride * (out->context->height >> desc->log2_chroma_h);
++                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
++                h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
++            }
++            else {
++                // Is luma or alpha
++                offset += dst_stride * out->context->height;
++            }
++            if (offset > out->plane_info[0].length) {
++                av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length);
++                return -1;
++            }
+ 
+-    for (i = 0; i < out->num_planes; i++) {
+-        ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0);
+-        if (ret)
+-            return ret;
++            cpy_2d(dst, dst_stride,
++                   frame->data[i], frame->linesize[i],
++                   w * pel_strides[i], h);
++        }
++        set_buf_length(out, 0, offset, out->plane_info[0].length);
+     }
+-
+     return 0;
+ }
+ 
+@@ -407,16 +877,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+  *
+  ******************************************************************************/
+ 
+-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts)
+ {
+-    v4l2_set_pts(out, frame->pts);
+-
+-    return v4l2_buffer_swframe_to_buf(frame, out);
++    out->buf.flags = frame_is_key(frame) ?
++        (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
++        (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
++    // Beware that colour info is held in format rather than the actual
++    // v4l2 buffer struct so this may not be as useful as you might hope
++    v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
++    v4l2_set_color_range(out, frame->color_range);
++    // PTS & interlace are buffer vars
++    if (track_ts)
++        out->buf.timestamp = tv_from_int(track_ts);
++    else
++        v4l2_set_pts(out, frame->pts);
++    v4l2_set_interlace(out, frame_is_interlaced(frame), frame_is_tff(frame));
++
++    return frame->format == AV_PIX_FMT_DRM_PRIME ?
++        v4l2_buffer_primeframe_to_buf(frame, out) :
++        v4l2_buffer_swframe_to_buf(frame, out);
+ }
+ 
+ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
+ {
+     int ret;
++    V4L2Context * const ctx = avbuf->context;
+ 
+     av_frame_unref(frame);
+ 
+@@ -426,19 +911,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
+         return ret;
+ 
+     /* 2. get frame information */
+-    if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
+-        frame->flags |= AV_FRAME_FLAG_KEY;
++    frame_set_key(frame, avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME);
++    frame->pict_type = (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) != 0 ? AV_PICTURE_TYPE_I :
++        (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P :
++        (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B :
++            AV_PICTURE_TYPE_NONE;
+     frame->color_primaries = v4l2_get_color_primaries(avbuf);
+     frame->colorspace = v4l2_get_color_space(avbuf);
+     frame->color_range = v4l2_get_color_range(avbuf);
+     frame->color_trc = v4l2_get_color_trc(avbuf);
+     frame->pts = v4l2_get_pts(avbuf);
+     frame->pkt_dts = AV_NOPTS_VALUE;
++    frame_set_interlace(frame, v4l2_buf_is_interlaced(avbuf), v4l2_buf_is_top_first(avbuf));
+ 
+     /* these values are updated also during re-init in v4l2_process_driver_event */
+-    frame->height = avbuf->context->height;
+-    frame->width = avbuf->context->width;
+-    frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio;
++    frame->height = ctx->height;
++    frame->width = ctx->width;
++    frame->sample_aspect_ratio = ctx->sample_aspect_ratio;
++
++    if (ctx->selection.height && ctx->selection.width) {
++        frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0;
++        frame->crop_top  = ctx->selection.top < frame->height ? ctx->selection.top  : 0;
++        frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ?
++            frame->width - (ctx->selection.left + ctx->selection.width) : 0;
++        frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ?
++            frame->height - (ctx->selection.top + ctx->selection.height) : 0;
++    }
+ 
+     /* 3. report errors upstream */
+     if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) {
+@@ -451,15 +949,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
+ 
+ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
+ {
+-    int ret;
+-
+     av_packet_unref(pkt);
+-    ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf);
+-    if (ret)
+-        return ret;
++
++    pkt->buf = wrap_avbuf(avbuf);
++    if (pkt->buf == NULL)
++        return AVERROR(ENOMEM);
+ 
+     pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused;
+-    pkt->data = pkt->buf->data;
++    pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset;
++    pkt->flags = 0;
+ 
+     if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
+         pkt->flags |= AV_PKT_FLAG_KEY;
+@@ -474,39 +972,108 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
+     return 0;
+ }
+ 
+-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
++                                    const void *extdata, size_t extlen,
++                                    const int64_t timestamp)
+ {
+     int ret;
+ 
+-    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0);
+-    if (ret)
++    if (extlen) {
++        ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0);
++        if (ret)
++            return ret;
++    }
++
++    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen);
++    if (ret && ret != AVERROR(ENOMEM))
+         return ret;
+ 
+-    v4l2_set_pts(out, pkt->pts);
++    if (timestamp)
++        out->buf.timestamp = tv_from_int(timestamp);
++    else
++        v4l2_set_pts(out, pkt->pts);
+ 
+-    if (pkt->flags & AV_PKT_FLAG_KEY)
+-        out->flags = V4L2_BUF_FLAG_KEYFRAME;
++    out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ?
++        (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) :
++        (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
+ 
+-    return 0;
++    return ret;
++}
++
++int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
++{   // Plain packet copy: no extradata prefix, timestamp taken from pkt->pts
++    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0);
++}
++
++
++static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data)
++{   // AVBuffer free callback: data is the V4L2Buffer itself, opaque is not used
++    V4L2Buffer * const avbuf = (V4L2Buffer *)data;
++    int i;
++    // Unmap every plane that has a non-NULL mm_addr
++    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) {
++        struct V4L2Plane_info *p = avbuf->plane_info + i;
++        if (p->mm_addr != NULL)
++            munmap(p->mm_addr, p->length);
++    }
++    // No dmabuf handle in slot 0: close the fds held in drm_frame (-1 = unused slot)
++    if (avbuf->dmabuf[0] == NULL) {
++        for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
++            if (avbuf->drm_frame.objects[i].fd != -1)
++                close(avbuf->drm_frame.objects[i].fd);
++        }
++    }
++    else {  // otherwise every slot goes to dmabuf_free (presumably NULL-safe - TODO confirm)
++        for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) {
++            dmabuf_free(avbuf->dmabuf[i]);
++        }
++    }
++    // Drop the source-buffer ref, if one is held
++    av_buffer_unref(&avbuf->ref_buf);
++    // Release this buffer's weak link to its context
++    ff_weak_link_unref(&avbuf->context_wl);
++
++    av_free(avbuf);
+ }
+ 
+-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
++
++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem)
+ {
+-    V4L2Context *ctx = avbuf->context;
+     int ret, i;
++    V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
++    AVBufferRef * bufref;
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    int want_mmap;
++
++    *pbufref = NULL;
++    if (avbuf == NULL)
++        return AVERROR(ENOMEM);
+ 
+-    avbuf->buf.memory = V4L2_MEMORY_MMAP;
++    bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0);
++    if (bufref == NULL) {
++        av_free(avbuf);
++        return AVERROR(ENOMEM);
++    }
++
++    avbuf->context = ctx;
++    avbuf->buf.memory = mem;
+     avbuf->buf.type = ctx->type;
+     avbuf->buf.index = index;
+ 
++    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
++        avbuf->drm_frame.objects[i].fd = -1;
++    }
++
++    avbuf->context_wl = ff_weak_link_ref(ctx->wl_master);
++
+     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+         avbuf->buf.length = VIDEO_MAX_PLANES;
+         avbuf->buf.m.planes = avbuf->planes;
+     }
+ 
+-    ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf);
++    ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf) < 0 ? AVERROR(errno) : 0; // capture errno now: the fail path returns ret
+     if (ret < 0)
+-        return AVERROR(errno);
++        goto fail;
+ 
+     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+         avbuf->num_planes = 0;
+@@ -518,33 +1085,41 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
+     } else
+         avbuf->num_planes = 1;
+ 
+-    for (i = 0; i < avbuf->num_planes; i++) {
++    want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP &&
++        (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm);
+ 
++    for (i = 0; i < avbuf->num_planes; i++) {
+         avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
+             ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline :
+             ctx->format.fmt.pix.bytesperline;
+ 
+         if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+             avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
+-            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
+-                                           PROT_READ | PROT_WRITE, MAP_SHARED,
+-                                           buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
++            avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset;
++
++            if (want_mmap)
++                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
++                                               PROT_READ | PROT_WRITE, MAP_SHARED,
++                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
+         } else {
+             avbuf->plane_info[i].length = avbuf->buf.length;
+-            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
+-                                          PROT_READ | PROT_WRITE, MAP_SHARED,
+-                                          buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
++            avbuf->plane_info[i].offset = 0;
++
++            if (want_mmap)
++                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
++                                               PROT_READ | PROT_WRITE, MAP_SHARED,
++                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
+         }
+ 
+-        if (avbuf->plane_info[i].mm_addr == MAP_FAILED)
+-            return AVERROR(ENOMEM);
++        if (avbuf->plane_info[i].mm_addr == MAP_FAILED) {
++            avbuf->plane_info[i].mm_addr = NULL;
++            ret = AVERROR(ENOMEM);
++            goto fail;
++        }
+     }
+ 
+     avbuf->status = V4L2BUF_AVAILABLE;
+ 
+-    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
+-        return 0;
+-
+     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+         avbuf->buf.m.planes = avbuf->planes;
+         avbuf->buf.length   = avbuf->num_planes;
+@@ -554,20 +1129,52 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
+         avbuf->buf.length    = avbuf->planes[0].length;
+     }
+ 
+-    return ff_v4l2_buffer_enqueue(avbuf);
++    if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) {
++        // export_drm does dmabuf alloc if we aren't using v4l2 alloc
++        ret = v4l2_buffer_export_drm(avbuf);
++        if (ret) {
++            av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n");
++            goto fail;
++        }
++    }
++
++    *pbufref = bufref;
++    return 0;
++
++fail:
++    av_buffer_unref(&bufref);
++    return ret;
+ }
+ 
+ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
+ {
+     int ret;
++    int qc;
+ 
+-    avbuf->buf.flags = avbuf->flags;
++    if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) {
++        av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
++               avbuf->context->name, avbuf->buf.index,
++               avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec,
++               avbuf->context->q_count);
++    }
+ 
+     ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf);
+-    if (ret < 0)
+-        return AVERROR(errno);
++    if (ret < 0) {
++        int err = errno;
++        av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n",
++               avbuf->context->name, avbuf->buf.index,
++               err, strerror(err));
++        return AVERROR(err);
++    }
+ 
++    // Lock not wanted - if called from buffer free then lock already obtained
++    qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1;
+     avbuf->status = V4L2BUF_IN_DRIVER;
++    pthread_cond_broadcast(&avbuf->context->cond);
++
++    av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
++           avbuf->context->name, avbuf->buf.index,
++           avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc);
+ 
+     return 0;
+ }
+diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
+index e35b16130944..444ad94b145e 100644
+--- a/libavcodec/v4l2_buffers.h
++++ b/libavcodec/v4l2_buffers.h
+@@ -28,31 +28,47 @@
+ #include <stddef.h>
+ #include <linux/videodev2.h>
+ 
++#include "avcodec.h"
++#include "libavutil/buffer.h"
+ #include "libavutil/frame.h"
++#include "libavutil/hwcontext_drm.h"
+ #include "packet.h"
+ 
+ enum V4L2Buffer_status {
+     V4L2BUF_AVAILABLE,
+     V4L2BUF_IN_DRIVER,
++    V4L2BUF_IN_USE,
+     V4L2BUF_RET_USER,
+ };
+ 
+ /**
+  * V4L2Buffer (wrapper for v4l2_buffer management)
+  */
++struct V4L2Context;
++struct ff_weak_link_client;
++struct dmabuf_h;
++
+ typedef struct V4L2Buffer {
+-    /* each buffer needs to have a reference to its context */
++    /* each buffer needs to have a reference to its context
++     * The pointer is good enough for most operation but once the buffer has
++     * been passed to the user the buffer may become orphaned so for free ops
++     * the weak link must be used to ensure that the context is actually
++     * there
++     */
+     struct V4L2Context *context;
++    struct ff_weak_link_client *context_wl;
+ 
+-    /* This object is refcounted per-plane, so we need to keep track
+-     * of how many context-refs we are holding.
+-     * This pointer is a RefStruct reference. */
+-    const struct V4L2m2mContext *context_ref;
+-    atomic_uint context_refcount;
++    /* DRM descriptor */
++    AVDRMFrameDescriptor drm_frame;
++    /* For DRM_PRIME encode - need to keep a ref to the source buffer till we
++     * are done
++     */
++    AVBufferRef * ref_buf;
+ 
+     /* keep track of the mmap address and mmap length */
+     struct V4L2Plane_info {
+-        int bytesperline;
++        size_t bytesperline;
++        size_t offset;
+         void * mm_addr;
+         size_t length;
+     } plane_info[VIDEO_MAX_PLANES];
+@@ -63,9 +79,9 @@ typedef struct V4L2Buffer {
+     struct v4l2_buffer buf;
+     struct v4l2_plane planes[VIDEO_MAX_PLANES];
+ 
+-    int flags;
+     enum V4L2Buffer_status status;
+ 
++    struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here
+ } V4L2Buffer;
+ 
+ /**
+@@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf);
+  */
+ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
+ 
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out,
++                                    const void *extdata, size_t extlen,
++                                    const int64_t timestamp);
++
+ /**
+  * Extracts the data from an AVFrame to a V4L2Buffer
+  *
+@@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
+  *
+  * @returns 0 in case of success, a negative AVERROR code otherwise
+  */
+-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts);
+ 
+ /**
+  * Initializes a V4L2Buffer
+@@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out);
+  *
+  * @returns 0 in case of success, a negative AVERROR code otherwise
+  */
+-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem);
+ 
+ /**
+  * Enqueues a V4L2Buffer
+@@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
+  */
+ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf);
+ 
++static inline void
++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf)
++{   // Mark the buffer reusable and release any source-buffer ref it was holding
++    avbuf->status = V4L2BUF_AVAILABLE;
++    av_buffer_unref(&avbuf->ref_buf);
++}
++
+ 
+ #endif // AVCODEC_V4L2_BUFFERS_H
+diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
+index be1df3785b28..e20e3e485c64 100644
+--- a/libavcodec/v4l2_context.c
++++ b/libavcodec/v4l2_context.c
+@@ -28,11 +28,14 @@
+ #include <fcntl.h>
+ #include <poll.h>
+ #include "libavutil/mem.h"
++#include "libavutil/avassert.h"
++#include "libavutil/pixdesc.h"
+ #include "libavcodec/avcodec.h"
+ #include "decode.h"
+ #include "v4l2_buffers.h"
+ #include "v4l2_fmt.h"
+ #include "v4l2_m2m.h"
++#include "weak_link.h"
+ 
+ struct v4l2_format_update {
+     uint32_t v4l2_fmt;
+@@ -42,26 +45,173 @@ struct v4l2_format_update {
+     int update_avfmt;
+ };
+ 
+-static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx)
++
++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
+ {
+-    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
+-        container_of(ctx, V4L2m2mContext, output) :
+-        container_of(ctx, V4L2m2mContext, capture);
++    return (int64_t)n;
+ }
+ 
+-static inline AVCodecContext *logger(V4L2Context *ctx)
++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
+ {
+-    return ctx_to_m2mctx(ctx)->avctx;
++    return (unsigned int)pts;
++}
++
++// FFmpeg requires us to propagate a number of vars from the coded pkt into
++// the decoded frame. The only thing that tracks like that in V4L2 stateful
++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
++// guarantees about PTS being unique or specified for every frame so replace
++// the supplied PTS with a simple incrementing number and keep a circular
++// buffer of all the things we want preserved (including the original PTS)
++// indexed by the tracking no.
++static int64_t
++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt)
++{
++    V4L2m2mTrackEl *slot;
++    int64_t track_pts;
++
++    // Advance the tracking number, never letting it be 0
++    if (++x->track_no == 0)
++        x->track_no = 1;
++    track_pts = track_to_pts(avctx, x->track_no);
++    slot = &x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE];
++    av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no);
++    *slot = (V4L2m2mTrackEl){
++        .track_pts        = track_pts,
++        .pts              = avpkt->pts,
++        .dts              = avpkt->dts,
++        .duration         = avpkt->duration,
++        .pkt_pos          = avpkt->pos,
++        .pkt_size         = avpkt->size,
++        .pending          = 1,
++        .discard          = 0
++    };
++    return track_pts;
++}
++
++static int64_t
++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame)
++{
++    V4L2m2mTrackEl *slot;
++    int64_t track_pts;
++
++    // Advance the tracking number, never letting it be 0
++    if (++x->track_no == 0)
++        x->track_no = 1;
++    track_pts = track_to_pts(avctx, x->track_no);
++    slot = &x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE];
++    av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no);
++    *slot = (V4L2m2mTrackEl){
++        .track_pts        = track_pts,
++        .pts              = frame->pts,
++        .dts              = AV_NOPTS_VALUE,
++        .duration         = frame->duration,
++        .pending          = 1,
++        .discard          = 0
++    };
++#if FF_API_FRAME_PKT
++FF_DISABLE_DEPRECATION_WARNINGS
++    slot->pkt_pos = frame->pkt_pos;
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++    return track_pts;
++}
++
++
++// Returns -1 if we should discard the frame
++static int
++xlat_pts_frame_out(AVCodecContext *const avctx,
++             xlat_track_t * const x,
++             AVFrame *const frame)
++{   // On entry frame->pts is used as a tracking number (presumably the one the driver echoed back)
++    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
++    V4L2m2mTrackEl *const t = x->track_els + n;
++    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
++    {   // No pts, or the slot holds a different tracking value: clear the timing fields but keep the frame
++        av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
++               "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
++        frame->pts              = AV_NOPTS_VALUE;
++        frame->pkt_dts          = AV_NOPTS_VALUE;
++        frame->duration         = 0;
++#if FF_API_FRAME_PKT
++FF_DISABLE_DEPRECATION_WARNINGS
++        frame->pkt_size         = -1;
++        frame->pkt_pos          = -1;
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++    }
++    else if (!t->discard)
++    {   // Slot matches: restore the values saved in it; the saved pts is handed out only while pending is set
++        frame->pts              = t->pending ? t->pts : AV_NOPTS_VALUE;
++        frame->pkt_dts          = t->dts;
++        frame->duration         = t->duration;
++#if FF_API_FRAME_PKT
++FF_DISABLE_DEPRECATION_WARNINGS
++        frame->pkt_pos          = t->pkt_pos;
++        frame->pkt_size         = t->pkt_size;
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++        // Remember the last valid pts returned and mark the slot as consumed
++        if (frame->pts != AV_NOPTS_VALUE)
++            x->last_pts = frame->pts;
++        t->pending = 0;
++    }
++    else
++    {   // Slot matches but is marked discard: tell the caller to drop the frame
++        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
++        return -1;
++    }
++
++    av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n",
++           frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n);
++    return 0;
+ }
+ 
+-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt)
++// Returns -1 if we should discard the frame
++static int
++xlat_pts_pkt_out(AVCodecContext *const avctx,
++             xlat_track_t * const x,
++             AVPacket *const pkt)
+ {
+-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
++    unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE;
++    V4L2m2mTrackEl *const t = x->track_els + n;
++    if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts)
++    {
++        av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING,
++               "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
++        pkt->pts                = AV_NOPTS_VALUE;
++    }
++    else if (!t->discard)
++    {
++        pkt->pts                = t->pending ? t->pts : AV_NOPTS_VALUE;
++
++        if (pkt->pts != AV_NOPTS_VALUE)
++            x->last_pts = pkt->pts;
++        t->pending = 0;
++    }
++    else
++    {
++        av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts);
++        return -1;
++    }
++
++    // * Would like something much better than this...xlat(offset + out_count)?
++    pkt->dts = pkt->pts;
++    av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n",
++           pkt->pts, t->track_pts, n);
++    return 0;
++}
++
++
++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
++{
++    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
++        container_of(ctx, V4L2m2mContext, output) :
++        container_of(ctx, V4L2m2mContext, capture);
+ }
+ 
+-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt)
++static inline AVCodecContext *logger(const V4L2Context *ctx)
+ {
+-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
++    return ctx_to_m2mctx(ctx)->avctx;
+ }
+ 
+ static AVRational v4l2_get_sar(V4L2Context *ctx)
+@@ -82,21 +232,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx)
+     return sar;
+ }
+ 
+-static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2)
++static inline int ctx_buffers_alloced(const V4L2Context * const ctx)
+ {
+-    struct v4l2_format *fmt1 = &ctx->format;
+-    int ret =  V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
+-        fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
+-        fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
+-        :
+-        fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
+-        fmt1->fmt.pix.height != fmt2->fmt.pix.height;
++    return ctx->bufrefs != NULL;
++}
++
++// Width/Height changed or we don't have an alloc in the first place?
++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2)
++{
++    const struct v4l2_format *fmt1 = &ctx->format;
++    int ret = !ctx_buffers_alloced(ctx) ||
++        (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
++            fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width ||
++            fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height
++            :
++            fmt1->fmt.pix.width != fmt2->fmt.pix.width ||
++            fmt1->fmt.pix.height != fmt2->fmt.pix.height);
+ 
+     if (ret)
+-        av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n",
++        av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n",
+             ctx->name,
+-            v4l2_get_width(fmt1), v4l2_get_height(fmt1),
+-            v4l2_get_width(fmt2), v4l2_get_height(fmt2));
++            ctx_buffers_alloced(ctx),
++            ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1),
++            ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2));
+ 
+     return ret;
+ }
+@@ -154,76 +312,100 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd
+     }
+ }
+ 
+-static int v4l2_start_decode(V4L2Context *ctx)
++static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r)
+ {
+-    struct v4l2_decoder_cmd cmd = {
+-        .cmd = V4L2_DEC_CMD_START,
+-        .flags = 0,
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    struct v4l2_selection selection = {
++        .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
++        .target = V4L2_SEL_TGT_COMPOSE
+     };
+-    int ret;
+ 
+-    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd);
+-    if (ret)
++    memset(r, 0, sizeof(*r));
++    if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection))
+         return AVERROR(errno);
+ 
++    *r = selection.r;
+     return 0;
+ }
+ 
+-/**
+- * handle resolution change event and end of stream event
+- * returns 1 if reinit was successful, negative if it failed
+- * returns 0 if reinit was not executed
+- */
+-static int v4l2_handle_event(V4L2Context *ctx)
++static int do_source_change(V4L2m2mContext * const s)
+ {
+-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
+-    struct v4l2_format cap_fmt = s->capture.format;
+-    struct v4l2_event evt = { 0 };
++    AVCodecContext *const avctx = s->avctx;
++
+     int ret;
++    int reinit;
++    struct v4l2_format cap_fmt = s->capture.format;
+ 
+-    ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt);
+-    if (ret < 0) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name);
+-        return 0;
+-    }
++    s->capture.done = 0;
+ 
+-    if (evt.type == V4L2_EVENT_EOS) {
+-        ctx->done = 1;
++    ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
++    if (ret) {
++        av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name);
+         return 0;
+     }
+ 
+-    if (evt.type != V4L2_EVENT_SOURCE_CHANGE)
+-        return 0;
++    get_default_selection(&s->capture, &s->capture.selection);
+ 
+-    ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
+-    if (ret) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name);
+-        return 0;
++    reinit = ctx_resolution_changed(&s->capture, &cap_fmt);
++    if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0)
++        reinit = 1;
++
++    s->capture.format = cap_fmt;
++    if (reinit) {
++        s->capture.height = ff_v4l2_get_format_height(&cap_fmt);
++        s->capture.width = ff_v4l2_get_format_width(&cap_fmt);
+     }
+ 
+-    if (v4l2_resolution_changed(&s->capture, &cap_fmt)) {
+-        s->capture.height = v4l2_get_height(&cap_fmt);
+-        s->capture.width = v4l2_get_width(&cap_fmt);
+-        s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
+-    } else {
+-        v4l2_start_decode(ctx);
+-        return 0;
++    // If we don't support selection (or it is bust) and we obviously have HD then kludge
++    if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) &&
++        (s->capture.height == 1088 && s->capture.width == 1920)) {
++        s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080};
+     }
+ 
+-    s->reinit = 1;
++    s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
+ 
+-    if (s->avctx)
+-        ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height);
+-    if (ret < 0)
+-        av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n");
++    av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n",
++           av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)),
++           s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
++           s->capture.width, s->capture.height,
++           s->capture.selection.width, s->capture.selection.height,
++           s->capture.selection.left, s->capture.selection.top, reinit);
+ 
+-    ret = ff_v4l2_m2m_codec_reinit(s);
+-    if (ret) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n");
+-        return AVERROR(EINVAL);
++    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
++    if (ret)
++        av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n");
++    s->draining = 0;
++
++    if (!reinit) {
++        /* Buffers are OK so just stream off to ack */
++        av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__);
++    }
++    else {
++        if (avctx)
++            ret = ff_set_dimensions(s->avctx,
++                                    s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width,
++                                    s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height);
++        if (ret < 0)
++            av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n");
++
++        ff_v4l2_context_release(&s->capture);
++
++        if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) ||
++            s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) {
++            av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n",
++                   s->capture.width, s->capture.height,
++                   ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format));
++            return AVERROR(EINVAL);
++        }
++
++        // Update pixel format - should only actually do something on initial change
++        s->capture.av_pix_fmt =
++            ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO);
++        avctx->pix_fmt = s->output_drm ? AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt;
++        avctx->sw_pix_fmt = s->capture.av_pix_fmt;
+     }
+ 
+-    /* reinit executed */
++    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON);
+     return 1;
+ }
+ 
+@@ -267,175 +449,293 @@ static int v4l2_stop_encode(V4L2Context *ctx)
+     return 0;
+ }
+ 
+-static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
++// DQ a buffer
++// Amalgamates all the various ways there are of signalling EOS/Event to
++// generate a consistent EPIPE.
++//
++// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped)
++//
++// Returns:
++//  0               Success
++//  AVERROR(EPIPE)  Nothing more to read
++//  AVERROR(ENOSPC) No buffers in Q to put result in
++//  *               AVERROR(..)
++
++ static int
++dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf)
+ {
+-    struct v4l2_plane planes[VIDEO_MAX_PLANES];
+-    struct v4l2_buffer buf = { 0 };
+-    V4L2Buffer *avbuf;
+-    struct pollfd pfd = {
+-        .events =  POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */
+-        .fd = ctx_to_m2mctx(ctx)->fd,
++    V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
++    AVCodecContext * const avctx = m->avctx;
++    V4L2Buffer * avbuf;
++    const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type);
++
++    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++
++    struct v4l2_buffer buf = {
++        .type = ctx->type,
++        .memory = V4L2_MEMORY_MMAP,
+     };
+-    int i, ret;
+ 
+-    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) {
+-        for (i = 0; i < ctx->num_buffers; i++) {
+-            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
+-                break;
+-        }
+-        if (i == ctx->num_buffers)
+-            av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to "
+-                                                "userspace. Increase num_capture_buffers "
+-                                                "to prevent device deadlock or dropped "
+-                                                "packets/frames.\n");
+-    }
+-
+-    /* if we are draining and there are no more capture buffers queued in the driver we are done */
+-    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) {
+-        for (i = 0; i < ctx->num_buffers; i++) {
+-            /* capture buffer initialization happens during decode hence
+-             * detection happens at runtime
+-             */
+-            if (!ctx->buffers)
+-                break;
++    *ppavbuf = NULL;
++
++    if (ctx->flag_last)
++        return AVERROR(EPIPE);
++
++    if (is_mp) {
++        buf.length = VIDEO_MAX_PLANES;
++        buf.m.planes = planes;
++    }
++
++    while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) {
++        const int err = errno;
++        av_assert0(AVERROR(err) < 0);
++        if (err != EINTR) {
++            av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
++                ctx->name, av_err2str(AVERROR(err)));
+ 
+-            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
+-                goto start;
++            if (err == EPIPE)
++                ctx->flag_last = 1;
++
++            return AVERROR(err);
+         }
+-        ctx->done = 1;
+-        return NULL;
+     }
++    atomic_fetch_sub(&ctx->q_count, 1);
++
++    avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
++    ff_v4l2_buffer_set_avail(avbuf);
++    avbuf->buf = buf;
++    if (is_mp) {
++        memcpy(avbuf->planes, planes, sizeof(planes));
++        avbuf->buf.m.planes = avbuf->planes;
++    }
++    // Done with any attached buffer
++    av_buffer_unref(&avbuf->ref_buf);
+ 
+-start:
+-    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
+-        pfd.events =  POLLOUT | POLLWRNORM;
+-    else {
+-        /* no need to listen to requests for more input while draining */
+-        if (ctx_to_m2mctx(ctx)->draining)
+-            pfd.events =  POLLIN | POLLRDNORM | POLLPRI;
++    if (V4L2_TYPE_IS_CAPTURE(ctx->type)) {
++        // Zero length cap buffer return == EOS
++        if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) {
++            av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n");
++
++            // Must reQ so we don't leak
++            // May not matter if the next thing we do is release all the
++            // buffers but better to be tidy.
++            ff_v4l2_buffer_enqueue(avbuf);
++
++            ctx->flag_last = 1;
++            return AVERROR(EPIPE);
++        }
++
++#ifdef V4L2_BUF_FLAG_LAST
++        // If flag_last set then this contains data but is the last frame
++        // so remember that but return OK
++        if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0)
++            ctx->flag_last = 1;
++#endif
+     }
+ 
+-    for (;;) {
+-        ret = poll(&pfd, 1, timeout);
+-        if (ret > 0)
+-            break;
+-        if (errno == EINTR)
++    *ppavbuf = avbuf;
++    return 0;
++}
++
++/**
++ * handle resolution change event and end of stream event
++ * Expects to be called after the stream has stopped
++ *
++ * returns 1 if reinit was successful, negative if it failed
++ * returns 0 if reinit was not executed
++ */
++static int
++get_event(V4L2m2mContext * const m)
++{
++    AVCodecContext * const avctx = m->avctx;
++    struct v4l2_event evt = { 0 };
++
++    while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) {
++        const int rv = AVERROR(errno);
++        if (rv == AVERROR(EINTR))
+             continue;
+-        return NULL;
++        if (rv == AVERROR(EAGAIN)) {
++            av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n");
++            return AVERROR_EOF;
++        }
++        av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv));
++        return rv;
+     }
+ 
+-    /* 0. handle errors */
+-    if (pfd.revents & POLLERR) {
+-        /* if we are trying to get free buffers but none have been queued yet,
+-         * or if no buffers have been allocated yet, no need to raise a warning
+-         */
+-        if (timeout == 0) {
+-            if (!ctx->buffers)
+-                return NULL;
++    av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type);
+ 
+-            for (i = 0; i < ctx->num_buffers; i++) {
+-                if (ctx->buffers[i].status != V4L2BUF_AVAILABLE)
+-                    av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
+-            }
++    if (evt.type == V4L2_EVENT_EOS) {
++        av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n");
++        return AVERROR_EOF;
++    }
++
++    if (evt.type == V4L2_EVENT_SOURCE_CHANGE)
++        return do_source_change(m);
++
++    return 0;
++}
++
++static inline int
++dq_ok(const V4L2Context * const c)
++{
++    return c->streamon && atomic_load(&c->q_count) != 0;
++}
++
++// Get a buffer
++// If output then just gets the buffer in the expected way
++// If capture then runs the capture state m/c to deal with res change etc.
++// If return value == 0 then *ppavbuf != NULL
++
++static int
++get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout)
++{
++    V4L2m2mContext * const m = ctx_to_m2mctx(ctx);
++    AVCodecContext * const avctx = m->avctx;
++    const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type);
++
++    const unsigned int poll_cap = (POLLIN | POLLRDNORM);
++    const unsigned int poll_out = (POLLOUT | POLLWRNORM);
++    const unsigned int poll_event = POLLPRI;
++
++    *ppavbuf = NULL;
++
++    for (;;) {
++        struct pollfd pfd = {
++            .fd = m->fd,
++            // If capture && stream not started then assume we are waiting for the initial event
++            .events = !is_cap ? poll_out :
++                !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? poll_cap :
++                    poll_event,
++        };
++        int ret;
++
++        if (ctx->done) {
++            av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name);
++            return AVERROR_EOF;
+         }
+-        else
+-            av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
+ 
+-        return NULL;
+-    }
++        // If capture && timeout == -1 then also wait for rx buffer free
++        if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining)
++            pfd.events |= poll_out;
+ 
+-    /* 1. handle resolution changes */
+-    if (pfd.revents & POLLPRI) {
+-        ret = v4l2_handle_event(ctx);
+-        if (ret < 0) {
+-            /* if re-init failed, abort */
+-            ctx->done = 1;
+-            return NULL;
++        // If nothing Qed all we will get is POLLERR - avoid that
++        if ((pfd.events == poll_out && !dq_ok(&m->output)) ||
++            (pfd.events == poll_cap && !dq_ok(&m->capture)) ||
++            (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) {
++            av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name);
++            return AVERROR(ENOSPC);
+         }
+-        if (ret) {
+-            /* if re-init was successful drop the buffer (if there was one)
+-             * since we had to reconfigure capture (unmap all buffers)
+-             */
+-            return NULL;
++
++        // Timeout kludged s.t. "forever" eventually gives up & produces logging
++        // If waiting for an event when we have seen a last_frame then we expect
++        //   it to be ready already so force a short timeout
++        ret = poll(&pfd, 1,
++                   ff_v4l2_ctx_eos(ctx) ? 10 :
++                   timeout == -1 ? 3000 : timeout);
++        if (ret < 0) {
++            ret = AVERROR(errno);  // Remember errno before logging etc.
++            av_assert0(ret < 0);
+         }
+-    }
+ 
+-    /* 2. dequeue the buffer */
+-    if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) {
++        av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n",
++               ctx->name, ret, timeout, pfd.events, pfd.revents);
+ 
+-        if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
+-            /* there is a capture buffer ready */
+-            if (pfd.revents & (POLLIN | POLLRDNORM))
+-                goto dequeue;
++        if (ret < 0) {
++            if (ret == AVERROR(EINTR))
++                continue;
++            av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret));
++            return ret;
++        }
+ 
+-            /* the driver is ready to accept more input; instead of waiting for the capture
+-             * buffer to complete we return NULL so input can proceed (we are single threaded)
+-             */
+-            if (pfd.revents & (POLLOUT | POLLWRNORM))
+-                return NULL;
++        if (ret == 0) {
++            if (timeout == -1)
++                av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events);
++            if (ff_v4l2_ctx_eos(ctx)) {
++                av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name);
++                ret = get_event(m);
++                if (ret < 0) {
++                    ctx->done = 1;
++                    return ret;
++                }
++            }
++            return AVERROR(EAGAIN);
+         }
+ 
+-dequeue:
+-        memset(&buf, 0, sizeof(buf));
+-        buf.memory = V4L2_MEMORY_MMAP;
+-        buf.type = ctx->type;
+-        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+-            memset(planes, 0, sizeof(planes));
+-            buf.length = VIDEO_MAX_PLANES;
+-            buf.m.planes = planes;
++        if ((pfd.revents & POLLERR) != 0) {
++            av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name);
++            return AVERROR_UNKNOWN;
+         }
+ 
+-        ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf);
+-        if (ret) {
+-            if (errno != EAGAIN) {
++        if ((pfd.revents & poll_event) != 0) {
++            ret = get_event(m);
++            if (ret < 0) {
+                 ctx->done = 1;
+-                if (errno != EPIPE)
+-                    av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
+-                        ctx->name, av_err2str(AVERROR(errno)));
++                return ret;
+             }
+-            return NULL;
++            continue;
+         }
+ 
+-        if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) {
+-            int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ?
+-                            buf.m.planes[0].bytesused : buf.bytesused;
+-            if (bytesused == 0) {
+-                ctx->done = 1;
+-                return NULL;
+-            }
+-#ifdef V4L2_BUF_FLAG_LAST
+-            if (buf.flags & V4L2_BUF_FLAG_LAST)
+-                ctx->done = 1;
+-#endif
++        if ((pfd.revents & poll_cap) != 0) {
++            ret = dq_buf(ctx, ppavbuf);
++            if (ret == AVERROR(EPIPE))
++                continue;
++            return ret;
+         }
+ 
+-        avbuf = &ctx->buffers[buf.index];
+-        avbuf->status = V4L2BUF_AVAILABLE;
+-        avbuf->buf = buf;
+-        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+-            memcpy(avbuf->planes, planes, sizeof(planes));
+-            avbuf->buf.m.planes = avbuf->planes;
++        if ((pfd.revents & poll_out) != 0) {
++            if (is_cap)
++                return AVERROR(EAGAIN);
++            return dq_buf(ctx, ppavbuf);
+         }
+-        return avbuf;
++
++        av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents);
++        return AVERROR_UNKNOWN;
+     }
++}
+ 
+-    return NULL;
++// Clear out flags and timestamps that should be set by the user
++// Returns the passed avbuf
++static V4L2Buffer *
++clean_v4l2_buffer(V4L2Buffer * const avbuf)
++{
++    struct v4l2_buffer *const buf = &avbuf->buf;
++
++    buf->flags = 0;
++    buf->field = V4L2_FIELD_ANY;
++    buf->timestamp = (struct timeval){0};
++    buf->timecode = (struct v4l2_timecode){0};
++    buf->sequence = 0;
++
++    return avbuf;
++}
++
++int
++ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1)
++{
++    V4L2Buffer * avbuf;
++    if (timeout1 != 0) {
++        int rv = get_qbuf(ctx, &avbuf, timeout1);
++        if (rv != 0)
++            return rv;
++    }
++    do {
++        get_qbuf(ctx, &avbuf, 0);
++    } while (avbuf);
++    return 0;
+ }
+ 
+ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
+ {
+-    int timeout = 0; /* return when no more buffers to dequeue */
+     int i;
+ 
+     /* get back as many output buffers as possible */
+-    if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
+-          do {
+-          } while (v4l2_dequeue_v4l2buf(ctx, timeout));
+-    }
++    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
++        ff_v4l2_dq_all(ctx, 0);
+ 
+     for (i = 0; i < ctx->num_buffers; i++) {
+-        if (ctx->buffers[i].status == V4L2BUF_AVAILABLE)
+-            return &ctx->buffers[i];
++        V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
++        if (avbuf->status == V4L2BUF_AVAILABLE)
++            return clean_v4l2_buffer(avbuf);
+     }
+ 
+     return NULL;
+@@ -443,25 +743,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
+ 
+ static int v4l2_release_buffers(V4L2Context* ctx)
+ {
+-    struct v4l2_requestbuffers req = {
+-        .memory = V4L2_MEMORY_MMAP,
+-        .type = ctx->type,
+-        .count = 0, /* 0 -> unmaps buffers from the driver */
+-    };
+-    int i, j;
++    int i;
++    int ret = 0;
++    const int fd = ctx_to_m2mctx(ctx)->fd;
+ 
+-    for (i = 0; i < ctx->num_buffers; i++) {
+-        V4L2Buffer *buffer = &ctx->buffers[i];
++    // Orphan any buffers in the wild
++    ff_weak_link_break(&ctx->wl_master);
++
++    if (ctx->bufrefs) {
++        for (i = 0; i < ctx->num_buffers; i++)
++            av_buffer_unref(ctx->bufrefs + i);
++    }
+ 
+-        for (j = 0; j < buffer->num_planes; j++) {
+-            struct V4L2Plane_info *p = &buffer->plane_info[j];
+-            if (p->mm_addr && p->length)
+-                if (munmap(p->mm_addr, p->length) < 0)
+-                    av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno)));
++    if (fd != -1) {
++        struct v4l2_requestbuffers req = {
++            .memory = V4L2_MEMORY_MMAP,
++            .type = ctx->type,
++            .count = 0, /* 0 -> unmap all buffers from the driver */
++        };
++
++        while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) {
++            if (errno == EINTR)
++                continue;
++
++            ret = AVERROR(errno);
++
++            av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n",
++                ctx->name, av_err2str(AVERROR(errno)));
++
++            if (ctx_to_m2mctx(ctx)->output_drm)
++                av_log(logger(ctx), AV_LOG_ERROR,
++                    "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n"
++                    "for all buffers: \n"
++                    "  1. drmModeRmFB(..)\n"
++                    "  2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n");
+         }
+     }
++    atomic_store(&ctx->q_count, 0);
+ 
+-    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req);
++    return ret;
+ }
+ 
+ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt)
+@@ -490,6 +810,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm
+ 
+ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
+ {
++    V4L2m2mContext* s = ctx_to_m2mctx(ctx);
++    V4L2m2mPriv *priv = s->avctx->priv_data;
+     enum AVPixelFormat pixfmt = ctx->av_pix_fmt;
+     struct v4l2_fmtdesc fdesc;
+     int ret;
+@@ -503,21 +825,22 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
+             return 0;
+     }
+ 
+-    for (;;) {
++    for (;; ++fdesc.index) {
+         ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc);
+         if (ret)
+             return AVERROR(EINVAL);
+ 
++        if (priv->pix_fmt != AV_PIX_FMT_NONE) {
++            if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt))
++                continue;
++        }
++
+         pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO);
+         ret = v4l2_try_raw_format(ctx, pixfmt);
+-        if (ret){
+-            fdesc.index++;
+-            continue;
++        if (ret == 0) {
++            *p = pixfmt;
++            return 0;
+         }
+-
+-        *p = pixfmt;
+-
+-        return 0;
+     }
+ 
+     return AVERROR(EINVAL);
+@@ -560,30 +883,131 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p)
+   *
+   *****************************************************************************/
+ 
+-int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
++
++static void flush_all_buffers_status(V4L2Context* const ctx)
++{
++    int i;
++
++    if (!ctx->bufrefs)
++        return;
++
++    for (i = 0; i < ctx->num_buffers; ++i) {
++        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
++        if (buf->status == V4L2BUF_IN_DRIVER)
++            ff_v4l2_buffer_set_avail(buf);
++    }
++    atomic_store(&ctx->q_count, 0);
++}
++
++static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx)
++{
++    int i;
++    int rv;
++
++    if (!ctx->bufrefs) {
++        rv = ff_v4l2_context_init(ctx);
++        if (rv) {
++            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
++            return rv;
++        }
++    }
++
++    ff_mutex_lock(&ctx->lock);
++    for (i = 0; i < ctx->num_buffers; ++i) {
++        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
++        if (buf->status == V4L2BUF_AVAILABLE) {
++            rv = ff_v4l2_buffer_enqueue(buf);
++            if (rv < 0)
++                break;
++        }
++    }
++    ff_mutex_unlock(&ctx->lock);
++    return rv;
++}
++
++static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx)
+ {
+     int type = ctx->type;
+-    int ret;
++    int ret = 0;
+ 
+-    ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type);
+-    if (ret < 0)
+-        return AVERROR(errno);
++    if (!V4L2_TYPE_IS_OUTPUT(ctx->type))
++        stuff_all_buffers(avctx, ctx);
+ 
+-    ctx->streamon = (cmd == VIDIOC_STREAMON);
++    if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) {
++        ret = AVERROR(errno);
++        av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name,
++               av_err2str(ret));
++        return ret;
++    }
+ 
+-    return 0;
++    ctx->first_buf = 1;
++    ctx->streamon = 1;
++    ctx->flag_last = 0;
++    av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name);
++    return ret;
++}
++
++static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx)
++{
++    int type = ctx->type;
++    int ret = 0;
++    const int has_bufs = ctx_buffers_alloced(ctx);
++
++    // Avoid doing anything if there is nothing we can do
++    if (!has_bufs && !ctx->streamon)
++        return 0;
++
++    if (has_bufs)
++        ff_mutex_lock(&ctx->lock);
++
++    if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) {
++        ret = AVERROR(errno);
++        av_log(avctx, AV_LOG_ERROR, "%s set status OFF failed: err=%s\n", ctx->name,
++               av_err2str(ret));
++    }
++    else {
++        flush_all_buffers_status(ctx);
++
++        ctx->streamon = 0;
++        ctx->flag_last = 0;
++
++        av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name);
++    }
++
++    if (has_bufs)
++        ff_mutex_unlock(&ctx->lock);
++    return ret;
++}
++
++
++int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
++{
++    AVCodecContext * const avctx = logger(ctx);
++
++    switch (cmd) {
++        case VIDIOC_STREAMOFF:
++            return set_streamoff(avctx, ctx);
++        case VIDIOC_STREAMON:
++            return set_streamon(avctx, ctx);
++        default:
++            av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd);
++            break;
++    }
++    return AVERROR_BUG;
+ }
+ 
+ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
+ {
+-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    V4L2m2mContext *const s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
++    int64_t track_ts;
+     V4L2Buffer* avbuf;
+     int ret;
+ 
+     if (!frame) {
+         ret = v4l2_stop_encode(ctx);
+         if (ret)
+-            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
++            av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name);
+         s->draining= 1;
+         return 0;
+     }
+@@ -592,23 +1016,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
+     if (!avbuf)
+         return AVERROR(EAGAIN);
+ 
+-    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf);
++    track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame);
++
++    ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts);
+     if (ret)
+         return ret;
+ 
+     return ff_v4l2_buffer_enqueue(avbuf);
+ }
+ 
+-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
++                                   const void * extdata, size_t extlen)
+ {
+     V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
+     V4L2Buffer* avbuf;
+     int ret;
++    int64_t track_ts;
+ 
+     if (!pkt->size) {
+         ret = v4l2_stop_decode(ctx);
++        // Log but otherwise ignore stop failure
+         if (ret)
+-            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name);
++            av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
+         s->draining = 1;
+         return 0;
+     }
+@@ -617,8 +1047,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
+     if (!avbuf)
+         return AVERROR(EAGAIN);
+ 
+-    ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf);
+-    if (ret)
++    track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt);
++
++    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts);
++    if (ret == AVERROR(ENOMEM))
++        av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
++               __func__, pkt->size, avbuf->planes[0].length);
++    else if (ret)
+         return ret;
+ 
+     return ff_v4l2_buffer_enqueue(avbuf);
+@@ -626,42 +1061,77 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
+ 
+ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
+ {
++    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
+     V4L2Buffer *avbuf;
++    int rv;
+ 
+-    /*
+-     * timeout=-1 blocks until:
+-     *  1. decoded frame available
+-     *  2. an input buffer is ready to be dequeued
+-     */
+-    avbuf = v4l2_dequeue_v4l2buf(ctx, timeout);
+-    if (!avbuf) {
+-        if (ctx->done)
+-            return AVERROR_EOF;
++    do {
++        if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
++            return rv;
++        if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0)
++            return rv;
++    } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0);
+ 
+-        return AVERROR(EAGAIN);
+-    }
+-
+-    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
++   return 0;
+ }
+ 
+-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout)
+ {
++    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    AVCodecContext *const avctx = s->avctx;
+     V4L2Buffer *avbuf;
++    int rv;
+ 
+-    /*
+-     * blocks until:
+-     *  1. encoded packet available
+-     *  2. an input buffer ready to be dequeued
+-     */
+-    avbuf = v4l2_dequeue_v4l2buf(ctx, -1);
+-    if (!avbuf) {
+-        if (ctx->done)
+-            return AVERROR_EOF;
++    do {
++        if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
++            return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
++        if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0)
++            return rv;
++    } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0);
+ 
+-        return AVERROR(EAGAIN);
++    return 0;
++}
++
++// Return 0 terminated list of drm fourcc video formats for this context
++// NULL if none found or error; count is stored in *pN when pN is non-NULL
++// Returned list is malloced so must be freed
++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN)
++{
++    unsigned int i, n = 0, size = 0;
++    uint32_t * e = NULL;
++    if (pN)
++        *pN = 0;
++
++    for (i = 0; i < 1024; ++i) {
++        struct v4l2_fmtdesc fdesc = {
++            .index = i,
++            .type = ctx->type
++        };
++
++        if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc))
++            return e;
++
++        if (n + 1 >= size) {
++            unsigned int newsize = (size == 0) ? 16 : size * 2;
++            uint32_t * t = av_realloc(e, newsize * sizeof(*t));
++            if (!t)
++                return e;
++            e = t;
++            size = newsize;
++        }
++
++        e[n] = fdesc.pixelformat;
++        e[++n] = 0;
++        if (pN)
++            *pN = n;
+     }
+ 
+-    return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf);
++    // If we've looped 1024 times we are clearly confused
++    if (pN)
++        *pN = 0;
++    av_free(e);
++    return NULL;
+ }
+ 
+ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
+@@ -693,78 +1163,194 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
+ 
+ int ff_v4l2_context_set_format(V4L2Context* ctx)
+ {
+-    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
++    int ret;
++
++    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
++    if (ret != 0)
++        return ret;
++
++    // Check returned size against min size and if smaller have another go
++    // Only worry about plane[0] as this is meant to enforce limits for
++    // encoded streams where we might know a bit more about the shape
++    // than the driver
++    if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) {
++        if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage)
++            return 0;
++        ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size;
++    }
++    else {
++        if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage)
++            return 0;
++        ctx->format.fmt.pix.sizeimage = ctx->min_buf_size;
++    }
++
++    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
++    return ret;
+ }
+ 
+ void ff_v4l2_context_release(V4L2Context* ctx)
+ {
+     int ret;
+ 
+-    if (!ctx->buffers)
++    if (!ctx->bufrefs)
+         return;
+ 
+     ret = v4l2_release_buffers(ctx);
+     if (ret)
+         av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name);
+ 
+-    av_freep(&ctx->buffers);
++    av_freep(&ctx->bufrefs);
++    av_buffer_unref(&ctx->frames_ref);
++
++    ff_mutex_destroy(&ctx->lock);
++    pthread_cond_destroy(&ctx->cond);
+ }
+ 
+-int ff_v4l2_context_init(V4L2Context* ctx)
++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem)
+ {
+-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
+     struct v4l2_requestbuffers req;
+-    int ret, i;
++    int ret;
++    int i;
+ 
+-    if (!v4l2_type_supported(ctx)) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
+-        return AVERROR_PATCHWELCOME;
+-    }
+-
+-    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
+-    if (ret)
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name);
++    av_assert0(ctx->bufrefs == NULL);
+ 
+     memset(&req, 0, sizeof(req));
+-    req.count = ctx->num_buffers;
+-    req.memory = V4L2_MEMORY_MMAP;
++    req.count = req_buffers;
++    req.memory = mem;
+     req.type = ctx->type;
+-    ret = ioctl(s->fd, VIDIOC_REQBUFS, &req);
+-    if (ret < 0) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno));
+-        return AVERROR(errno);
++    while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) {
++        if (errno != EINTR) {
++            ret = AVERROR(errno);
++            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret));
++            return ret;
++        }
+     }
+ 
+     ctx->num_buffers = req.count;
+-    ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer));
+-    if (!ctx->buffers) {
++    ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs));
++    if (!ctx->bufrefs) {
+         av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name);
+-        return AVERROR(ENOMEM);
++        ret = AVERROR(ENOMEM);  // ret still holds the successful REQBUFS result
++        goto fail_release;
+     }
+ 
+-    for (i = 0; i < req.count; i++) {
+-        ctx->buffers[i].context = ctx;
+-        ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i);
+-        if (ret < 0) {
++    ctx->wl_master = ff_weak_link_new(ctx);
++    if (!ctx->wl_master) {
++        ret = AVERROR(ENOMEM);
++        goto fail_release;
++    }
++
++    for (i = 0; i < ctx->num_buffers; i++) {
++        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem);
++        if (ret) {
+             av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret));
+-            goto error;
++            goto fail_release;
+         }
+     }
+ 
+     av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name,
+         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat),
+         req.count,
+-        v4l2_get_width(&ctx->format),
+-        v4l2_get_height(&ctx->format),
++        ff_v4l2_get_format_width(&ctx->format),
++        ff_v4l2_get_format_height(&ctx->format),
+         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage,
+         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline);
+ 
+     return 0;
+ 
+-error:
++fail_release:
+     v4l2_release_buffers(ctx);
++    av_freep(&ctx->bufrefs);
++    return ret;
++}
++
++int ff_v4l2_context_frames_set(V4L2Context *const ctx)
++{
++    AVHWFramesContext *hwframes;
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    const int w = ctx->width != 0 ? ctx->width : s->avctx->width;
++    const int h = ctx->height != 0 ? ctx->height : s->avctx->height;
++    int ret;
++
++    if (ctx->frames_ref != NULL) {
++        const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data;
++        if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h)
++            return 0;
++        av_buffer_unref(&ctx->frames_ref);
++    }
++
++    ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref);
++    if (!ctx->frames_ref)
++        return AVERROR(ENOMEM);
+ 
+-    av_freep(&ctx->buffers);
++    hwframes = (AVHWFramesContext*)ctx->frames_ref->data;
++    hwframes->format = AV_PIX_FMT_DRM_PRIME;
++    hwframes->sw_format = ctx->av_pix_fmt;
++    hwframes->width = w;
++    hwframes->height = h;
++    ret = av_hwframe_ctx_init(ctx->frames_ref);
++    if (ret < 0) {
++        av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret));
++        av_buffer_unref(&ctx->frames_ref);
++        return ret;
++    }
++
++    av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__,
++           av_get_pix_fmt_name(ctx->av_pix_fmt), w, h);
++    return 0;
++}
++
++int ff_v4l2_context_init(V4L2Context* ctx)
++{
++    struct v4l2_queryctrl qctrl;
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    int ret;
++
++    // It is not valid to reinit a context without a previous release
++    av_assert0(ctx->bufrefs == NULL);
++
++    if (!v4l2_type_supported(ctx)) {
++        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
++        return AVERROR_PATCHWELCOME;
++    }
++
++    ff_mutex_init(&ctx->lock, NULL);
++    pthread_cond_init(&ctx->cond, NULL);
++    atomic_init(&ctx->q_count, 0);
++
++    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
++    if (ret) {
++        ret = AVERROR(errno);
++        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret));
++        goto fail_unlock;
++    }
++
++    memset(&qctrl, 0, sizeof(qctrl));
++    qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT;
++    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) {
++        ret = AVERROR(errno);
++        if (ret != AVERROR(EINVAL)) {
++            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERYCTRL failed: %s\n", ctx->name, av_err2str(ret));
++            goto fail_unlock;
++        }
++        // Control unsupported - set default if wanted
++        if (ctx->num_buffers < 2)
++            ctx->num_buffers = 4;
++    }
++    else {
++        if (ctx->num_buffers < 2)
++            ctx->num_buffers = qctrl.minimum + 2;
++        ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum);
++    }
++
++    ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem);
++    if (ret < 0)
++        goto fail_unlock;
++    return 0;
+ 
++fail_unlock:
++    ff_mutex_destroy(&ctx->lock);
++    pthread_cond_destroy(&ctx->cond);  // cond was initialised alongside the lock
+     return ret;
+ }
+diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
+index 6f7460c89a9d..9f1c05a918ff 100644
+--- a/libavcodec/v4l2_context.h
++++ b/libavcodec/v4l2_context.h
+@@ -32,6 +32,8 @@
+ #include "libavutil/rational.h"
+ #include "codec_id.h"
+ #include "packet.h"
++#include "libavutil/buffer.h"
++#include "libavutil/thread.h"
+ #include "v4l2_buffers.h"
+ 
+ typedef struct V4L2Context {
+@@ -71,28 +73,57 @@ typedef struct V4L2Context {
+      */
+     int width, height;
+     AVRational sample_aspect_ratio;
++    struct v4l2_rect selection;
+ 
+     /**
+-     * Indexed array of V4L2Buffers
++     * If the default size of buffer is less than this then try to
++     * set to this.
+      */
+-    V4L2Buffer *buffers;
++    uint32_t min_buf_size;
++
++    /**
++     * Indexed array of pointers to V4L2Buffers
++     */
++    AVBufferRef **bufrefs;
+ 
+     /**
+      * Readonly after init.
+      */
+     int num_buffers;
+ 
++    /**
++     * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF
++     */
++    enum v4l2_memory buf_mem;
++
+     /**
+      * Whether the stream has been started (VIDIOC_STREAMON has been sent).
+      */
+     int streamon;
+ 
++    /* 1st buffer after stream on */
++    int first_buf;
++
+     /**
+      *  Either no more buffers available or an unrecoverable error was notified
+      *  by the V4L2 kernel driver: once set the context has to be exited.
+      */
+     int done;
+ 
++    int flag_last;
++
++    /**
++     * If NZ then when Qing frame/pkt use this rather than the
++     * "real" PTS
++     */
++    uint64_t track_ts;
++
++    AVBufferRef *frames_ref;
++    atomic_int q_count;
++    struct ff_weak_link_master *wl_master;
++
++    AVMutex lock;
++    pthread_cond_t cond;
+ } V4L2Context;
+ 
+ /**
+@@ -103,6 +134,14 @@ typedef struct V4L2Context {
+  */
+ int ff_v4l2_context_init(V4L2Context* ctx);
+ 
++/**
++ * (re)set the hwframecontext from the current v4l2 context
++ *
++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables.
++ * @return 0 in case of success, a negative value representing the error otherwise.
++ */
++int ff_v4l2_context_frames_set(V4L2Context *const ctx);
++
+ /**
+  * Sets the V4L2Context format in the v4l2 driver.
+  *
+@@ -120,6 +159,19 @@ int ff_v4l2_context_set_format(V4L2Context* ctx);
+  */
+ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe);
+ 
++/**
++ * Get the list of drm fourcc pixel formats for this context
++ *
++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context
++ *       description for required variables.
++ * @param[out] pN A pointer to receive the number of formats
++ *       found. May be NULL if not wanted.
++ * @return Pointer to malloced list of zero terminated formats,
++ *         NULL if none or error. As list is malloced it must be
++ *         freed.
++ */
++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN);
++
+ /**
+  * Releases a V4L2Context.
+  *
+@@ -148,7 +200,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd);
+  * @param[inout] pkt The AVPacket to dequeue to.
+  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
+  */
+-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout);
+ 
+ /**
+  * Dequeues a buffer from a V4L2Context to an AVFrame.
+@@ -157,7 +209,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
+  * @param[in] ctx The V4L2Context to dequeue from.
+  * @param[inout] f The AVFrame to dequeue to.
+  * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
++ *
+  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
++ *                AVERROR(ENOSPC) if no buffer available to put
++ *                the frame in
+  */
+ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
+ 
+@@ -171,7 +226,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
+  * @param[in] pkt A pointer to an AVPacket.
+  * @return 0 in case of success, a negative error otherwise.
+  */
+-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt);
++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size);
+ 
+ /**
+  * Enqueues a buffer to a V4L2Context from an AVFrame
+@@ -184,4 +239,28 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt);
+  */
+ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f);
+ 
++/**
++ * Dequeue all buffers on this queue
++ *
++ * Used to recycle output buffers
++ *
++ * @param[in] ctx The V4L2Context to dequeue from.
++ * @param[in] timeout1 A timeout on dequeuing the 1st buffer, 
++ *       all others have a timeout of zero
++ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return
++ *         of the first dequeue operation, 0 otherwise.
++ */
++int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1);
++
++/**
++ * Returns the number of buffers currently queued
++ *
++ * @param[in] ctx The V4L2Context to evaluate
++ */
++static inline int
++ff_v4l2_context_q_count(const V4L2Context* const ctx)
++{
++    return atomic_load(&ctx->q_count);
++}
++
+ #endif // AVCODEC_V4L2_CONTEXT_H
+diff --git a/libavcodec/v4l2_fmt.c b/libavcodec/v4l2_fmt.c
+index 6df47e3f5a3c..c820a1d5227b 100644
+--- a/libavcodec/v4l2_fmt.c
++++ b/libavcodec/v4l2_fmt.c
+@@ -42,6 +42,14 @@ static const struct fmt_conversion {
+     { AV_FMT(RGB24),       AV_CODEC(RAWVIDEO),    V4L2_FMT(RGB24) },
+     { AV_FMT(BGR0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGR32) },
+     { AV_FMT(0RGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGB32) },
++    { AV_FMT(BGR0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGRX32) },
++    { AV_FMT(RGB0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGBX32) },
++    { AV_FMT(0BGR),        AV_CODEC(RAWVIDEO),    V4L2_FMT(XBGR32) },
++    { AV_FMT(0RGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(XRGB32) },
++    { AV_FMT(BGRA),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGRA32) },
++    { AV_FMT(RGBA),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGBA32) },
++    { AV_FMT(ABGR),        AV_CODEC(RAWVIDEO),    V4L2_FMT(ABGR32) },
++    { AV_FMT(ARGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(ARGB32) },
+     { AV_FMT(GRAY8),       AV_CODEC(RAWVIDEO),    V4L2_FMT(GREY) },
+     { AV_FMT(YUV420P),     AV_CODEC(RAWVIDEO),    V4L2_FMT(YUV420) },
+     { AV_FMT(YUYV422),     AV_CODEC(RAWVIDEO),    V4L2_FMT(YUYV) },
+diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
+index 15415cfc4eea..286191f6620c 100644
+--- a/libavcodec/v4l2_m2m.c
++++ b/libavcodec/v4l2_m2m.c
+@@ -36,6 +36,15 @@
+ #include "v4l2_context.h"
+ #include "v4l2_fmt.h"
+ #include "v4l2_m2m.h"
++#include "v4l2_req_dmabufs.h"
++
++static void
++xlat_init(xlat_track_t * const x)
++{
++    memset(x, 0, sizeof(*x));
++    x->last_pts = AV_NOPTS_VALUE;
++}
++
+ 
+ static inline int v4l2_splane_video(struct v4l2_capability *cap)
+ {
+@@ -69,7 +78,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe)
+ 
+     s->capture.done = s->output.done = 0;
+     s->capture.name = "capture";
++    s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
+     s->output.name = "output";
++    s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
+     atomic_init(&s->refcount, 0);
+     sem_init(&s->refsync, 0, 0);
+ 
+@@ -86,18 +97,58 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe)
+     if (v4l2_mplane_video(&cap)) {
+         s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+         s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
++        s->output.format.type = s->output.type;
+         return 0;
+     }
+ 
+     if (v4l2_splane_video(&cap)) {
+         s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+         s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
++        s->output.format.type = s->output.type;
+         return 0;
+     }
+ 
+     return AVERROR(EINVAL);
+ }
+ 
++static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++    struct v4l2_format fmt = {.type = s->output.type};
++    int rv;
++    uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt);
++    unsigned int w;
++    unsigned int h;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
++        fmt.fmt.pix_mp.pixelformat = pixfmt;
++        fmt.fmt.pix_mp.width = avctx->width;
++        fmt.fmt.pix_mp.height = avctx->height;
++    }
++    else {
++        fmt.fmt.pix.pixelformat = pixfmt;
++        fmt.fmt.pix.width = avctx->width;
++        fmt.fmt.pix.height = avctx->height;
++    }
++
++    rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt);
++
++    if (rv != 0) {
++        rv = AVERROR(errno);
++        av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv));
++        return rv;
++    }
++
++    w = ff_v4l2_get_format_width(&fmt);
++    h = ff_v4l2_get_format_height(&fmt);
++
++    if (w < avctx->width || h < avctx->height) {
++        av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h);
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
+ static int v4l2_probe_driver(V4L2m2mContext *s)
+ {
+     void *log_ctx = s->avctx;
+@@ -117,6 +168,11 @@ static int v4l2_probe_driver(V4L2m2mContext *s)
+         goto done;
+     }
+ 
++    // If being given frames (encode) check that V4L2 can cope with the size
++    if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO &&
++        (ret = check_size(s->avctx, s)) != 0)
++        goto done;
++
+     ret = ff_v4l2_context_get_format(&s->capture, 1);
+     if (ret) {
+         av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n");
+@@ -218,13 +274,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
+         av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
+ 
+     /* 2. unmap the capture buffers (v4l2 and ffmpeg):
+-     *    we must wait for all references to be released before being allowed
+-     *    to queue new buffers.
+      */
+-    av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n");
+-    if (atomic_load(&s->refcount))
+-        while(sem_wait(&s->refsync) == -1 && errno == EINTR);
+-
+     ff_v4l2_context_release(&s->capture);
+ 
+     /* 3. get the new capture format */
+@@ -243,7 +293,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s)
+ 
+     /* 5. complete reinit */
+     s->draining = 0;
+-    s->reinit = 0;
+ 
+     return 0;
+ }
+@@ -259,6 +308,9 @@ static void v4l2_m2m_destroy_context(FFRefStructOpaque unused, void *context)
+         close(s->fd);
+     av_frame_free(&s->frame);
+     av_packet_unref(&s->buf_pkt);
++    av_freep(&s->extdata_data);
++
++    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n");
+ }
+ 
+ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
+@@ -269,6 +321,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
+     if (!s)
+         return 0;
+ 
++    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n");
++
++    if (s->avctx && av_codec_is_decoder(s->avctx->codec))
++        av_packet_unref(&s->buf_pkt);
++
+     if (s->fd >= 0) {
+         ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
+         if (ret)
+@@ -280,6 +337,14 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
+     }
+ 
+     ff_v4l2_context_release(&s->output);
++    av_buffer_unref(&s->device_ref);
++
++    dmabufs_ctl_unref(&s->db_ctl);
++
++    if (s->fd != -1) {
++        close(s->fd);
++        s->fd = -1;
++    }
+ 
+     s->self_ref = NULL;
+     ff_refstruct_unref(&priv->context);
+@@ -341,6 +406,7 @@ int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s)
+     priv->context->output.num_buffers  = priv->num_output_buffers;
+     priv->context->self_ref = priv->context;
+     priv->context->fd = -1;
++    xlat_init(&priv->context->xlat);
+ 
+     priv->context->frame = av_frame_alloc();
+     if (!priv->context->frame) {
+diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
+index 4ba33dc33593..cb49e0c2fe66 100644
+--- a/libavcodec/v4l2_m2m.h
++++ b/libavcodec/v4l2_m2m.h
+@@ -30,6 +30,7 @@
+ #include <linux/videodev2.h>
+ 
+ #include "libavcodec/avcodec.h"
++#include "libavutil/pixfmt.h"
+ #include "v4l2_context.h"
+ 
+ #define container_of(ptr, type, member) ({ \
+@@ -40,6 +41,38 @@
+     { "num_output_buffers", "Number of buffers in the output context",\
+         OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS }
+ 
++#define FF_V4L2_M2M_TRACK_SIZE 128
++typedef struct V4L2m2mTrackEl {
++    int     discard;   // If we see this buffer it has been flushed, so discard
++    int     pending;
++    int64_t pts;
++    int64_t dts;
++#if FF_API_FRAME_PKT
++    int64_t pkt_pos;
++    int     pkt_size;
++#endif
++    int64_t duration;
++    int64_t track_pts;
++} V4L2m2mTrackEl;
++
++typedef struct pts_stats_s
++{
++    void * logctx;
++    const char * name;  // For debug
++    unsigned int last_count;
++    unsigned int last_interval;
++    int64_t last_pts;
++    int64_t guess;
++} pts_stats_t;
++
++typedef struct xlat_track_s {
++    unsigned int track_no;
++    int64_t last_pts;    // Last valid PTS decoded
++    V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
++} xlat_track_t;
++
++struct dmabufs_ctl;
++
+ typedef struct V4L2m2mContext {
+     char devname[PATH_MAX];
+     int fd;
+@@ -52,10 +85,10 @@ typedef struct V4L2m2mContext {
+     AVCodecContext *avctx;
+     sem_t refsync;
+     atomic_uint refcount;
+-    int reinit;
+ 
+     /* null frame/packet received */
+     int draining;
++    int running;
+     AVPacket buf_pkt;
+ 
+     /* Reference to a frame. Only used during encoding */
+@@ -66,6 +99,36 @@ typedef struct V4L2m2mContext {
+ 
+     /* reference back to V4L2m2mPriv */
+     void *priv;
++
++    AVBufferRef *device_ref;
++
++    /* generate DRM frames */
++    int output_drm;
++
++    /* input frames are drmprime */
++    int input_drm;
++
++    /* Frame tracking */
++    xlat_track_t xlat;
++
++    pts_stats_t pts_stat;
++
++    /* req pkt */
++    int req_pkt;
++    int reorder_size;
++
++    /* Ext data sent */
++    int extdata_sent;
++    /* Ext data sent in packet - overrides ctx */
++    void * extdata_data;
++    size_t extdata_size;
++
++#define FF_V4L2_QUIRK_REINIT_ALWAYS             1
++#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN    2
++    /* Quirks */
++    unsigned int quirks;
++
++    struct dmabufs_ctl * db_ctl;
+ } V4L2m2mContext;
+ 
+ typedef struct V4L2m2mPriv {
+@@ -75,6 +138,8 @@ typedef struct V4L2m2mPriv {
+ 
+     int num_output_buffers;
+     int num_capture_buffers;
++    const char * dmabuf_alloc;
++    enum AVPixelFormat pix_fmt;
+ } V4L2m2mPriv;
+ 
+ /**
+@@ -128,4 +193,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx);
+  */
+ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx);
+ 
++
++static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
++}
++
++static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
++}
++
++static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
++}
++
++static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx)
++{
++    return ctx->flag_last;
++}
++
++
+ #endif /* AVCODEC_V4L2_M2M_H */
+diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
+index aa2d759e1ea5..d3c3820379dd 100644
+--- a/libavcodec/v4l2_m2m_dec.c
++++ b/libavcodec/v4l2_m2m_dec.c
+@@ -21,8 +21,15 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
++#include "config_components.h"
++
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
++
++#include "libavutil/avassert.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavutil/mem.h"
+ #include "libavutil/pixfmt.h"
+ #include "libavutil/pixdesc.h"
+ #include "libavutil/opt.h"
+@@ -30,75 +37,279 @@
+ #include "codec_internal.h"
+ #include "libavcodec/decode.h"
+ 
++#include "libavcodec/hwaccels.h"
++#include "libavcodec/internal.h"
++#include "libavcodec/hwconfig.h"
++
+ #include "v4l2_context.h"
+ #include "v4l2_m2m.h"
+ #include "v4l2_fmt.h"
++#include "v4l2_req_dmabufs.h"
+ 
+-static int v4l2_try_start(AVCodecContext *avctx)
++#if CONFIG_H264_DECODER
++#include "h264_parse.h"
++#endif
++#if CONFIG_HEVC_DECODER
++#include "hevc/parse.h"
++#endif
++
++// Pick 64 for max last count - that is >1sec at 60fps
++#define STATS_LAST_COUNT_MAX 64
++#define STATS_INTERVAL_MAX (1 << 30)
++
++#ifndef FF_API_BUFFER_SIZE_T
++#define FF_API_BUFFER_SIZE_T 1
++#endif
++
++#define DUMP_FAILED_EXTRADATA 0
++
++#if DUMP_FAILED_EXTRADATA
++static inline char hex1(unsigned int x)
+ {
+-    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+-    V4L2Context *const capture = &s->capture;
+-    V4L2Context *const output = &s->output;
+-    struct v4l2_selection selection = { 0 };
+-    int ret;
++    x &= 0xf;
++    return x <= 9 ? '0' + x : 'a' + x - 10;
++}
+ 
+-    /* 1. start the output process */
+-    if (!output->streamon) {
+-        ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON);
+-        if (ret < 0) {
+-            av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n");
+-            return ret;
+-        }
++static inline char * hex2(char * s, unsigned int x)
++{
++    *s++ = hex1(x >> 4);
++    *s++ = hex1(x);
++    return s;
++}
++
++static inline char * hex4(char * s, unsigned int x)
++{
++    s = hex2(s, x >> 8);
++    s = hex2(s, x);
++    return s;
++}
++
++static inline char * dash2(char * s)
++{
++    *s++ = '-';
++    *s++ = '-';
++    return s;
++}
++
++static void
++data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len)
++{
++    size_t i;
++    s = hex4(s, offset);
++    m += offset;
++    for (i = 0; i != 8; ++i) {
++        *s++ = ' ';
++        s = len > i + offset ? hex2(s, *m++) : dash2(s);
+     }
++    *s++ = ' ';
++    *s++ = ':';
++    for (; i != 16; ++i) {
++        *s++ = ' ';
++        s = len > i + offset ? hex2(s, *m++) : dash2(s);
++    }
++    *s++ = 0;
++}
+ 
+-    if (capture->streamon)
+-        return 0;
++static void
++log_dump(void * logctx, int lvl, const void * const data, const size_t len)
++{
++    size_t i;
++    for (i = 0; i < len; i += 16) {
++        char buf[80];
++        data16(buf, i, data, len);
++        av_log(logctx, lvl, "%s\n", buf);
++    }
++}
++#endif
+ 
+-    /* 2. get the capture format */
+-    capture->format.type = capture->type;
+-    ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format);
+-    if (ret) {
+-        av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n");
+-        return ret;
++static unsigned int pts_stats_interval(const pts_stats_t * const stats)
++{
++    return stats->last_interval;
++}
++
++static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess)
++{
++    if (stats->last_count <= 1)
++        return stats->last_pts;
++    if (stats->last_pts == AV_NOPTS_VALUE ||
++            fail_bad_guess && (stats->last_interval == 0 ||
++                               stats->last_count >= STATS_LAST_COUNT_MAX))
++        return AV_NOPTS_VALUE;
++    return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval;
++}
++
++static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
++{
++    if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {
++        if (stats->last_count < STATS_LAST_COUNT_MAX)
++            ++stats->last_count;
++        return;
+     }
+ 
+-    /* 2.1 update the AVCodecContext */
+-    avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
+-    capture->av_pix_fmt = avctx->pix_fmt;
++    if (stats->last_pts != AV_NOPTS_VALUE) {
++        const int64_t interval = pts - stats->last_pts;
+ 
+-    /* 3. set the crop parameters */
+-    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+-    selection.r.height = avctx->coded_height;
+-    selection.r.width = avctx->coded_width;
+-    ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
+-    if (!ret) {
+-        ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
+-        if (ret) {
+-            av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
+-        } else {
+-            av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height);
+-            /* update the size of the resulting frame */
+-            capture->height = selection.r.height;
+-            capture->width  = selection.r.width;
++        if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
++            stats->last_count >= STATS_LAST_COUNT_MAX) {
++            if (stats->last_interval != 0)
++                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n",
++                       __func__, stats->name, interval, stats->last_count);
++            stats->last_interval = 0;
++        }
++        else {
++            const int64_t frame_time = interval / (int64_t)stats->last_count;
++
++            if (frame_time != stats->last_interval)
++                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n",
++                       __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
++            stats->last_interval = frame_time;
+         }
+     }
+ 
+-    /* 4. init the capture context now that we have the capture format */
+-    if (!capture->buffers) {
+-        ret = ff_v4l2_context_init(capture);
+-        if (ret) {
+-            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
+-            return AVERROR(ENOMEM);
++    stats->last_pts = pts;
++    stats->last_count = 1;
++}
++
++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)
++{
++    *stats = (pts_stats_t){
++        .logctx = logctx,
++        .name = name,
++        .last_count = 1,
++        .last_interval = 0,
++        .last_pts = AV_NOPTS_VALUE
++    };
++}
++
++// If abdata == NULL then this just counts space required
++// Unpacks avcC if detected
++static int
++h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata)
++{
++    const uint8_t * const xdend = extradata + extrasize;
++    const uint8_t * p = extradata;
++    uint8_t * d = abdata;
++    unsigned int n;
++    unsigned int len;
++    const unsigned int hdrlen = 4;
++    unsigned int need_pps = 1;
++
++    if (extrasize < 8)
++        return AVERROR(EINVAL);
++
++    if (p[0] == 0 && p[1] == 0) {
++        // Assume a couple of leading zeros are good enough to indicate NAL
++        if (abdata)
++            memcpy(d, p, extrasize);
++        return extrasize;
++    }
++
++    // avcC starts with a 1
++    if (p[0] != 1)
++        return AVERROR(EINVAL);
++
++    p += 5;
++    n = *p++ & 0x1f;
++
++doxps:
++    while (n--) {
++        if (xdend - p < 2)
++            return AVERROR(EINVAL);
++        len = (p[0] << 8) | p[1];
++        p += 2;
++        if (xdend - p < (ptrdiff_t)len)
++            return AVERROR(EINVAL);
++        if (abdata) {
++            d[0] = 0;
++            d[1] = 0;
++            d[2] = 0;
++            d[3] = 1;
++            memcpy(d + 4, p, len);
+         }
++        d += len + hdrlen;
++        p += len;
++    }
++    if (need_pps) {
++        need_pps = 0;
++        if (p >= xdend)
++            return AVERROR(EINVAL);
++        n = *p++;
++        goto doxps;
+     }
+ 
+-    /* 5. start the capture process */
+-    ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
+-    if (ret) {
+-        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n");
++    return d - abdata;
++}
++
++static int
++copy_extradata(AVCodecContext * const avctx,
++               const void * const src_data, const int src_len,
++               void ** const pdst_data, size_t * const pdst_len)
++{
++    int len;
++
++    *pdst_len = 0;
++    av_freep(pdst_data);
++
++    if (avctx->codec_id == AV_CODEC_ID_H264)
++        len = h264_xd_copy(src_data, src_len, NULL);
++    else
++        len = src_len < 0 ? AVERROR(EINVAL) : src_len;
++
++    // Zero length is OK but we want to stop - -ve is error val
++    if (len <= 0)
++        return len;
++
++    if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
++        return AVERROR(ENOMEM);
++
++    if (avctx->codec_id == AV_CODEC_ID_H264)
++        h264_xd_copy(src_data, src_len, *pdst_data);
++    else
++        memcpy(*pdst_data, src_data, len);
++    *pdst_len = len;
++
++    return 0;
++}
++
++
++
++static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)
++{
++    int ret;
++    struct v4l2_decoder_cmd cmd = {
++        .cmd = V4L2_DEC_CMD_START,
++        .flags = 0,
++    };
++
++    if (s->output.streamon)
++        return 0;
++
++    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON);
++    if (ret != 0) {
++        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret));
+         return ret;
+     }
+ 
++    // STREAMON should do implicit START so this just for those that don't.
++    // It is optional so don't worry if it fails
++    if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) {
++        ret = AVERROR(errno);
++        av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret));
++    }
++    else {
++        av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n");
++    }
++    return 0;
++}
++
++static int v4l2_try_start(AVCodecContext *avctx)
++{
++    V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
++    int ret;
++
++    /* 1. start the output process */
++    if ((ret = check_output_streamon(avctx, s)) != 0)
++        return ret;
+     return 0;
+ }
+ 
+@@ -133,51 +344,822 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s)
+     return 0;
+ }
+ 
+-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++static void
++set_best_effort_pts(AVCodecContext *const avctx,
++             pts_stats_t * const ps,
++             AVFrame *const frame)
++{
++    pts_stats_add(ps, frame->pts);
++
++    frame->best_effort_timestamp = pts_stats_guess(ps, 1);
++    // If we can't guess from just PTS - try DTS
++    if (frame->best_effort_timestamp == AV_NOPTS_VALUE)
++        frame->best_effort_timestamp = frame->pkt_dts;
++
++    // We can't emulate what s/w does in a useful manner and using the
++    // "correct" answer seems to just confuse things.
++    frame->pkt_dts               = frame->pts;
++    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n",
++           frame->pts, frame->best_effort_timestamp, frame->pkt_dts);
++}
++
++static void
++xlat_flush(xlat_track_t * const x)
++{
++    unsigned int i;
++    // Do not reset track_no - this ensures that any frames left in the decoder
++    // that turn up later get discarded.
++
++    x->last_pts = AV_NOPTS_VALUE;
++    for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) {
++        x->track_els[i].pending = 0;
++        x->track_els[i].discard = 1;
++    }
++}
++
++static void
++xlat_init(xlat_track_t * const x)
++{
++    memset(x, 0, sizeof(*x));
++    xlat_flush(x);
++}
++
++static int
++xlat_pending(const V4L2m2mContext * const s)
++{
++    const xlat_track_t *const x = &s->xlat;
++    unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE;
++    int i;
++    const int64_t now = pts_stats_guess(&s->pts_stat, 0);
++    int64_t first_dts = AV_NOPTS_VALUE;
++    int no_dts_count = 0;
++    unsigned int interval = pts_stats_interval(&s->pts_stat);
++
++    for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) {
++        const V4L2m2mTrackEl * const t = x->track_els + n;
++
++        if (first_dts == AV_NOPTS_VALUE)
++            if (t->dts == AV_NOPTS_VALUE)
++                ++no_dts_count;
++            else
++                first_dts = t->dts;
++
++        // Discard only set on never-set or flushed entries
++        // So if we get here we've never successfully decoded a frame so allow
++        // more frames into the buffer before stalling
++        if (t->discard)
++            return i - 16;
++
++        // If we've got this frame out then everything before this point
++        // must have entered the decoder
++        if (!t->pending)
++            break;
++
++        // If we've never seen a pts all we can do is count frames
++        if (now == AV_NOPTS_VALUE)
++            continue;
++
++        if (t->dts != AV_NOPTS_VALUE && now >= t->dts)
++            break;
++    }
++
++    if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) {
++        const int iframes = (first_dts - now) / (int)interval;
++        const int t = iframes - s->reorder_size + no_dts_count;
++
++//        av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n",
++//               x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count);
++
++        if (iframes > 0 && iframes < 64 && t < i) {
++            return t;
++        }
++    }
++
++    return i;
++}
++
++static inline int stream_started(const V4L2m2mContext * const s) {
++    return s->output.streamon;
++}
++
++#define NQ_OK        0
++#define NQ_Q_FULL    1
++#define NQ_SRC_EMPTY 2
++#define NQ_NONE      3
++#define NQ_DRAINING  4
++#define NQ_DEAD      5
++
++#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING)
++#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE)
++
++// do_not_get      If true then no new packet will be got but status will
++//                  be set appropriately
++
++// AVERROR_EOF     Flushing an already flushed stream
++// -ve             Error (all errors except EOF are unexpected)
++// NQ_OK (0)       OK
++// NQ_Q_FULL       Dst full (retry if we think V4L2 Q has space now)
++// NQ_SRC_EMPTY    Src empty (do not retry)
++// NQ_NONE         Enqueue not attempted
++// NQ_DRAINING     At EOS, dQ dest until EOS there too
++// NQ_DEAD         Not running (do not retry, do not attempt capture dQ)
++
++static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get)
+ {
+-    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+-    V4L2Context *const capture = &s->capture;
+-    V4L2Context *const output = &s->output;
+     int ret;
+ 
+-    if (!s->buf_pkt.size) {
+-        ret = ff_decode_get_packet(avctx, &s->buf_pkt);
++    // If we don't already have a coded packet - get a new one
++    // We will already have a coded pkt if the output Q was full last time we
++    // tried to Q it
++    if (!s->buf_pkt.size && !do_not_get) {
++        unsigned int i;
++
++        for (i = 0; i < 256; ++i) {
++            uint8_t * side_data;
++            size_t side_size;
++
++            ret = ff_decode_get_packet(avctx, &s->buf_pkt);
++            if (ret != 0)
++                break;
++
++            // New extradata is the only side-data we understand
++            side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
++            if (side_data) {
++                av_log(avctx, AV_LOG_DEBUG, "New extradata\n");
++                if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0)
++                    av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret));
++                s->extdata_sent = 0;
++            }
++
++            if (s->buf_pkt.size != 0)
++                break;
++
++            if (s->buf_pkt.side_data_elems == 0) {
++                av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n");
++                ret = AVERROR_EOF;
++                break;
++            }
++
++            // Retry a side-data only pkt
++        }
++        // If i >= 256 something has gone wrong
++        if (i >= 256) {
++            av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n");
++            return AVERROR(EIO);
++        }
++
++        if (ret == AVERROR(EAGAIN)) {
++            if (!stream_started(s)) {
++                av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__);
++                return NQ_DEAD;
++            }
++            return NQ_SRC_EMPTY;
++        }
++
++        if (ret == AVERROR_EOF) {
++            // EOF - enter drain mode
++            av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n",
++                   ret, s->buf_pkt.size, stream_started(s), s->draining);
++            if (!stream_started(s)) {
++                av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n");
++                s->draining = 1;
++                s->capture.done = 1;
++                return AVERROR_EOF;
++            }
++
++            if (!s->draining) {
++                // Calling enqueue with an empty pkt starts drain
++                av_assert0(s->buf_pkt.size == 0);
++                ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
++                if (ret) {
++                    av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret);
++                    return ret;
++                }
++            }
++            return NQ_DRAINING;
++        }
++
+         if (ret < 0) {
+-            if (ret == AVERROR(EAGAIN))
+-                return ff_v4l2_context_dequeue_frame(capture, frame, 0);
+-            else if (ret != AVERROR_EOF)
+-                return ret;
++            av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret);
++            return ret;
+         }
+     }
+ 
+-    if (s->draining)
+-        goto dequeue;
++    if (s->draining) {
++        if (s->buf_pkt.size) {
++            av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n");
++            av_packet_unref(&s->buf_pkt);
++        }
++        return NQ_DRAINING;
++    }
++
++    if (!s->buf_pkt.size)
++        return NQ_NONE;
++
++    if ((ret = check_output_streamon(avctx, s)) != 0)
++        return ret;
+ 
+-    ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt);
+-    if (ret < 0 && ret != AVERROR(EAGAIN))
+-        goto fail;
++    if (s->extdata_sent)
++        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
++    else
++        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size);
+ 
+-    /* if EAGAIN don't unref packet and try to enqueue in the next iteration */
+-    if (ret != AVERROR(EAGAIN))
++    if (ret == AVERROR(EAGAIN)) {
++        // Out of input buffers - keep packet
++        ret = NQ_Q_FULL;
++    }
++    else {
++        // In all other cases we are done with this packet
+         av_packet_unref(&s->buf_pkt);
++        s->extdata_sent = 1;
+ 
+-    if (!s->draining) {
+-        ret = v4l2_try_start(avctx);
+         if (ret) {
+-            /* cant recover */
+-            if (ret != AVERROR(ENOMEM))
+-                ret = 0;
+-            goto fail;
++            av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret);
++            return ret;
++        }
++    }
++
++    // Start if we haven't
++    {
++        const int ret2 = v4l2_try_start(avctx);
++        if (ret2) {
++            av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2);
++            ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD;
+         }
+     }
+ 
+-dequeue:
+-    return ff_v4l2_context_dequeue_frame(capture, frame, -1);
+-fail:
+-    av_packet_unref(&s->buf_pkt);
+     return ret;
+ }
+ 
++static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx)
++{
++    int rv = 0;  // 0 = stopped waiting normally, -ve = AVERROR from a failed wait
++
++    ff_mutex_lock(&ctx->lock);
++
++    while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) {  // nothing queued & still streaming
++        if ((rv = pthread_cond_wait(&ctx->cond, &ctx->lock)) != 0) {
++            rv = AVERROR(rv);  // pthread_cond_wait returns the error number; it does not set errno
++            av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv));
++            break;
++        }
++    }
++
++    ff_mutex_unlock(&ctx->lock);
++    return rv;
++}
++
++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++{
++    V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
++    int src_rv = -1;  // Last status (NQ_xxx or -ve error) from try_enqueue_src
++    int dst_rv = 1;  // Non-zero (done), non-negative (error) number
++    unsigned int i = 0;
++
++    do {
++        const int pending = xlat_pending(s);
++        const int prefer_dq = (pending > 4);
++        const int last_src_rv = src_rv;
++
++        av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt);
++
++        // Enqueue another pkt for decode if
++        // (a) We don't have a lot of stuff in the buffer already OR
++        // (b) ... we (think we) do but we've failed to get a frame already OR
++        // (c) We've dequeued a lot of frames without asking for input
++        src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2));
++
++        // If we got a frame last time or we've already tried to get a frame and
++        // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN)
++        // indicating that we want more input.
++        // This should mean that once decode starts we enter a stable state where
++        // we alternately ask for input and produce output
++        if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY)
++            break;
++
++        if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) {
++            av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n");
++            break;
++        }
++
++        // Try to get a new frame if
++        // (a) we haven't already got one AND
++        // (b) enqueue returned a status indicating that decode should be attempted
++        if (dst_rv != 0 && TRY_DQ(src_rv)) {
++            // Pick a timeout depending on state
++            // The pending count isn't completely reliable so it is a good enough
++            // hint that we want a frame but not good enough to require it in
++            // all cases; however if it has got > 31 that exceeds its margin of
++            // error so require a frame to prevent ridiculous levels of latency
++            const int t =
++                src_rv == NQ_Q_FULL ? -1 :
++                src_rv == NQ_DRAINING ? 300 :
++                prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0;
++
++            // Dequeue frame will unref any previous contents of frame
++            // if it returns success so we don't need an explicit unref
++            // when discarding
++            // This returns AVERROR(EAGAIN) on timeout or if
++            // there is room in the input Q and timeout == -1
++            dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
++
++            // Failure due to no buffer in Q?
++            if (dst_rv == AVERROR(ENOSPC)) {
++                // Wait & retry
++                if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) {
++                    dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t);
++                }
++            }
++
++            if (dst_rv == 0) {
++                set_best_effort_pts(avctx, &s->pts_stat, frame);
++                if (!s->running) {
++                    s->running = 1;
++                    av_log(avctx, AV_LOG_VERBOSE, "Decode running\n");
++                }
++            }
++
++            if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
++                av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF\n");
++                dst_rv = AVERROR_EOF;
++                s->capture.done = 1;
++            }
++            else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
++                av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
++                       s->draining, s->capture.done);
++            else if (dst_rv && dst_rv != AVERROR(EAGAIN))
++                av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
++                       s->draining, s->capture.done, dst_rv);
++        }
++
++        ++i;
++        if (i >= 256) {
++            av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %u\n", i);
++            src_rv = AVERROR(EIO);  // -ve status fails both loop conditions below so we exit
++        }
++
++        // Continue trying to enqueue packets if either
++        // (a) we succeeded last time OR
++        // (b) we didn't ret a frame and we can retry the input
++    } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv)));
++
++    // Ensure that the frame contains nothing if we aren't returning a frame
++    // (might happen when discarding)
++    if (dst_rv)
++        av_frame_unref(frame);
++
++    // If we got a frame this time ask for a pkt next time
++    s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0;
++
++#if 0
++    if (dst_rv == 0)
++    {
++        static int z = 0;
++        if (++z > 50) {
++            av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n");
++            ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
++            return -1;
++        }
++    }
++#endif
++
++    return dst_rv == 0 ? 0 :
++        src_rv < 0 ? src_rv :
++        dst_rv < 0 ? dst_rv :
++            AVERROR(EAGAIN);
++}
++
++#if 0
++#include <time.h>
++static int64_t us_time(void)
++{
++    struct timespec ts;
++    clock_gettime(CLOCK_MONOTONIC, &ts);
++    return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
++}
++
++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++{
++    int ret;
++    const int64_t now = us_time();
++    int64_t done;
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++    ret = v4l2_receive_frame2(avctx, frame);
++    done = us_time();
++    av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret);
++    return ret;
++}
++#endif
++
++static uint32_t
++avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile)
++{
++    switch (codec_id) {
++        case AV_CODEC_ID_H264:
++            switch (avprofile) {
++                case FF_PROFILE_H264_BASELINE:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
++                case FF_PROFILE_H264_CONSTRAINED_BASELINE:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE;
++                case FF_PROFILE_H264_MAIN:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN;
++                case FF_PROFILE_H264_EXTENDED:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED;
++                case FF_PROFILE_H264_HIGH:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH;
++                case FF_PROFILE_H264_HIGH_10:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10;
++                case FF_PROFILE_H264_HIGH_10_INTRA:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA;
++                case FF_PROFILE_H264_MULTIVIEW_HIGH:
++                case FF_PROFILE_H264_HIGH_422:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422;
++                case FF_PROFILE_H264_HIGH_422_INTRA:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA;
++                case FF_PROFILE_H264_STEREO_HIGH:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH;
++                case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE;
++                case FF_PROFILE_H264_HIGH_444_INTRA:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA;
++                case FF_PROFILE_H264_CAVLC_444:
++                    return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA;
++                case FF_PROFILE_H264_HIGH_444:
++                default:
++                    break;
++//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE		= 12,
++//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH		= 13,
++//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA	= 14,
++//                    V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH		= 16,
++//                    V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH		= 17,
++            }
++            break;
++        case AV_CODEC_ID_MPEG2VIDEO:
++        case AV_CODEC_ID_MPEG4:
++        case AV_CODEC_ID_VC1:
++        case AV_CODEC_ID_VP8:
++        case AV_CODEC_ID_VP9:
++        case AV_CODEC_ID_AV1:
++            // Most profiles are a simple number that matches the V4L2 enum
++            return avprofile;
++        default:
++            break;
++    }
++    return ~(uint32_t)0;
++}
++
++// Mirrors Chrome's profile check: tests whether the profile exists as a possible
++// value of the V4L2 profile control. Returns 0 if OK/unchecked, AVERROR(ENOENT) if not
++static int
++check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s)
++{
++    struct v4l2_queryctrl query_ctrl;
++    struct v4l2_querymenu query_menu;
++    uint32_t profile_id;
++
++    // An unset profile is almost certainly zero or -99 - do not reject
++    if (avctx->profile <= 0) {
++        av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile);
++        return 0;
++    }
++
++    memset(&query_ctrl, 0, sizeof(query_ctrl));
++    switch (avctx->codec_id) {
++        case AV_CODEC_ID_MPEG2VIDEO:
++            profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE;
++            break;
++        case AV_CODEC_ID_MPEG4:
++            profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE;
++            break;
++        case AV_CODEC_ID_H264:
++            profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE;
++            break;
++        case AV_CODEC_ID_VP8:
++            profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE;
++            break;
++        case AV_CODEC_ID_VP9:
++            profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE;
++            break;
++#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE
++        case AV_CODEC_ID_AV1:
++            profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE;
++            break;
++#endif
++        default:
++            av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id);
++            return 0;
++    }
++
++    query_ctrl = (struct v4l2_queryctrl){.id = profile_id};
++    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) {
++        av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id);
++    }
++    else {
++        av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id);
++
++        query_menu = (struct v4l2_querymenu){
++            .id = query_ctrl.id,
++            .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile),
++        };
++
++        if (query_menu.index > query_ctrl.maximum ||
++            query_menu.index < query_ctrl.minimum ||
++            ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) {
++            return AVERROR(ENOENT);  // Outside the control's range or not a menu entry
++        }
++    }
++
++    return 0;
++}
++
++static int
++check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc)
++{
++    unsigned int i;
++    const uint32_t w = avctx->coded_width;
++    const uint32_t h = avctx->coded_height;
++
++    if (w == 0 || h == 0 || fcc == 0) {
++        av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc));
++        return 0;
++    }
++    if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) {
++        av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc));
++        return 0;
++    }
++
++    for (i = 0;; ++i) {  // Walk the enumeration until a match (return 0) or an error
++        struct v4l2_frmsizeenum fs = {
++            .index = i,
++            .pixel_format = fcc,
++        };
++
++        while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) {
++            const int err = AVERROR(errno);
++            if (err == AVERROR(EINTR))
++                continue;  // Interrupted - retry the same index
++            if (i == 0 && err == AVERROR(ENOTTY)) {
++                av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n");
++                return 0;
++            }
++            if (err != AVERROR(EINVAL)) {
++                av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s\n", av_err2str(err));
++                return err;
++            }
++            av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n",
++                   w, h, av_fourcc2str(fcc), i);
++            return err;  // EINVAL: no enumerated entry matched
++        }
++
++        switch (fs.type) {
++            case V4L2_FRMSIZE_TYPE_DISCRETE:
++                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i,
++                       fs.discrete.width,fs.discrete.height);
++                if (w == fs.discrete.width && h == fs.discrete.height)
++                    return 0;
++                break;
++            case V4L2_FRMSIZE_TYPE_STEPWISE:
++                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
++                       fs.stepwise.min_width, fs.stepwise.min_height,
++                       fs.stepwise.max_width, fs.stepwise.max_height,
++                       fs.stepwise.step_width,fs.stepwise.step_height);
++                if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
++                    h >= fs.stepwise.min_height && h <= fs.stepwise.max_height &&
++                    (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 &&
++                    (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0)
++                    return 0;
++                break;
++            case V4L2_FRMSIZE_TYPE_CONTINUOUS:
++                av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i,
++                       fs.stepwise.min_width, fs.stepwise.min_height,
++                       fs.stepwise.max_width, fs.stepwise.max_height,
++                       fs.stepwise.step_width,fs.stepwise.step_height);
++                if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width &&
++                    h >= fs.stepwise.min_height && h <= fs.stepwise.max_height)
++                    return 0;
++                break;
++            default:
++                av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d\n", fs.type);
++                return AVERROR(EINVAL);
++        }
++    }
++}
++
++static int
++get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++    struct v4l2_capability cap;
++
++    memset(&cap, 0, sizeof(cap));
++    while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) {
++        int err = errno;
++        if (err == EINTR)
++            continue;
++        av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err));
++        return AVERROR(err);
++    }
++
++    // Could be made table driven if we have a few more but right now there
++    // seems no point
++
++    // Meson (amlogic) always gives a resolution changed event after output
++    // streamon and userspace must (re)allocate capture buffers and streamon
++    // capture to clear the event even if the capture buffers were the right
++    // size in the first place.
++    if (strcmp(cap.driver, "meson-vdec") == 0)
++        s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN;
++
++    av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks);
++    return 0;
++}
++
++// This heuristic is for H264 but use for everything
++static uint32_t max_coded_size(const AVCodecContext * const avctx)
++{
++    uint32_t wxh = avctx->coded_width * avctx->coded_height;
++    uint32_t size;
++
++    size = wxh * 3 / 2;
++    // H.264 Annex A table A-1 gives minCR which is either 2 or 4
++    // unfortunately that doesn't yield an actually useful limit
++    // and it should be noted that frame 0 is special cased to allow
++    // a bigger number which really isn't helpful for us. So just pick
++    // frame_size / 2
++    size /= 2;
++    // Add 64k to allow for any overheads and/or encoder hopefulness
++    // with small WxH
++    return size + (1 << 16);
++}
++
++static void
++parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++    s->reorder_size = 0;
++
++    if (!avctx->extradata || !avctx->extradata_size)
++        return;
++
++    switch (avctx->codec_id) {
++#if CONFIG_H264_DECODER
++        case AV_CODEC_ID_H264:
++        {
++            H264ParamSets ps;
++            int is_avc = 0;
++            int nal_length_size = 0;
++            int ret;
++
++            memset(&ps, 0, sizeof(ps));
++
++            ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
++                                           &ps, &is_avc, &nal_length_size,
++                                           avctx->err_recognition, avctx);
++            if (ret > 0) {
++                const SPS * sps = NULL;
++                unsigned int i;
++                for (i = 0; i != MAX_SPS_COUNT; ++i) {
++                    if (ps.sps_list[i]) {
++                        sps = ps.sps_list[i];
++                        break;
++                    }
++                }
++                if (sps) {
++                    avctx->profile = ff_h264_get_profile(sps);
++                    avctx->level = sps->level_idc;
++                    s->reorder_size = sps->num_reorder_frames;
++                }
++            }
++            ff_h264_ps_uninit(&ps);
++            break;
++        }
++#endif
++#if CONFIG_HEVC_DECODER
++        case AV_CODEC_ID_HEVC:
++        {
++            HEVCParamSets ps;
++            HEVCSEI sei;
++            int is_nalff = 0;
++            int nal_length_size = 0;
++            int ret;
++
++            memset(&ps, 0, sizeof(ps));
++            memset(&sei, 0, sizeof(sei));
++
++            ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size,
++                                           &ps, &sei, &is_nalff, &nal_length_size,
++                                           avctx->err_recognition, 0, avctx);
++            if (ret > 0) {
++                const HEVCSPS * sps = NULL;
++                unsigned int i;
++                for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) {
++                    if (ps.sps_list[i]) {
++                        sps = ps.sps_list[i];
++                        break;
++                    }
++                }
++                if (sps) {
++                    avctx->profile = sps->ptl.general_ptl.profile_idc;
++                    avctx->level   = sps->ptl.general_ptl.level_idc;
++                    s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering;
++                }
++            }
++            ff_hevc_ps_uninit(&ps);
++            ff_hevc_reset_sei(&sei);
++            break;
++        }
++#endif
++        default:
++            break;
++    }
++}
++
++static int
++choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
++{
++    const V4L2m2mPriv * const priv = avctx->priv_data;
++    unsigned int fmts_n;
++    uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n);
++    enum AVPixelFormat *fmts2 = NULL;
++    enum AVPixelFormat gf_pix_fmt;
++    unsigned int i;
++    unsigned int n = 0;
++    unsigned int pref_n = 1;
++    int rv = AVERROR(ENOENT);
++
++    if (!fmts)
++        return AVERROR(ENOENT);
++
++    if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) {
++        rv = AVERROR(ENOMEM);
++        goto error;
++    }
++
++    // Filter for formats that are supported by ffmpeg and
++    // can accommodate the stream size
++    fmts2[n++] = AV_PIX_FMT_DRM_PRIME;
++    for (i = 0; i != fmts_n; ++i) {
++        const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO);
++        av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f));
++        if (f == AV_PIX_FMT_NONE)
++            continue;
++
++        if (check_size(avctx, s, fmts[i]) != 0)
++            continue;
++
++        if (f == priv->pix_fmt)
++            pref_n = n;
++        fmts2[n++] = f;
++    }
++
++    if (n < 2) {
++        av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__);
++        goto error;
++    }
++
++    if (n != 2) {
++        // ffmpeg.c really only expects one s/w format. It thinks that the
++        // last format in the list is the s/w format of the h/w format but
++        // also chooses the first non-h/w format as the preferred s/w format.
++        // The only way of reconciling this is to dup our preferred format into
++        // both last & first place :-(
++        const enum AVPixelFormat t = fmts2[pref_n];
++        fmts2[pref_n] = fmts2[1];
++        fmts2[1] = t;
++        fmts2[n++] = t;
++    }
++
++    fmts2[n] = AV_PIX_FMT_NONE;
++
++    gf_pix_fmt = ff_get_format(avctx, fmts2);
++    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
++           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt),
++           avctx->coded_width, avctx->coded_height,
++           gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
++
++    if (gf_pix_fmt == AV_PIX_FMT_NONE)
++        goto error;
++
++    if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
++        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
++        s->capture.av_pix_fmt = avctx->sw_pix_fmt;
++        s->output_drm = 1;
++    }
++    else {
++        avctx->pix_fmt = gf_pix_fmt;
++        s->capture.av_pix_fmt = gf_pix_fmt;
++        s->output_drm = 0;
++    }
++
++    // Get format converts capture.av_pix_fmt back into a V4L2 format in the context
++    if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0)
++        goto error;
++    rv = ff_v4l2_context_set_format(&s->capture);
++
++error:
++    av_free(fmts2);
++    av_free(fmts);
++    return rv;
++}
++
+ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+ {
+     V4L2Context *capture, *output;
+@@ -185,10 +1167,31 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+     V4L2m2mPriv *priv = avctx->priv_data;
+     int ret;
+ 
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++#if FF_API_FRAME_PKT
++FF_DISABLE_DEPRECATION_WARNINGS
++    if (avctx->codec_id == AV_CODEC_ID_H264) {
++        if (avctx->ticks_per_frame == 1) {
++            if(avctx->time_base.den < INT_MAX/2) {
++                avctx->time_base.den *= 2;
++            } else
++                avctx->time_base.num /= 2;
++        }
++        avctx->ticks_per_frame = 2;
++    }
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++
+     ret = ff_v4l2_m2m_create_context(priv, &s);
+     if (ret < 0)
+         return ret;
+ 
++    parse_extradata(avctx, s);
++
++    xlat_init(&s->xlat);
++    pts_stats_init(&s->pts_stat, avctx, "decoder");
++
+     capture = &s->capture;
+     output = &s->output;
+ 
+@@ -196,14 +1199,45 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+      * by the v4l2 driver; this event will trigger a full pipeline reconfig and
+      * the proper values will be retrieved from the kernel driver.
+      */
+-    output->height = capture->height = avctx->coded_height;
+-    output->width = capture->width = avctx->coded_width;
++//    output->height = capture->height = avctx->coded_height;
++//    output->width = capture->width = avctx->coded_width;
++    output->height = capture->height = 0;
++    output->width = capture->width = 0;
+ 
+     output->av_codec_id = avctx->codec_id;
+     output->av_pix_fmt  = AV_PIX_FMT_NONE;
++    output->min_buf_size = max_coded_size(avctx);
+ 
+     capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
+     capture->av_pix_fmt = avctx->pix_fmt;
++    capture->min_buf_size = 0;
++
++    capture->av_pix_fmt = AV_PIX_FMT_NONE;
++    s->output_drm = 0;
++
++    s->db_ctl = NULL;
++    if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) {
++        if (strcmp(priv->dmabuf_alloc, "cma") == 0)
++            s->db_ctl = dmabufs_ctl_new();
++        else {
++            av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc);
++            return AVERROR(EINVAL);
++        }
++        if (!s->db_ctl) {
++            av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc);
++            return AVERROR(ENOMEM);
++        }
++    }
++
++    s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
++    if (!s->device_ref) {
++        ret = AVERROR(ENOMEM);
++        return ret;
++    }
++
++    ret = av_hwdevice_ctx_init(s->device_ref);
++    if (ret < 0)
++        return ret;
+ 
+     s->avctx = avctx;
+     ret = ff_v4l2_m2m_codec_init(priv);
+@@ -212,12 +1246,90 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+         return ret;
+     }
+ 
+-    return v4l2_prepare_decoder(s);
++    if (avctx->extradata &&
++        (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret));
++#if DUMP_FAILED_EXTRADATA
++        log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size);
++#endif
++        return ret;
++    }
++
++    if ((ret = get_quirks(avctx, s)) != 0)
++        return ret;
++
++    if ((ret = check_profile(avctx, s)) != 0) {
++        av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile);
++        return ret;
++    }
++
++    // Size check done as part of format filtering
++    if ((ret = choose_capture_format(avctx, s)) != 0)
++        return ret;
++
++    if ((ret = v4l2_prepare_decoder(s)) < 0)
++        return ret;
++
++    return 0;
+ }
+ 
+ static av_cold int v4l2_decode_close(AVCodecContext *avctx)
+ {
+-    return ff_v4l2_m2m_codec_end(avctx->priv_data);
++    int rv;
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++    rv = ff_v4l2_m2m_codec_end(avctx->priv_data);
++    av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv);
++    return rv;
++}
++
++static void v4l2_decode_flush(AVCodecContext *avctx)
++{
++    // An alternative and more drastic form of flush is to simply do this:
++    //    v4l2_decode_close(avctx);
++    //    v4l2_decode_init(avctx);
++    // The downside is that this keeps a decoder open until all the frames
++    // associated with it have been returned.  This is a bit wasteful on
++    // possibly limited h/w resources and fails on a Pi for this reason unless
++    // more GPU mem is allocated than is the default.
++
++    V4L2m2mPriv * const priv = avctx->priv_data;
++    V4L2m2mContext * const s = priv->context;
++    V4L2Context * const output = &s->output;
++    V4L2Context * const capture = &s->capture;
++
++    av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon);
++
++    // Reflushing everything is benign, quick and avoids having to worry about
++    // states like EOS processing so don't try to optimize out (having got it
++    // wrong once)
++
++    ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF);
++
++    // Clear any buffered input packet
++    av_packet_unref(&s->buf_pkt);
++
++    // Clear a pending EOS
++    if (ff_v4l2_ctx_eos(capture)) {
++        // Arguably we could delay this but this is easy and doesn't require
++        // thought or extra vars
++        ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF);
++        ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
++    }
++
++    // V4L2 makes no guarantees about whether decoded frames are flushed or not
++    // so mark all frames we are tracking to be discarded if they appear
++    xlat_flush(&s->xlat);
++
++    // resend extradata
++    s->extdata_sent = 0;
++    // clear status vars
++    s->running = 0;
++    s->draining = 0;
++    output->done = 0;
++    capture->done = 0;
++
++    // Stream on will occur when we actually submit a new frame
++    av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__);
+ }
+ 
+ #define OFFSET(x) offsetof(V4L2m2mPriv, x)
+@@ -227,9 +1339,16 @@ static const AVOption options[] = {
+     V4L_M2M_DEFAULT_OPTS,
+     { "num_capture_buffers", "Number of buffers in the capture context",
+         OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS },
++    { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS },
++    { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
+     { NULL},
+ };
+ 
++static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
++    HW_CONFIG_INTERNAL(DRM_PRIME),
++    NULL
++};
++
+ #define M2MDEC_CLASS(NAME) \
+     static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
+         .class_name = #NAME "_v4l2m2m_decoder", \
+@@ -250,11 +1369,17 @@ static const AVOption options[] = {
+         .init           = v4l2_decode_init, \
+         FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \
+         .close          = v4l2_decode_close, \
++        .flush          = v4l2_decode_flush, \
+         .bsfs           = bsf_name, \
+         .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
+         .caps_internal  = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \
+                           FF_CODEC_CAP_INIT_CLEANUP, \
+         .p.wrapper_name = "v4l2m2m", \
++        .p.pix_fmts     = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
++                                                         AV_PIX_FMT_NV12, \
++                                                         AV_PIX_FMT_YUV420P, \
++                                                         AV_PIX_FMT_NONE}, \
++        .hw_configs     = v4l2_m2m_hw_configs, \
+     }
+ 
+ M2MDEC(h264,  "H.264", AV_CODEC_ID_H264,       "h264_mp4toannexb");
+diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
+index 5770e87ea10b..6b00e113fa57 100644
+--- a/libavcodec/v4l2_m2m_enc.c
++++ b/libavcodec/v4l2_m2m_enc.c
+@@ -21,13 +21,17 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
++#include "config.h"
++
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
+ #include <search.h>
++
+ #include "encode.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavutil/pixdesc.h"
+ #include "libavutil/pixfmt.h"
++#include "libavutil/mem.h"
+ #include "libavutil/opt.h"
+ #include "codec_internal.h"
+ #include "profiles.h"
+@@ -38,6 +42,39 @@
+ #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x
+ #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x
+ 
++#if CONFIG_LIBDRM
++#include <drm_fourcc.h>
++
++// P030 should be defined in drm_fourcc.h and hopefully will be sometime
++// in the future but until then...
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
++#endif
++
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++#endif
++
++#ifndef V4L2_CID_CODEC_BASE
++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in videodev2.h and hopefully will be sometime in the future but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
+ static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den)
+ {
+     struct v4l2_streamparm parm = { 0 };
+@@ -148,15 +185,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p)
+ static int v4l2_check_b_frame_support(V4L2m2mContext *s)
+ {
+     if (s->avctx->max_b_frames)
+-        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n");
++        av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames);
+ 
+-    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0);
++    v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1);
+     v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0);
+     if (s->avctx->max_b_frames == 0)
+         return 0;
+ 
+     avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding");
+-
+     return AVERROR_PATCHWELCOME;
+ }
+ 
+@@ -271,17 +307,212 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s)
+     return 0;
+ }
+ 
++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame)
++{
++#if !CONFIG_LIBDRM
++    return AVERROR_OPTION_NOT_FOUND;
++#else
++    const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0];
++
++    const uint32_t drm_fmt = src->layers[0].format;
++    // Treat INVALID as LINEAR
++    const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ?
++        DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier;
++    uint32_t pix_fmt = 0;
++    uint32_t w = 0;
++    uint32_t h = 0;
++    uint32_t bpl = src->layers[0].planes[0].pitch;
++
++    // We really don't expect multiple layers
++    // All formats that we currently cope with are single object
++    // A zero pitch is invalid and would cause a divide by zero below
++    if (src->nb_layers != 1 || src->nb_objects != 1 || bpl == 0)
++        return AVERROR(EINVAL);
++
++    switch (drm_fmt) {
++        case DRM_FORMAT_YUV420:
++            if (mod == DRM_FORMAT_MOD_LINEAR) {
++                if (src->layers[0].nb_planes != 3)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_YUV420;
++                h = src->layers[0].planes[1].offset / bpl;
++                w = bpl;
++            }
++            break;
++
++        case DRM_FORMAT_NV12:
++            if (mod == DRM_FORMAT_MOD_LINEAR) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_NV12;
++                h = src->layers[0].planes[1].offset / bpl;
++                w = bpl;
++            }
++            else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt = V4L2_PIX_FMT_NV12_COL128;
++                w = bpl;
++                h = src->layers[0].planes[1].offset / 128;
++                bpl = fourcc_mod_broadcom_param(mod);
++            }
++            break;
++
++        case DRM_FORMAT_P030:
++            if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
++                if (src->layers[0].nb_planes != 2)
++                    break;
++                pix_fmt =  V4L2_PIX_FMT_NV12_10_COL128;
++                w = bpl / 2;  // Matching lie to how we construct this
++                h = src->layers[0].planes[1].offset / 128;
++                bpl = fourcc_mod_broadcom_param(mod);
++            }
++            break;
++
++        default:
++            break;
++    }
++
++    if (!pix_fmt)
++        return AVERROR(EINVAL);
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++        struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->plane_fmt[0].bytesperline = bpl;
++        pix->num_planes = 1;
++    }
++    else {
++        struct v4l2_pix_format *const pix = &format->fmt.pix;
++
++        pix->width = w;
++        pix->height = h;
++        pix->pixelformat = pix_fmt;
++        pix->bytesperline = bpl;
++    }
++
++    return 0;
++#endif
++}
++
++// Do we have similar enough formats to be usable?
++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b)
++{
++    if (a->type != b->type)
++        return 0;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) {
++        const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp;
++        const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp;
++        unsigned int i;
++        if (pa->pixelformat != pb->pixelformat ||
++            pa->num_planes != pb->num_planes)
++            return 0;
++        for (i = 0; i != pa->num_planes; ++i) {
++            if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline)
++                return 0;
++        }
++    }
++    else {
++        const struct v4l2_pix_format *const pa = &a->fmt.pix;
++        const struct v4l2_pix_format *const pb = &b->fmt.pix;
++        if (pa->pixelformat != pb->pixelformat ||
++            pa->bytesperline != pb->bytesperline)
++            return 0;
++    }
++    return 1;
++}
++
++static inline int q_full(const V4L2Context *const output)
++{
++    return ff_v4l2_context_q_count(output) == output->num_buffers;
++}
++
+ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+ {
+     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+     V4L2Context *const output = &s->output;
++    int rv;
++    const int needs_slot = q_full(output);
++
++    av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot);
++
++    // Signal EOF if needed (doesn't need q slot)
++    if (!frame) {
++        av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__);
++        return ff_v4l2_context_enqueue_frame(output, frame);
++    }
++
++    if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) {
++        // We should be able to return AVERROR(EAGAIN) to indicate buffer
++        // exhaustion, but ffmpeg currently treats that as fatal.
++        av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv));
++        return rv;
++    }
++
++    if (s->input_drm && !output->streamon) {
++        struct v4l2_format req_format = {.type = output->format.type};
++
++        // Set format when we first get a buffer
++        if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n");
++            return rv;
++        }
++
++        ff_v4l2_context_release(output);
++
++        output->format = req_format;
++
++        if ((rv = ff_v4l2_context_set_format(output)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n");
++            return rv;
++        }
++
++        if (!fmt_eq(&req_format, &output->format)) {
++            av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n");
++            return AVERROR(EINVAL);
++        }
++
++        output->selection.top = frame->crop_top;
++        output->selection.left = frame->crop_left;
++        output->selection.width = av_frame_cropped_width(frame);
++        output->selection.height = av_frame_cropped_height(frame);
++
++        if ((rv = ff_v4l2_context_init(output)) != 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n");
++            return rv;
++        }
++
++        {
++            struct v4l2_selection selection = {
++                .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
++                .target = V4L2_SEL_TGT_CROP,
++                .r = output->selection
++            };
++            if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) {
++                av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n",
++                       selection.r.width, selection.r.height, selection.r.left, selection.r.top,
++                       av_err2str(AVERROR(errno)));
++            }
++            av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n",
++                   selection.r.width, selection.r.height, selection.r.left, selection.r.top);
++        }
++    }
+ 
+ #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME
+-    if (frame && frame->pict_type == AV_PICTURE_TYPE_I)
++    if (frame->pict_type == AV_PICTURE_TYPE_I)
+         v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1);
  #endif
  
+-    return ff_v4l2_context_enqueue_frame(output, frame);
++    rv = ff_v4l2_context_enqueue_frame(output, frame);
++    if (rv) {
++        av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv));
++    }
++
++    return rv;
+ }
+ 
+ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+@@ -292,6 +523,11 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+     AVFrame *frame = s->frame;
+     int ret;
+ 
++    av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__,
++           ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture));
++
++    ff_v4l2_dq_all(output, 0);
++
+     if (s->draining)
+         goto dequeue;
+ 
+@@ -328,7 +564,115 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
+     }
+ 
+ dequeue:
+-    return ff_v4l2_context_dequeue_packet(capture, avpkt);
++    // Dequeue a frame
++    for (;;) {
++        int t = q_full(output) ? -1 : s->draining ? 300 : 0;
++        int rv2;
++
++        // If output is full wait for either a packet or output to become not full
++        ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t);
++
++        // If output was full retry packet dequeue
++        t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300;
++        rv2 = ff_v4l2_dq_all(output, t);
++        if (t == 0 || rv2 != 0)
++            break;
++    }
++    if (ret)
++        return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret;
++
++    if (capture->first_buf == 1) {
++        uint8_t * data;
++        const int len = avpkt->size;
++
++        // 1st buffer after streamon should be SPS/PPS
++        capture->first_buf = 2;
++
++        // Clear both possible stores so there is no chance of confusion
++        av_freep(&s->extdata_data);
++        s->extdata_size = 0;
++        av_freep(&avctx->extradata);
++        avctx->extradata_size = 0;
++
++        if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
++            goto fail_no_mem;
++
++        memcpy(data, avpkt->data, len);
++        av_packet_unref(avpkt);
++
++        // We need to copy the header, but keep local if not global
++        if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) {
++            avctx->extradata = data;
++            avctx->extradata_size = len;
++        }
++        else {
++            s->extdata_data = data;
++            s->extdata_size = len;
++        }
++
++        ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0);
++        ff_v4l2_dq_all(output, 0);
++        if (ret)
++            return ret;
++    }
++
++    // First frame must be key so mark as such even if encoder forgot
++    if (capture->first_buf == 2) {
++        avpkt->flags |= AV_PKT_FLAG_KEY;
++
++        // Add any extradata to the 1st packet we emit as we cannot create it at init
++        if (avctx->extradata_size > 0 && avctx->extradata) {
++            void * const side = av_packet_new_side_data(avpkt,
++                                           AV_PKT_DATA_NEW_EXTRADATA,
++                                           avctx->extradata_size);
++            if (!side)
++                goto fail_no_mem;
++
++            memcpy(side, avctx->extradata, avctx->extradata_size);
++        }
++    }
++
++    // Add SPS/PPS to the start of every key frame if non-global headers
++    if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) {
++        const size_t newlen = s->extdata_size + avpkt->size;
++        AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE);
++
++        if (buf == NULL)
++            goto fail_no_mem;
++
++        memcpy(buf->data, s->extdata_data, s->extdata_size);
++        memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size);
++
++        av_buffer_unref(&avpkt->buf);
++        avpkt->buf = buf;
++        avpkt->data = buf->data;
++        avpkt->size = newlen;
++    }
++    else if (ff_v4l2_context_q_count(capture) < 2) {
++        // Avoid running out of capture buffers
++        // In most cases the buffers will be returned quickly in which case
++        // we don't copy and can use the v4l2 buffers directly but sometimes
++        // ffmpeg seems to hold onto all of them for a long time (.mkv
++        // creation?) so avoid deadlock in those cases.
++        AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
++        if (buf == NULL)
++            goto fail_no_mem;
++
++        memcpy(buf->data, avpkt->data, avpkt->size);
++        av_buffer_unref(&avpkt->buf);  // Will recycle the V4L2 buffer
++
++        avpkt->buf = buf;
++        avpkt->data = buf->data;
++    }
++
++    capture->first_buf = 0;
++    return 0;
++
++fail_no_mem:
++    av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n");
++    ret = AVERROR(ENOMEM);
++    av_packet_unref(avpkt);
++    return ret;
+ }
+ 
+ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+@@ -340,6 +684,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+     uint32_t v4l2_fmt_output;
+     int ret;
+ 
++    av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt);
++
+     ret = ff_v4l2_m2m_create_context(priv, &s);
+     if (ret < 0)
+         return ret;
+@@ -347,13 +693,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+     capture = &s->capture;
+     output  = &s->output;
+ 
++    s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME);
++
+     /* common settings output/capture */
+     output->height = capture->height = avctx->height;
+     output->width = capture->width = avctx->width;
+ 
+     /* output context */
+     output->av_codec_id = AV_CODEC_ID_RAWVIDEO;
+-    output->av_pix_fmt = avctx->pix_fmt;
++    output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt :
++            avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt :
++            AV_PIX_FMT_YUV420P;
+ 
+     /* capture context */
+     capture->av_codec_id = avctx->codec_id;
+@@ -372,7 +722,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx)
+         v4l2_fmt_output = output->format.fmt.pix.pixelformat;
+ 
+     pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO);
+-    if (pix_fmt_output != avctx->pix_fmt) {
++    if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) {
+         const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output);
+         av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name);
+         return AVERROR(EINVAL);
+@@ -390,9 +740,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx)
+ #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+ 
+ #define V4L_M2M_CAPTURE_OPTS \
+-    V4L_M2M_DEFAULT_OPTS,\
++    { "num_output_buffers", "Number of buffers in the output context",\
++        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\
+     { "num_capture_buffers", "Number of buffers in the capture context", \
+-        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS }
++        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS }
+ 
+ static const AVOption mpeg4_options[] = {
+     V4L_M2M_CAPTURE_OPTS,
+diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c
+new file mode 100644
+index 000000000000..93c56ba13f85
+--- /dev/null
++++ b/libavcodec/v4l2_req_decode_q.c
+@@ -0,0 +1,108 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#include <memory.h>
++#include <semaphore.h>
++#include <pthread.h>
++
++#include "v4l2_req_decode_q.h"
++
++int decode_q_in_q(const req_decode_ent * const d)
++{
++    return d->in_q;
++}
++
++void decode_q_add(req_decode_q * const q, req_decode_ent * const d)
++{
++    pthread_mutex_lock(&q->q_lock);
++    if (!q->head) {
++        q->head = d;
++        q->tail = d;
++        d->prev = NULL;
++    }
++    else {
++        q->tail->next = d;
++        d->prev = q->tail;
++        q->tail = d;
++    }
++    d->next = NULL;
++    d->in_q = 1;
++    pthread_mutex_unlock(&q->q_lock);
++}
++
++// Remove entry from Q - if head wake-up anything that was waiting
++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d)
++{
++    int try_signal = 0;
++
++    if (!d->in_q)
++        return;
++
++    pthread_mutex_lock(&q->q_lock);
++    if (d->prev)
++        d->prev->next = d->next;
++    else {
++        try_signal = 1;  // Only need to signal if we were head
++        q->head = d->next;
++    }
++
++    if (d->next)
++        d->next->prev = d->prev;
++    else
++        q->tail = d->prev;
++
++    // Not strictly needed but makes debug easier
++    d->next = NULL;
++    d->prev = NULL;
++    d->in_q = 0;
++    pthread_mutex_unlock(&q->q_lock);
++
++    if (try_signal)
++        pthread_cond_broadcast(&q->q_cond);
++}
++
++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d)
++{
++    pthread_mutex_lock(&q->q_lock);
++
++    while (q->head != d)
++        pthread_cond_wait(&q->q_cond, &q->q_lock);
++
++    pthread_mutex_unlock(&q->q_lock);
++}
++
++void decode_q_uninit(req_decode_q * const q)
++{
++    pthread_mutex_destroy(&q->q_lock);
++    pthread_cond_destroy(&q->q_cond);
++}
++
++void decode_q_init(req_decode_q * const q)
++{
++    memset(q, 0, sizeof(*q));
++    pthread_mutex_init(&q->q_lock, NULL);
++    pthread_cond_init(&q->q_cond, NULL);
++}
++
++
+diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h
+new file mode 100644
+index 000000000000..0ff8bbe88207
+--- /dev/null
++++ b/libavcodec/v4l2_req_decode_q.h
+@@ -0,0 +1,51 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H
++#define AVCODEC_V4L2_REQ_DECODE_Q_H
++
++#include <pthread.h>
++
++typedef struct req_decode_ent {
++    struct req_decode_ent * next;
++    struct req_decode_ent * prev;
++    int in_q;
++} req_decode_ent;
++
++typedef struct req_decode_q {
++    pthread_mutex_t q_lock;
++    pthread_cond_t q_cond;
++    req_decode_ent * head;
++    req_decode_ent * tail;
++} req_decode_q;
++
++int decode_q_in_q(const req_decode_ent * const d);
++void decode_q_add(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_uninit(req_decode_q * const q);
++void decode_q_init(req_decode_q * const q);
++
++#endif
++
+diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c
+new file mode 100644
+index 000000000000..99a8c19710bc
+--- /dev/null
++++ b/libavcodec/v4l2_req_devscan.c
+@@ -0,0 +1,475 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#include <errno.h>
++#include <fcntl.h>
++#include <libudev.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++
++#include <sys/ioctl.h>
++#include <sys/sysmacros.h>
++
++#include <linux/media.h>
++#include <linux/videodev2.h>
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_utils.h"
++
++struct decdev {
++    enum v4l2_buf_type src_type;
++    uint32_t src_fmt_v4l2;
++    const char * vname;
++    const char * mname;
++};
++
++struct devscan {
++    struct decdev env;
++    unsigned int dev_size;
++    unsigned int dev_count;
++    struct decdev *devs;
++};
++
++static int video_src_pixfmt_supported(uint32_t fmt)
++{
++    return 1;
++}
++
++static void v4l2_setup_format(struct v4l2_format *format, unsigned int type,
++                  unsigned int width, unsigned int height,
++                  unsigned int pixelformat)
++{
++    unsigned int sizeimage;
++
++    memset(format, 0, sizeof(*format));
++    format->type = type;
++
++    sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(type)) {
++        format->fmt.pix_mp.width = width;
++        format->fmt.pix_mp.height = height;
++        format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage;
++        format->fmt.pix_mp.pixelformat = pixelformat;
++    } else {
++        format->fmt.pix.width = width;
++        format->fmt.pix.height = height;
++        format->fmt.pix.sizeimage = sizeimage;
++        format->fmt.pix.pixelformat = pixelformat;
++    }
++}
++
++static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat,
++            unsigned int width, unsigned int height)
++{
++    struct v4l2_format format;
++
++    v4l2_setup_format(&format, type, width, height, pixelformat);
++
++    return ioctl(video_fd, VIDIOC_S_FMT, &format) ? -errno : 0;
++}
++
++static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities)
++{
++    struct v4l2_capability capability = { 0 };
++    int rc;
++
++    rc = ioctl(video_fd, VIDIOC_QUERYCAP, &capability);
++    if (rc < 0)
++        return -errno;
++
++    if (capabilities != NULL) {
++        if ((capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0)
++            *capabilities = capability.device_caps;
++        else
++            *capabilities = capability.capabilities;
++    }
++
++    return 0;
++}
++
++static int devscan_add(struct devscan *const scan,
++                       enum v4l2_buf_type src_type,
++                       uint32_t src_fmt_v4l2,
++                       const char * vname,
++                       const char * mname)
++{
++    struct decdev *d;
++
++    if (scan->dev_size <= scan->dev_count) {
++        unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2;
++        d = realloc(scan->devs, n * sizeof(*d));
++        if (!d)
++            return -ENOMEM;
++        scan->devs = d;
++        scan->dev_size = n;
++    }
++
++    d = scan->devs + scan->dev_count;
++    d->src_type = src_type;
++    d->src_fmt_v4l2 = src_fmt_v4l2;
++    d->vname = strdup(vname);
++    if (!d->vname)
++        return -ENOMEM;
++    d->mname = strdup(mname);
++    if (!d->mname) {
++        free((char *)d->vname);
++        return -ENOMEM;
++    }
++    ++scan->dev_count;
++    return 0;
++}
++
++void devscan_delete(struct devscan **const pScan)
++{
++    unsigned int i;
++    struct devscan * const scan = *pScan;
++
++    if (!scan)
++        return;
++    *pScan = NULL;
++
++    for (i = 0; i < scan->dev_count; ++i) {
++        free((char*)scan->devs[i].mname);
++        free((char*)scan->devs[i].vname);
++    }
++    free(scan->devs);
++    free(scan);
++}
++
++#define REQ_BUF_CAPS (\
++    V4L2_BUF_CAP_SUPPORTS_DMABUF |\
++    V4L2_BUF_CAP_SUPPORTS_REQUESTS |\
++    V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
++
++static void probe_formats(void * const dc,
++              struct devscan *const scan,
++              const int fd,
++              const unsigned int type_v4l2,
++              const char *const mpath,
++              const char *const vpath)
++{
++    unsigned int i;
++    for (i = 0;; ++i) {
++        struct v4l2_fmtdesc fmtdesc = {
++            .index = i,
++            .type = type_v4l2
++        };
++        struct v4l2_requestbuffers rbufs = {
++            .count = 0,
++            .type = type_v4l2,
++            .memory = V4L2_MEMORY_MMAP
++        };
++        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
++            if (errno == EINTR)
++                continue;
++            if (errno != EINVAL)
++                request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2);
++            return;
++        }
++        if (!video_src_pixfmt_supported(fmtdesc.pixelformat))
++            continue;
++
++        if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) {
++            request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat);
++            continue;
++        }
++
++        while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) {
++            if (errno == EINTR)
++                continue;  // Retry only when interrupted by a signal
++            request_debug(dc, "%s: Reqbufs failed\n", vpath);
++            break;  // Don't spin on a hard error; capabilities is still 0 so the check below skips this format
++        }
++
++        if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) {
++            request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities);
++            continue;
++        }
++
++        request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n",
++                 mpath, vpath, fmtdesc.pixelformat, type_v4l2);
++        devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath);
++    }
++}
++
++
++static int probe_video_device(void * const dc,
++                   struct udev_device *const device,
++                   struct devscan *const scan,
++                   const char *const mpath)
++{
++    int ret;
++    unsigned int capabilities = 0;
++    int video_fd = -1;
++
++    const char *path = udev_device_get_devnode(device);
++    if (!path) {
++        request_err(dc, "%s: get video device devnode failed\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    video_fd = open(path, O_RDWR, 0);
++    if (video_fd == -1) {
++        ret = -errno;
++        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno);
++        goto fail;
++    }
++
++    ret = v4l2_query_capabilities(video_fd, &capabilities);
++    if (ret < 0) {
++        request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities);
++
++    if (!(capabilities & V4L2_CAP_STREAMING)) {
++        request_debug(dc, "%s: missing required streaming capability\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) {
++        request_debug(dc, "%s: missing required mem2mem capability\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    /* Should check capture formats too... */
++    if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0)
++        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path);
++    if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0)
++        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path);
++
++    close(video_fd);
++    return 0;
++
++fail:
++    if (video_fd >= 0)
++        close(video_fd);
++    return ret;
++}
++
++static int probe_media_device(void * const dc,
++                   struct udev_device *const device,
++                   struct devscan *const scan)
++{
++    int ret = 0;  // Result when no V4L video interface is probed (was returned uninitialised)
++    int rv;
++    struct media_device_info device_info = { 0 };
++    struct media_v2_topology topology = { 0 };
++    struct media_v2_interface *interfaces = NULL;
++    struct udev *udev = udev_device_get_udev(device);
++    struct udev_device *video_device;
++    dev_t devnum;
++    int media_fd = -1;
++
++    const char *path = udev_device_get_devnode(device);
++    if (!path) {
++        request_err(dc, "%s: get media device devnode failed\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    media_fd = open(path, O_RDWR, 0);
++    if (media_fd < 0) {
++        ret = -errno;
++        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    if (topology.num_interfaces <= 0) {
++        request_err(dc, "%s: media device has no interfaces\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    interfaces = calloc(topology.num_interfaces, sizeof(*interfaces));
++    if (!interfaces) {
++        request_err(dc, "%s: allocating media interface struct failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    topology.ptr_interfaces = (__u64)(uintptr_t)interfaces;
++    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    for (int i = 0; i < topology.num_interfaces; i++) {
++        if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO)
++            continue;
++
++        devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor);
++        video_device = udev_device_new_from_devnum(udev, 'c', devnum);
++        if (!video_device) {
++            ret = -errno;
++            request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device);
++            continue;
++        }
++
++        ret = probe_video_device(dc, video_device, scan, path);
++        udev_device_unref(video_device);
++
++        if (ret != 0)
++            goto fail;
++    }
++
++fail:
++    free(interfaces);
++    if (media_fd != -1)
++        close(media_fd);
++    return ret;
++}
++
++const char *decdev_media_path(const struct decdev *const dev)
++{
++    return !dev ? NULL : dev->mname;
++}
++
++const char *decdev_video_path(const struct decdev *const dev)
++{
++    return !dev ? NULL : dev->vname;
++}
++
++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev)
++{
++    return !dev ? 0 : dev->src_type;
++}
++
++uint32_t decdev_src_pixelformat(const struct decdev *const dev)
++{
++    return !dev ? 0 : dev->src_fmt_v4l2;
++}
++
++
++const struct decdev *devscan_find(struct devscan *const scan,
++                  const uint32_t src_fmt_v4l2)
++{
++    unsigned int i;
++
++    if (scan->env.mname && scan->env.vname)
++        return &scan->env;
++
++    if (!src_fmt_v4l2)
++        return scan->dev_count ? scan->devs + 0 : NULL;
++
++    for (i = 0; i != scan->dev_count; ++i) {
++        if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2)
++            return scan->devs + i;
++    }
++    return NULL;
++}
++
++int devscan_build(void * const dc, struct devscan **pscan)
++{
++    int ret;
++    struct udev *udev = NULL;  // Must be NULL: the fail path tests it before udev_new() has run
++    struct udev_enumerate *enumerate;
++    struct udev_list_entry *devices;
++    struct udev_list_entry *entry;
++    struct udev_device *device;
++    struct devscan * scan;
++
++    *pscan = NULL;
++
++    scan = calloc(1, sizeof(*scan));
++    if (!scan) {
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH");
++    scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH");
++    if (scan->env.mname && scan->env.vname) {
++        request_info(dc, "Media/video device env overrides found: %s,%s\n",
++                 scan->env.mname, scan->env.vname);
++        *pscan = scan;
++        return 0;
++    }
++
++    udev = udev_new();
++    if (!udev) {
++        request_err(dc, "%s: allocating udev context failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    enumerate = udev_enumerate_new(udev);
++    if (!enumerate) {
++        request_err(dc, "%s: allocating udev enumerator failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    udev_enumerate_add_match_subsystem(enumerate, "media");
++    udev_enumerate_scan_devices(enumerate);
++
++    devices = udev_enumerate_get_list_entry(enumerate);
++    udev_list_entry_foreach(entry, devices) {
++        const char *path = udev_list_entry_get_name(entry);
++        if (!path)
++            continue;
++
++        device = udev_device_new_from_syspath(udev, path);
++        if (!device)
++            continue;
++
++        probe_media_device(dc, device, scan);
++        udev_device_unref(device);
++    }
++
++    udev_enumerate_unref(enumerate);
++    udev_unref(udev);
++
++    *pscan = scan;
++    return 0;
++
++fail:
++    if (udev)
++        udev_unref(udev);
++    devscan_delete(&scan);
++    return ret;
++}
++
+diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h
+new file mode 100644
+index 000000000000..49b5bb44b2d7
+--- /dev/null
++++ b/libavcodec/v4l2_req_devscan.h
+@@ -0,0 +1,47 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_V4L2_REQ_DEVSCAN_H
++#define AVCODEC_V4L2_REQ_DEVSCAN_H
++
++#include <stdint.h>
++
++struct devscan;
++struct decdev;
++enum v4l2_buf_type;
++
++/* These return pointers to data in the devscan structure and so are valid
++ * for the lifetime of that structure
++ */
++const char *decdev_media_path(const struct decdev *const dev);
++const char *decdev_video_path(const struct decdev *const dev);
++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev);
++uint32_t decdev_src_pixelformat(const struct decdev *const dev);
++
++const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2);
++
++int devscan_build(void * const dc, struct devscan **pscan);
++void devscan_delete(struct devscan **const pScan);
++
++#endif
+diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c
+new file mode 100644
+index 000000000000..e157d4d55749
+--- /dev/null
++++ b/libavcodec/v4l2_req_dmabufs.c
+@@ -0,0 +1,433 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#include <stdatomic.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <inttypes.h>
++#include <fcntl.h>
++#include <errno.h>
++#include <string.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++#include <linux/mman.h>
++#include <linux/dma-buf.h>
++#include <linux/dma-heap.h>
++
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_utils.h"
++
++#define TRACE_ALLOC 0
++
++#ifndef __O_CLOEXEC
++#define __O_CLOEXEC 0
++#endif
++
++struct dmabufs_ctl;
++struct dmabuf_h;
++
++struct dmabuf_fns {
++    int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size);
++    void (*buf_free)(struct dmabuf_h * dh);
++    int (*ctl_new)(struct dmabufs_ctl * dbsc);
++    void (*ctl_free)(struct dmabufs_ctl * dbsc);
++};
++
++struct dmabufs_ctl {
++    atomic_int ref_count;
++    int fd;
++    size_t page_size;
++    void * v;
++    const struct dmabuf_fns * fns;
++};
++
++struct dmabuf_h {
++    int fd;
++    size_t size;
++    size_t len;
++    void * mapptr;
++    void * v;
++    const struct dmabuf_fns * fns;
++};
++
++#if TRACE_ALLOC
++static unsigned int total_bufs = 0;
++static size_t total_size = 0;
++#endif
++
 +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size)
 +{
 +    struct dmabuf_h *dh;
@@ -25269,52 +8006,2114 @@ index ae6c64836972..c4bbed18c680 100644
 +    return dh;
 +}
 +
- struct dmabuf_h * dmabuf_import(int fd, size_t size)
- {
-     struct dmabuf_h *dh;
-@@ -122,6 +142,8 @@ int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags)
-     struct dma_buf_sync sync = {
-         .flags = flags
-     };
++struct dmabuf_h * dmabuf_import(int fd, size_t size)
++{
++    struct dmabuf_h *dh;
++
++    // Reject size 0 before dup() so a refused import doesn't leak the new fd
++    if (size == 0 || (fd = dup(fd)) < 0)
++        return NULL;
++
++    dh = malloc(sizeof(*dh));
++    if (!dh) {
++        close(fd);
++        return NULL;
++    }
++
++    *dh = (struct dmabuf_h) {
++        .fd = fd,
++        .size = size,
++        .mapptr = MAP_FAILED
++    };
++
++#if TRACE_ALLOC
++    ++total_bufs;
++    total_size += dh->size;
++    request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++    return dh;
++}
++
++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size)
++{
++    struct dmabuf_h * dh;
++    if (old != NULL) {
++        if (old->size >= size) {
++            return old;
++        }
++        dmabuf_free(old);
++    }
++
++    if (size == 0 ||
++        (dh = malloc(sizeof(*dh))) == NULL)
++        return NULL;
++
++    *dh = (struct dmabuf_h){
++        .fd = -1,
++        .mapptr = MAP_FAILED,
++        .fns = dbsc->fns
++    };
++
++    if (dh->fns->buf_alloc(dbsc, dh, size) != 0)
++        goto fail;
++
++
++#if TRACE_ALLOC
++    ++total_bufs;
++    total_size += dh->size;
++    request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++    return dh;
++
++fail:
++    free(dh);
++    return NULL;
++}
++
++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags)
++{
++    struct dma_buf_sync sync = {
++        .flags = flags
++    };
 +    if (dh->fd == -1)
 +        return 0;
-     while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) {
-         const int err = errno;
-         if (errno == EINTR)
++    while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) {
++        const int err = errno;
++        if (errno == EINTR)
++            continue;
++        request_log("%s: ioctl failed: flags=%#x\n", __func__, flags);
++        return -err;
++    }
++    return 0;
++}
++
++int dmabuf_write_start(struct dmabuf_h * const dh)
++{
++    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);
++}
++
++int dmabuf_write_end(struct dmabuf_h * const dh)
++{
++    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);
++}
++
++int dmabuf_read_start(struct dmabuf_h * const dh)
++{
++    if (!dmabuf_map(dh))
++        return -1;
++    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
++}
++
++int dmabuf_read_end(struct dmabuf_h * const dh)
++{
++    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);
++}
++
++
++void * dmabuf_map(struct dmabuf_h * const dh)
++{
++    if (!dh)
++        return NULL;
++    if (dh->mapptr != MAP_FAILED)
++        return dh->mapptr;
++    dh->mapptr = mmap(NULL, dh->size,
++              PROT_READ | PROT_WRITE,
++              MAP_SHARED | MAP_POPULATE,
++              dh->fd, 0);
++    if (dh->mapptr == MAP_FAILED) {
++        request_log("%s: Map failed\n", __func__);
++        return NULL;
++    }
++    return dh->mapptr;
++}
++
++int dmabuf_fd(const struct dmabuf_h * const dh)
++{
++    if (!dh)
++        return -1;
++    return dh->fd;
++}
++
++size_t dmabuf_size(const struct dmabuf_h * const dh)
++{
++    if (!dh)
++        return 0;
++    return dh->size;
++}
++
++size_t dmabuf_len(const struct dmabuf_h * const dh)
++{
++    if (!dh)
++        return 0;
++    return dh->len;
++}
++
++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len)
++{
++    dh->len = len;
++}
++
++void dmabuf_free(struct dmabuf_h * dh)
++{
++    if (!dh)
++        return;
++
++#if TRACE_ALLOC
++    --total_bufs;
++    total_size -= dh->size;
++    request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++    if (dh->fns != NULL && dh->fns->buf_free)
++        dh->fns->buf_free(dh);
++
++    if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL)
++        munmap(dh->mapptr, dh->size);
++    if (dh->fd != -1)
++        while (close(dh->fd) == -1 && errno == EINTR)
++            /* loop */;
++    free(dh);
++}
++
++static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns)
++{
++    struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc));
++
++    if (!dbsc)
++        return NULL;
++
++    dbsc->fd = -1;
++    dbsc->fns = fns;
++    dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE);
++
++    if (fns->ctl_new(dbsc) != 0)
++        goto fail;
++
++    return dbsc;
++
++fail:
++    free(dbsc);
++    return NULL;
++}
++
++static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc)
++{
++    request_debug(NULL, "Free dmabuf ctl\n");
++
++    dbsc->fns->ctl_free(dbsc);
++
++    free(dbsc);
++}
++
++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc)
++{
++    struct dmabufs_ctl * const dbsc = *pDbsc;
++
++    if (!dbsc)
++        return;
++    *pDbsc = NULL;
++
++    if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0)
++        return;
++
++    dmabufs_ctl_free(dbsc);
++}
++
++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc)
++{
++    atomic_fetch_add(&dbsc->ref_count, 1);
++    return dbsc;
++}
++
++//-----------------------------------------------------------------------------
++//
++// Alloc dmabuf via CMA
++
++static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names)
++{
++    for (; *names != NULL; ++names)
++    {
++        while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 &&
++               errno == EINTR)
++            /* Loop */;
++        if (dbsc->fd != -1)
++        {
++            request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names);
++            return 0;
++        }
++        request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno));
++    }
++    request_log("Unable to open any dma_heap device\n");
++    return -1;
++}
++
++static int ctl_cma_new(struct dmabufs_ctl * dbsc)
++{
++    static const char * const names[] = {
++        "/dev/dma_heap/linux,cma",
++        "/dev/dma_heap/reserved",
++        NULL
++    };
++
++    return ctl_cma_new2(dbsc, names);
++}
++
++static void ctl_cma_free(struct dmabufs_ctl * dbsc)
++{
++    if (dbsc->fd != -1)
++        while (close(dbsc->fd) == -1 && errno == EINTR)
++            /* loop */;
++}
++
++static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size)
++{
++    struct dma_heap_allocation_data data = {
++        .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1),
++        .fd = 0,
++        .fd_flags = O_RDWR,
++        .heap_flags = 0
++    };
++
++    while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) {
++        int err = errno;
++        request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n",
++                (uint64_t)data.len,
++                dbsc->fd,
++                err,
++                strerror(err));
++        if (err == EINTR)
++            continue;
++        return -err;
++    }
++
++    dh->fd = data.fd;
++    dh->size = (size_t)data.len;
++
++//    fprintf(stderr, "%s: size=%#zx, ftell=%#zx\n", __func__,
++//            dh->size, (size_t)lseek(dh->fd, 0, SEEK_END));
++
++    return 0;
++}
++
++static void buf_cma_free(struct dmabuf_h * dh)
++{
++    // Nothing needed
++}
++
++static const struct dmabuf_fns dmabuf_cma_fns = {
++    .buf_alloc  = buf_cma_alloc,
++    .buf_free   = buf_cma_free,
++    .ctl_new    = ctl_cma_new,
++    .ctl_free   = ctl_cma_free,
++};
++
++struct dmabufs_ctl * dmabufs_ctl_new(void)
++{
++    request_debug(NULL, "Dmabufs using CMA\n");
++    return dmabufs_ctl_new2(&dmabuf_cma_fns);
++}
++
++static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc)
++{
++    static const char * const names[] = {
++        "/dev/dma_heap/vidbuf_cached",
++        "/dev/dma_heap/linux,cma",
++        "/dev/dma_heap/reserved",
++        NULL
++    };
++
++    return ctl_cma_new2(dbsc, names);
++}
++
++static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = {
++    .buf_alloc  = buf_cma_alloc,
++    .buf_free   = buf_cma_free,
++    .ctl_new    = ctl_cma_new_vidbuf_cached,
++    .ctl_free   = ctl_cma_free,
++};
++
++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void)
++{
++    request_debug(NULL, "Dmabufs using Vidbuf\n");
++    return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns);
++}
++
 diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h
-index cfb17e801d59..c1d3d8c8d751 100644
---- a/libavcodec/v4l2_req_dmabufs.h
+new file mode 100644
+index 000000000000..9226ab2498a5
+--- /dev/null
 +++ b/libavcodec/v4l2_req_dmabufs.h
-@@ -18,6 +18,9 @@ static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t s
- }
- /* Create from existing fd - dups(fd) */
- struct dmabuf_h * dmabuf_import(int fd, size_t size);
+@@ -0,0 +1,69 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_V4L2_REQ_DMABUFS_H
++#define AVCODEC_V4L2_REQ_DMABUFS_H
++
++#include <stddef.h>
++
++struct dmabufs_ctl;
++struct dmabuf_h;
++
++struct dmabufs_ctl * dmabufs_ctl_new(void);
++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void);
++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc);
++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc);
++
++// Need not preserve old contents
++// On NULL return old buffer is freed
++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size);
++
++static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) {
++    return dmabuf_realloc(dbsc, NULL, size);
++}
++/* Create from existing fd - dups(fd) */
++struct dmabuf_h * dmabuf_import(int fd, size_t size);
 +/* Import an MMAP - return NULL if mapptr = MAP_FAIL */
 +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size);
 +
- void * dmabuf_map(struct dmabuf_h * const dh);
- 
- /* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */
++void * dmabuf_map(struct dmabuf_h * const dh);
++
++/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */
++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags);
++
++int dmabuf_write_start(struct dmabuf_h * const dh);
++int dmabuf_write_end(struct dmabuf_h * const dh);
++int dmabuf_read_start(struct dmabuf_h * const dh);
++int dmabuf_read_end(struct dmabuf_h * const dh);
++
++int dmabuf_fd(const struct dmabuf_h * const dh);
++/* Allocated size */
++size_t dmabuf_size(const struct dmabuf_h * const dh);
++/* Bytes in use */
++size_t dmabuf_len(const struct dmabuf_h * const dh);
++/* Set bytes in use */
++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len);
++void dmabuf_free(struct dmabuf_h * dh);
++
++#endif
+diff --git a/libavcodec/v4l2_req_hevc_v1.c b/libavcodec/v4l2_req_hevc_v1.c
+new file mode 100644
+index 000000000000..4d908dac126e
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v1.c
+@@ -0,0 +1,27 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#define HEVC_CTRLS_VERSION 1
++#include "v4l2_req_hevc_vx.c"
++
+diff --git a/libavcodec/v4l2_req_hevc_v2.c b/libavcodec/v4l2_req_hevc_v2.c
+new file mode 100644
+index 000000000000..1874e20a5d8d
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v2.c
+@@ -0,0 +1,27 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#define HEVC_CTRLS_VERSION 2
++#include "v4l2_req_hevc_vx.c"
++
+diff --git a/libavcodec/v4l2_req_hevc_v3.c b/libavcodec/v4l2_req_hevc_v3.c
+new file mode 100644
+index 000000000000..1c5a84bea8d1
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v3.c
+@@ -0,0 +1,27 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#define HEVC_CTRLS_VERSION 3
++#include "v4l2_req_hevc_vx.c"
++
+diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c
+new file mode 100644
+index 000000000000..472df7cb0e39
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v4.c
+@@ -0,0 +1,27 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#define HEVC_CTRLS_VERSION 4
++#include "v4l2_req_hevc_vx.c"
++
+diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
+new file mode 100644
+index 000000000000..bb7535a49201
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_vx.c
+@@ -0,0 +1,1454 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++// File included by v4l2_req_hevc_v* - not compiled on its own
++
++#include "decode.h"
++#include "hevc/hevcdec.h"
++#include "hwconfig.h"
++#include "internal.h"
++#include "thread.h"
++
++#include "libavutil/mem.h"
++
++#if HEVC_CTRLS_VERSION == 1
++#include "hevc-ctrls-v1.h"
++
++// Fixup renamed entries
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT
++
++#elif HEVC_CTRLS_VERSION == 2
++#include "hevc-ctrls-v2.h"
++#elif HEVC_CTRLS_VERSION == 3
++#include "hevc-ctrls-v3.h"
++#elif HEVC_CTRLS_VERSION == 4
++#include <linux/v4l2-controls.h>
++#if !defined(V4L2_CID_STATELESS_HEVC_SPS)
++#include "hevc-ctrls-v4.h"
++#endif
++#else
++#error Unknown HEVC_CTRLS_VERSION
++#endif
++
++#ifndef V4L2_CID_STATELESS_HEVC_SPS
++#define V4L2_CID_STATELESS_HEVC_SPS                     V4L2_CID_MPEG_VIDEO_HEVC_SPS
++#define V4L2_CID_STATELESS_HEVC_PPS                     V4L2_CID_MPEG_VIDEO_HEVC_PPS
++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS            V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS
++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX          V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX
++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS           V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS
++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE             V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE
++#define V4L2_CID_STATELESS_HEVC_START_CODE              V4L2_CID_MPEG_VIDEO_HEVC_START_CODE
++
++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED
++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED     V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED
++#define V4L2_STATELESS_HEVC_START_CODE_NONE             V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE
++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B          V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B
++#endif
++
++#include "v4l2_request_hevc.h"
++
++#include "libavutil/hwcontext_drm.h"
++
++#include <semaphore.h>
++#include <pthread.h>
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_utils.h"
++
++// Attached to buf[0] in frame
++// Pooled in hwcontext so generally create once - 1/frame
++typedef struct V4L2MediaReqDescriptor {
++    AVDRMFrameDescriptor drm;
++
++    // Media
++    uint64_t timestamp;
++    struct qent_dst * qe_dst;
++
++    // Refs to source frames
++    AVBufferRef * refs[18]; // 16 + 1 + 1
++
++    // Decode only - should be NULL by the time we emit the frame
++    struct req_decode_ent decode_ent;
++
++    struct media_request *req;
++    struct qent_src *qe_src;
++
++#if HEVC_CTRLS_VERSION >= 2
++    struct v4l2_ctrl_hevc_decode_params dec;
++#endif
++
++    size_t num_slices;
++    size_t alloced_slices;
++    struct v4l2_ctrl_hevc_slice_params * slice_params;
++    struct slice_info * slices;
++
++    size_t num_offsets;
++    size_t alloced_offsets;
++    uint32_t *offsets;
++
++} V4L2MediaReqDescriptor;
++
++struct slice_info {
++    const uint8_t * ptr;
++    size_t len; // bytes
++    size_t n_offsets;
++};
++
++// Handy container for accumulating controls before setting
++struct req_controls {
++    int has_scaling;
++    struct timeval tv;
++    struct v4l2_ctrl_hevc_sps sps;
++    struct v4l2_ctrl_hevc_pps pps;
++    struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
++};
++
++//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 };
++
++
++// Get an FFmpeg format from the v4l2 format
++static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format)
++{
++    switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ?
++            format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) {
++    case V4L2_PIX_FMT_YUV420:
++        return AV_PIX_FMT_YUV420P;
++    case V4L2_PIX_FMT_NV12:
++        return AV_PIX_FMT_NV12;
++#if CONFIG_SAND
++    case V4L2_PIX_FMT_NV12_COL128:
++        return AV_PIX_FMT_RPI4_8;
++    case V4L2_PIX_FMT_NV12_10_COL128:
++        return AV_PIX_FMT_RPI4_10;
++#endif
++    default:
++        break;
++    }
++    return AV_PIX_FMT_NONE;
++}
++
++static inline uint64_t frame_capture_dpb(const AVFrame * const frame)
++{
++    const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
++    return rd->timestamp;
++}
++
++static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp)
++{
++    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
++    rd->timestamp = dpb_stamp;
++}
++
++static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table)
++{
++    int32_t luma_weight_denom, chroma_weight_denom;
++    const SliceHeader * const sh = &h->sh;
++    const HEVCPPS * const pps = h->pps;
++    const HEVCSPS * const sps = pps->sps;
++
++    if (sh->slice_type == HEVC_SLICE_I ||
++        (sh->slice_type == HEVC_SLICE_P && !pps->weighted_pred_flag) ||
++        (sh->slice_type == HEVC_SLICE_B && !pps->weighted_bipred_flag))
++        return;
++
++    table->luma_log2_weight_denom = sh->luma_log2_weight_denom;
++
++    if (sps->chroma_format_idc)
++        table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom;
++
++    luma_weight_denom = (1 << sh->luma_log2_weight_denom);
++    chroma_weight_denom = (1 << sh->chroma_log2_weight_denom);
++
++    for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) {
++        table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom;
++        table->luma_offset_l0[i] = sh->luma_offset_l0[i];
++        table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom;
++        table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom;
++        table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0];
++        table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1];
++    }
++
++    if (sh->slice_type != HEVC_SLICE_B)
++        return;
++
++    for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) {
++        table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom;
++        table->luma_offset_l1[i] = sh->luma_offset_l1[i];
++        table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom;
++        table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom;
++        table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0];
++        table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1];
++    }
++}
++
++#if HEVC_CTRLS_VERSION <= 2
++static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp)
++{
++    const HEVCFrame *frame;
++    int i;
++
++    for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) {
++        frame = h->rps[ST_CURR_BEF].ref[i];
++        if (frame && timestamp == frame_capture_dpb(frame->f))
++            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE;
++    }
++
++    for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) {
++        frame = h->rps[ST_CURR_AFT].ref[i];
++        if (frame && timestamp == frame_capture_dpb(frame->f))
++            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER;
++    }
++
++    for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) {
++        frame = h->rps[LT_CURR].ref[i];
++        if (frame && timestamp == frame_capture_dpb(frame->f))
++            return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR;
++    }
++
++    return 0;
++}
++#endif
++
++static unsigned int
++get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame,
++                  const struct v4l2_hevc_dpb_entry * const entries,
++                  const unsigned int num_entries)
++{ // Map a frame to its position in entries[] by comparing timestamps; h is not used here
++    uint64_t timestamp;
++
++    if (!frame)
++        return 0; // no frame: falls back to index 0
++
++    timestamp = frame_capture_dpb(frame->f); // timestamp held in the descriptor at frame->f->data[0]
++
++    for (unsigned int i = 0; i < num_entries; i++) {
++        if (entries[i].timestamp == timestamp)
++            return i; // first entry whose timestamp matches
++    }
++
++    return 0; // not found: indistinguishable from a genuine match at index 0
++}
++
++static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx)
++{ // Advance b by idx counted bytes, additionally stepping over any 0x03 that follows two or more zero bytes
++    unsigned int z = 0; // length of the current run of consecutive zero bytes
++    while (idx--) { // each iteration consumes exactly one counted byte
++        if (*b++ == 0) {
++            ++z;
++            if (z >= 2 && *b == 3) { // 00 00 03 sequence: presumably an emulation-prevention byte - TODO confirm
++                ++b; // skip the 0x03 without decrementing idx
++                z = 0;
++            }
++        }
++        else {
++            z = 0; // non-zero byte ends the zero run
++        }
++    }
++    return b; // NOTE(review): no end-of-buffer check here; caller must guarantee idx stays in range
++}
++
++static int slice_add(V4L2MediaReqDescriptor * const rd)
++{ // Reserve one more slot in rd->slice_params / rd->slices; returns 0 or AVERROR(ENOMEM)
++    if (rd->num_slices >= rd->alloced_slices) {
++        size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2; // start at 8 entries, then double
++
++        if (av_reallocp_array(&rd->slice_params, n2, sizeof(*rd->slice_params)))
++            goto fail;
++        if (av_reallocp_array(&rd->slices, n2, sizeof(*rd->slices)))
++            goto fail;
++        rd->alloced_slices = n2; // both arrays were resized to n2 entries
++    }
++    ++rd->num_slices; // the new slot is the last one; this function does not initialise it
++    return 0;
++
++fail:
++    av_freep(&rd->slices); // NOTE(review): slice_params is not freed here if only the second resize failed - confirm that is intended
++    rd->alloced_slices = 0;
++    rd->num_slices = 0; // any previously added slices are discarded on failure
++    return AVERROR(ENOMEM);
++}
++
++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets)
++{ // Append n values (each stored as offsets[i] - 1) to rd->offsets; returns 0 or AVERROR(ENOMEM)
++    if (rd->num_offsets + n > rd->alloced_offsets) {
++        size_t n2 = rd->alloced_offsets == 0 ? 128 : rd->alloced_offsets * 2; // seed from this array's own capacity
++        // Keep doubling until the existing entries plus the n new ones fit
++        while (rd->num_offsets + n > n2)
++            n2 *= 2;
++        if (av_reallocp_array(&rd->offsets, n2, sizeof(*rd->offsets))) {
++            rd->alloced_offsets = 0; // on failure av_reallocp_array() frees the buffer and NULLs rd->offsets
++            rd->num_offsets = 0;
++            return AVERROR(ENOMEM);
++        }
++        // av_reallocp_array() has already stored the resized buffer in rd->offsets
++        rd->alloced_offsets = n2;
++    }
++    for (size_t i = 0; i != n; ++i)
++        rd->offsets[rd->num_offsets++] = offsets[i] - 1; // stored in "minus 1" form
++    return 0;
++}
++
++static unsigned int
++fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries)
++{
++    unsigned int i;
++    unsigned int n = 0;
++    const HEVCFrame * const pic = h->cur_frame;
++    const HEVCLayerContext * const layer = &h->layers[h->cur_layer];
++
++    for (i = 0; i < FF_ARRAY_ELEMS(layer->DPB); i++) {
++        const HEVCFrame * const frame = &layer->DPB[i];
++        if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) {
++            struct v4l2_hevc_dpb_entry * const entry = entries + n++;
++
++            entry->timestamp = frame_capture_dpb(frame->f);
++#if HEVC_CTRLS_VERSION <= 2
++            entry->rps = find_frame_rps_type(h, entry->timestamp);
++#else
++            entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 :
++                V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE;
++#endif
++            entry->field_pic = (frame->f->flags & AV_FRAME_FLAG_INTERLACED) != 0;
++
++#if HEVC_CTRLS_VERSION <= 3
++            /* TODO: Interleaved: Get the POC for each field. */
++            entry->pic_order_cnt[0] = frame->poc;
++            entry->pic_order_cnt[1] = frame->poc;
++#else
++            entry->pic_order_cnt_val = frame->poc;
++#endif
++        }
++    }
++    return n;
++}
++
++static void fill_slice_params(const HEVCContext * const h,
++#if HEVC_CTRLS_VERSION >= 2
++                              const struct v4l2_ctrl_hevc_decode_params * const dec,
++#endif
++                              struct v4l2_ctrl_hevc_slice_params *slice_params,
++                              uint32_t bit_size, uint32_t bit_offset)
++{
++    const SliceHeader * const sh = &h->sh;
++#if HEVC_CTRLS_VERSION >= 2
++    const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb;
++    const unsigned int dpb_n = dec->num_active_dpb_entries;
++#else
++    struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb;
++    unsigned int dpb_n;
++#endif
++    unsigned int i;
++    RefPicList *rpl;
++
++    *slice_params = (struct v4l2_ctrl_hevc_slice_params) {
++        .bit_size = bit_size,
++#if HEVC_CTRLS_VERSION <= 3
++        .data_bit_offset = bit_offset,
++#else
++        .data_byte_offset = bit_offset / 8 + 1,
++#endif
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++        .slice_segment_addr = sh->slice_segment_addr,
++
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++        .nal_unit_type = h->nal_unit_type,
++        .nuh_temporal_id_plus1 = h->temporal_id + 1,
++
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++        .slice_type = sh->slice_type,
++        .colour_plane_id = sh->colour_plane_id,
++        .slice_pic_order_cnt = h->cur_frame->poc,
++        .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0,
++        .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0,
++        .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0,
++        .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand,
++        .slice_qp_delta = sh->slice_qp_delta,
++        .slice_cb_qp_offset = sh->slice_cb_qp_offset,
++        .slice_cr_qp_offset = sh->slice_cr_qp_offset,
++        .slice_act_y_qp_offset = 0,
++        .slice_act_cb_qp_offset = 0,
++        .slice_act_cr_qp_offset = 0,
++        .slice_beta_offset_div2 = sh->beta_offset / 2,
++        .slice_tc_offset_div2 = sh->tc_offset / 2,
++
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++        .pic_struct = h->sei.picture_timing.picture_struct,
++
++#if HEVC_CTRLS_VERSION < 2
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++        .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
++        .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
++        .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs,
++#endif
++    };
++
++    if (sh->slice_sample_adaptive_offset_flag[0])
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
++
++    if (sh->slice_sample_adaptive_offset_flag[1])
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
++
++    if (sh->slice_temporal_mvp_enabled_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
++
++    if (sh->mvd_l1_zero_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
++
++    if (sh->cabac_init_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
++
++    if (sh->collocated_list == L0)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
++
++    if (sh->disable_deblocking_filter_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
++
++    if (sh->slice_loop_filter_across_slices_enabled_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
++
++    if (sh->dependent_slice_segment_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
++
++#if HEVC_CTRLS_VERSION < 2
++    dpb_n = fill_dpb_entries(h, dpb);
++    slice_params->num_active_dpb_entries = dpb_n;
++#endif
++
++    if (sh->slice_type != HEVC_SLICE_I) {
++        rpl = &h->cur_frame->refPicList[0];
++        for (i = 0; i < rpl->nb_refs; i++)
++            slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
++    }
++
++    if (sh->slice_type == HEVC_SLICE_B) {
++        rpl = &h->cur_frame->refPicList[1];
++        for (i = 0; i < rpl->nb_refs; i++)
++            slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
++    }
++
++    fill_pred_table(h, &slice_params->pred_weight_table);
++
++    slice_params->num_entry_point_offsets = sh->num_entry_point_offsets;
++#if HEVC_CTRLS_VERSION <= 3
++    if (slice_params->num_entry_point_offsets > 256) {
++        slice_params->num_entry_point_offsets = 256;
++        av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
++    }
++
++    for (i = 0; i < slice_params->num_entry_point_offsets; i++)
++        slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
++#endif
++}
++
++#if HEVC_CTRLS_VERSION >= 2
++static void
++fill_decode_params(const HEVCContext * const h,
++                   struct v4l2_ctrl_hevc_decode_params * const dec)
++{
++    unsigned int i;
++
++    *dec = (struct v4l2_ctrl_hevc_decode_params){
++        .pic_order_cnt_val = h->poc,
++        .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
++        .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
++        .num_poc_lt_curr = h->rps[LT_CURR].nb_refs,
++    };
++
++    dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb);
++
++    // The docn does seem to ask that we fit our 32 bit signed POC into
++    // a U8 so... (To be fair 16 bits would be enough)
++    // Luckily we (Pi) don't use these fields
++    for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i)
++        dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc;
++    for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i)
++        dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc;
++    for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i)
++        dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc;
++
++    if (IS_IRAP(h))
++        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
++    if (IS_IDR(h))
++        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
++    if (h->sh.no_output_of_prior_pics_flag)
++        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR;
++
++}
++#endif
++
++static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps)
++{
++    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++    *ctrl = (struct v4l2_ctrl_hevc_sps) {
++        .chroma_format_idc = sps->chroma_format_idc,
++        .pic_width_in_luma_samples = sps->width,
++        .pic_height_in_luma_samples = sps->height,
++        .bit_depth_luma_minus8 = sps->bit_depth - 8,
++        .bit_depth_chroma_minus8 = sps->bit_depth - 8,
++        .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
++        .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1,
++        .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics,
++        .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1,
++        .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
++        .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
++        .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
++        .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size,
++        .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
++        .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
++        .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
++        .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
++        .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
++        .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
++        .num_short_term_ref_pic_sets = sps->nb_st_rps,
++        .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
++        .chroma_format_idc = sps->chroma_format_idc,
++        .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
++    };
++
++    if (sps->separate_colour_plane)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE;
++
++    if (sps->scaling_list_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED;
++
++    if (sps->amp_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED;
++
++    if (sps->sao_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET;
++
++    if (sps->pcm_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED;
++
++    if (sps->pcm_loop_filter_disabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED;
++
++    if (sps->long_term_ref_pics_present)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT;
++
++    if (sps->temporal_mvp_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED;
++
++    if (sps->strong_intra_smoothing_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED;
++}
++
++static void fill_scaling_matrix(const ScalingList * const sl,
++                                struct v4l2_ctrl_hevc_scaling_matrix * const sm)
++{
++    unsigned int i;
++    // Copy six 4x4/8x8/16x16 lists (plus 16x16 DC) and two 32x32 lists (plus DC) from sl to sm
++    for (i = 0; i < 6; i++) {
++        unsigned int j;
++        // 32x32 outputs 0 and 1 are read from source entries 0 and 3 (index i * 3)
++        for (j = 0; j < 16; j++)
++            sm->scaling_list_4x4[i][j] = sl->sl[0][i][j];
++        for (j = 0; j < 64; j++) {
++            sm->scaling_list_8x8[i][j]   = sl->sl[1][i][j];
++            sm->scaling_list_16x16[i][j] = sl->sl[2][i][j];
++            if (i < 2)
++                sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j];
++        }
++        sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i];
++        if (i < 2)
++            sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3];
++    }
++}
++
++static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps)
++{
++    uint64_t flags = 0;
++    // Translate each boolean PPS field into its V4L2_HEVC_PPS_FLAG_* bit
++    if (pps->dependent_slice_segments_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
++
++    if (pps->output_flag_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
++
++    if (pps->sign_data_hiding_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
++
++    if (pps->cabac_init_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
++
++    if (pps->constrained_intra_pred_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
++
++    if (pps->transform_skip_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
++
++    if (pps->cu_qp_delta_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
++
++    if (pps->pic_slice_level_chroma_qp_offsets_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
++
++    if (pps->weighted_pred_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
++
++    if (pps->weighted_bipred_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
++
++    if (pps->transquant_bypass_enable_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
++
++    if (pps->tiles_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
++
++    if (pps->entropy_coding_sync_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
++
++    if (pps->loop_filter_across_tiles_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
++
++    if (pps->seq_loop_filter_across_slices_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
++
++    if (pps->deblocking_filter_override_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
++
++    if (pps->disable_dbf)
++        flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
++
++    if (pps->lists_modification_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
++
++    if (pps->slice_header_extension_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT;
++
++    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++    *ctrl = (struct v4l2_ctrl_hevc_pps) {
++        .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
++        .init_qp_minus26 = pps->pic_init_qp_minus26,
++        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
++        .pps_cb_qp_offset = pps->cb_qp_offset,
++        .pps_cr_qp_offset = pps->cr_qp_offset,
++        .pps_beta_offset_div2 = pps->beta_offset / 2,
++        .pps_tc_offset_div2 = pps->tc_offset / 2,
++        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
++        .flags = flags
++    };
++
++    // Tile geometry is written only when tiles are enabled; otherwise the literal above leaves it zero
++    if (pps->tiles_enabled_flag) {
++        ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1;
++        ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1;
++
++        for (int i = 0; i < pps->num_tile_columns; i++)
++            ctrl->column_width_minus1[i] = pps->column_width[i] - 1;
++
++        for (int i = 0; i < pps->num_tile_rows; i++)
++            ctrl->row_height_minus1[i] = pps->row_height[i] - 1;
++    }
++}
++
++static int frame_finish(V4L2MediaReqDescriptor * const rd)
++{
++    int rv = 0;
++    // Wait on the destination buffer (if one is bound); any non-success status yields -1
++    if (rd->qe_dst) {
++        MediaBufsStatus stat = qent_dst_wait(rd->qe_dst);
++        if (stat != MEDIABUFS_STATUS_SUCCESS)
++            rv = -1;
++    }
++    // Release every buffer reference in rd->refs, stopping at the first NULL entry
++    {
++        AVBufferRef **p = rd->refs;
++        for (; *p != NULL; ++p)
++            av_buffer_unref(p);
++    }
++
++    return rv;
++}
++
++// Called before finally returning the frame to the user
++// Set corrupt flag here as this is actually the frame structure that
++// is going to the user (in MT land each thread has its own pool)
++static int frame_post_process(void *logctx, AVFrame *frame)
++{
++    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0];
++
++    // Level-gated trace: a library must not write to stderr unconditionally
++    // for every returned frame. Always returns 0; failure is reported via the flag.
++    av_log(logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++    frame->flags &= ~AV_FRAME_FLAG_CORRUPT;
++    if (frame_finish(rd) != 0) {
++        av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__);
++        frame->flags |= AV_FRAME_FLAG_CORRUPT;
++    }
++    return 0;
++}
++
++static inline struct timeval cvt_dpb_to_tv(uint64_t t)
++{
++    t /= 1000;  // inverse of cvt_timestamp_to_dpb's * 1000; result is split as microseconds below
++    return (struct timeval){
++        .tv_usec = t % 1000000,
++        .tv_sec = t / 1000000
++    };
++}
++
++static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t)
++{
++    return (uint64_t)t * 1000;  // widen before multiplying so the product cannot wrap in 32 bits
++}
++
++static int v4l2_request_hevc_start_frame(AVCodecContext *avctx,
++                                         V4L2RequestContextHEVC *const ctx,
++                                         av_unused const uint8_t *buffer,
++                                         av_unused uint32_t size)
++{
++    const HEVCContext *h = avctx->priv_data;
++    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->cur_frame->f->data[0];
++
++    // Level-gated trace via av_log; deliberately keeps no function-static
++    // call counter, which would be a data race with frame threading
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++    decode_q_add(&ctx->decode_q, &rd->decode_ent);
++
++    rd->num_slices = 0;
++    ctx->timestamp++;
++    rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp);
++
++    {
++        FrameDecodeData * const fdd = (FrameDecodeData*)h->cur_frame->f->private_ref->data;
++        fdd->post_process = frame_post_process;
++    }
++
++    // qe_dst needs to be bound to the data buffer and only returned when that is
++    if (!rd->qe_dst)
++    {
++        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
++            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
++            return AVERROR(ENOMEM);
++        }
++    }
++
++    // ff_thread_finish_setup by caller
++
++    return 0;
++}
++
++// Object fd & size will be zapped by this & need setting later
++static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format)
++{
++    AVDRMLayerDescriptor *layer = &desc->layers[0];
++    unsigned int width;
++    unsigned int height;
++    unsigned int bpl;
++    uint32_t pixelformat;
++    // Read geometry from whichever of the multi-planar / single-planar structs applies
++    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++        width       = format->fmt.pix_mp.width;
++        height      = format->fmt.pix_mp.height;
++        pixelformat = format->fmt.pix_mp.pixelformat;
++        bpl         = format->fmt.pix_mp.plane_fmt[0].bytesperline;
++    }
++    else {
++        width       = format->fmt.pix.width;
++        height      = format->fmt.pix.height;
++        pixelformat = format->fmt.pix.pixelformat;
++        bpl         = format->fmt.pix.bytesperline;
++    }
++    // Map the V4L2 pixel format to a DRM fourcc + modifier; unsupported formats return -1
++    switch (pixelformat) {
++    case V4L2_PIX_FMT_NV12:
++        layer->format = DRM_FORMAT_NV12;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#if CONFIG_SAND
++    case V4L2_PIX_FMT_NV12_COL128:
++        layer->format = DRM_FORMAT_NV12;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
++        break;
++    case V4L2_PIX_FMT_NV12_10_COL128:
++        layer->format = DRM_FORMAT_P030;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
++        break;
++#endif
++#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED
++    case V4L2_PIX_FMT_SUNXI_TILED_NV12:
++        layer->format = DRM_FORMAT_NV12;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED;
++        break;
++#endif
++#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15)
++    case V4L2_PIX_FMT_NV15:
++        layer->format = DRM_FORMAT_NV15;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#endif
++    case V4L2_PIX_FMT_NV16:
++        layer->format = DRM_FORMAT_NV16;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20)
++    case V4L2_PIX_FMT_NV20:
++        layer->format = DRM_FORMAT_NV20;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#endif
++    default:
++        return -1;
++    }
++
++    desc->nb_objects = 1;
++    desc->objects[0].fd = -1;
++    desc->objects[0].size = 0;
++
++    desc->nb_layers = 1;
++    layer->nb_planes = 2;
++    // Both planes live in object 0; plane 1's offset and the pitches depend on the layout
++    layer->planes[0].object_index = 0;
++    layer->planes[0].offset = 0;
++    layer->planes[0].pitch = bpl;
++#if CONFIG_SAND
++    if (pixelformat == V4L2_PIX_FMT_NV12_COL128) {
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = height * 128;
++        layer->planes[0].pitch = width;
++        layer->planes[1].pitch = width;
++    }
++    else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = height * 128;
++        layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy
++        layer->planes[1].pitch = width * 2;
++    }
++    else
++#endif
++    {
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = layer->planes[0].pitch * height;
++        layer->planes[1].pitch = layer->planes[0].pitch;
++    }
++
++    return 0;
++}
++
++static int
++set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
++    struct req_controls *const controls,
++#if HEVC_CTRLS_VERSION >= 2
++    struct v4l2_ctrl_hevc_decode_params * const dec,
++#endif
++    struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count,
++    void * const offsets, const size_t offset_count)
++{
++    int rv;
++#if HEVC_CTRLS_VERSION >= 2
++    unsigned int n = 3;
++#else
++    unsigned int n = 2;
++#endif
++    // n starts at the number of always-present controls; the array has room for 3 optional ones
++    struct v4l2_ext_control control[6] = {
++        {
++            .id = V4L2_CID_STATELESS_HEVC_SPS,
++            .ptr = &controls->sps,
++            .size = sizeof(controls->sps),
++        },
++        {
++            .id = V4L2_CID_STATELESS_HEVC_PPS,
++            .ptr = &controls->pps,
++            .size = sizeof(controls->pps),
++        },
++#if HEVC_CTRLS_VERSION >= 2
++        {
++            .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS,
++            .ptr = dec,
++            .size = sizeof(*dec),
++        },
++#endif
++    };
++    // Optional controls are appended only when the caller supplied the matching data
++    if (slices)
++        control[n++] = (struct v4l2_ext_control) {
++            .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS,
++            .ptr = slices,
++            .size = sizeof(*slices) * slice_count,
++        };
++
++    if (controls->has_scaling)
++        control[n++] = (struct v4l2_ext_control) {
++            .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX,
++            .ptr = &controls->scaling_matrix,
++            .size = sizeof(controls->scaling_matrix),
++        };
++
++#if HEVC_CTRLS_VERSION >= 4
++    if (offsets)
++        control[n++] = (struct v4l2_ext_control) {
++            .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS,
++            .ptr = offsets,
++            .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count,
++        };
++#endif
++    // Submit the n populated controls against the media request; the status is returned as-is
++    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n);
++
++    return rv;
++}
++
++static void
++add_ref_once(V4L2MediaReqDescriptor * const rd, struct HEVCFrame * const ref)
++{
++    AVBufferRef **p = rd->refs;  // walked up to its first NULL slot
++    int i = 0;
++    while (*p != NULL) {
++        if (ref->f->buf[0]->data == (*p)->data)  // this buffer is already referenced
++            return;
++        ++p;
++        av_assert0(++i < 16);  // abort rather than walk past 16 occupied slots
++    }
++    *p = av_buffer_ref(ref->f->buf[0]);  // NOTE(review): result is not checked for NULL
++}
++
++// This only works because we started out from a single coded frame buffer
++// that will remain intact until after end_frame
++static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, const uint8_t *buffer, uint32_t size)
++{
++    const HEVCContext * const h = avctx->priv_data;
++    const SliceHeader * const sh = &h->sh;
++    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->cur_frame->f->data[0];
++    uint32_t boff = (ptr_from_index(buffer, sh->data_offset) - buffer) * 8 - 1;
++
++    const unsigned int n = rd->num_slices;
++    const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices;
++
++    int rv;
++    struct slice_info * si;
++    // Level-gated per-slice trace (goes through av_log, not raw stderr)
++    av_log(avctx, AV_LOG_TRACE, "<<< %s: boff=%u\n", __func__, boff);
++    // This looks dodgy but we know that FFmpeg has parsed this from a buffer
++    // that contains the entire frame including the start code
++    if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) {
++        buffer -= 3;
++        size += 3;
++        boff += 24;
++        if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) {
++            av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n",
++                   buffer[0], buffer[1], buffer[2]);
++        }
++    }
++    // Frame mode: keep one slice_info spanning from the first slice's start to this slice's end
++    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) {
++        if (rd->slices == NULL) {
++            if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL)
++                return AVERROR(ENOMEM);
++            rd->slices->ptr = buffer;
++            rd->num_slices = 1;
++        }
++        rd->slices->len = buffer - rd->slices->ptr + size;
++        return 0;
++    }
++
++    if ((rv = slice_add(rd)) != 0)
++        return rv;
++
++    si = rd->slices + n;
++    si->ptr = buffer;
++    si->len = size;
++    si->n_offsets = rd->num_offsets;
++    // Later slices of a block are addressed relative to the block's first slice, whose len grows
++    if (n != block_start) {
++        struct slice_info *const si0 = rd->slices + block_start;
++        const size_t offset = (buffer - si0->ptr);
++        boff += offset * 8;
++        size += offset;
++        si0->len = si->len + offset;
++    }
++
++#if HEVC_CTRLS_VERSION >= 2
++    if (n == 0)
++        fill_decode_params(h, &rd->dec);
++    fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff);
++#else
++    fill_slice_params(h, rd->slice_params + n, size * 8, boff);
++#endif
++    // Hold a reference on every picture this slice predicts from (L0 for P/B, L1 for B)
++    {
++        RefPicList *rpl;
++        int i;
++
++        if (sh->slice_type != HEVC_SLICE_I) {
++            rpl = &h->cur_frame->refPicList[0];
++            for (i = 0; i < rpl->nb_refs; i++)
++                add_ref_once(rd, rpl->ref[i]);
++        }
++
++        if (sh->slice_type == HEVC_SLICE_B) {
++            rpl = &h->cur_frame->refPicList[1];
++            for (i = 0; i < rpl->nb_refs; i++)
++                add_ref_once(rd, rpl->ref[i]);
++        }
++    }
++
++    if (ctx->max_offsets != 0 &&
++        (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0)
++        return rv;
++
++    return 0;
++}
++
++static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx, V4L2RequestContextHEVC *const ctx)
++{
++    const HEVCContext * const h = avctx->priv_data;
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);  // level-gated trace, not raw stderr
++    if (h->cur_frame != NULL) {
++        V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->cur_frame->f->data[0];
++        // Abort the frame's request and source buffer, then take it off the decode queue
++        media_request_abort(&rd->req);
++        mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src);
++
++        decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++    }
++}
++
++static int send_slice(AVCodecContext * const avctx,
++                      V4L2RequestContextHEVC * const ctx,
++                      V4L2MediaReqDescriptor * const rd,
++                      struct req_controls *const controls,
++                      const unsigned int i, const unsigned int j)
++{
++    const int is_last = (j == rd->num_slices);
++    struct slice_info *const si = rd->slices + i;
++    struct media_request * req = NULL;
++    struct qent_src * src = NULL;
++    MediaBufsStatus stat;
++    void * offsets = rd->offsets + rd->slices[i].n_offsets;
++    size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets;
++    // Sends slices [i, j) as one request; is_last is set when j reaches the frame's slice count
++    if ((req = media_request_get(ctx->mpool)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__);
++        return AVERROR(ENOMEM);
++    }
++
++    if (set_req_ctls(ctx, req,
++                     controls,
++#if HEVC_CTRLS_VERSION >= 2
++                     &rd->dec,
++#endif
++                     rd->slice_params + i, j - i,
++                     offsets, n_offsets)) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__);
++        goto fail1;
++    }
++
++    if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__);
++        goto fail1;
++    }
++
++    if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__);
++        goto fail2;
++    }
++
++    if (qent_src_params_set(src, &controls->tv)) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__);
++        goto fail2;
++    }
++    // The destination buffer is passed only with the first request of the frame (i == 0)
++    stat = mediabufs_start_request(ctx->mbufs, &req, &src,
++                                   i == 0 ? rd->qe_dst : NULL,
++                                   is_last);
++
++    if (stat != MEDIABUFS_STATUS_SUCCESS) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__);
++        return AVERROR_UNKNOWN;
++    }
++    return 0;
++    // Error unwinding: fail2 aborts the source buffer and then falls through to abort the request
++fail2:
++    mediabufs_src_qent_abort(ctx->mbufs, &src);
++fail1:
++    media_request_abort(&req);
++    return AVERROR_UNKNOWN;
++}
++
++static int v4l2_request_hevc_end_frame(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx)
++{
++    const HEVCContext * const h = avctx->priv_data;
++    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->cur_frame->f->data[0];
++    struct req_controls rc;
++    unsigned int i;
++    int rv;
++
++    // Level-gated trace via av_log; no function-static call counter is kept
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++    // It is possible, though maybe a bug, to get an end_frame without
++    // a previous start_frame.  If we do then give up.
++    if (!decode_q_in_q(&rd->decode_ent)) {
++        av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__);
++        return AVERROR_INVALIDDATA;
++    }
++
++    {
++        const HEVCPPS *pps = h->pps;
++        const HEVCSPS *sps = pps->sps;
++        const ScalingList *sl = pps->scaling_list_data_present_flag ?
++                                    &pps->scaling_list :
++                                sps->scaling_list_enabled ?
++                                    &sps->scaling_list : NULL;
++
++        // Build the per-frame controls; the PPS scaling list takes priority over the SPS one
++        memset(&rc, 0, sizeof(rc));
++        rc.tv = cvt_dpb_to_tv(rd->timestamp);
++        fill_sps(&rc.sps, sps);
++        fill_pps(&rc.pps, pps);
++        if (sl) {
++            rc.has_scaling = 1;
++            fill_scaling_matrix(sl, &rc.scaling_matrix);
++        }
++    }
++
++    decode_q_wait(&ctx->decode_q, &rd->decode_ent);
++
++    // qe_dst needs to be bound to the data buffer and only returned when that is
++    // Alloc almost certainly wants to be serialised if there is any chance of blocking
++    // so we get the next frame to be free in the thread that needs it for decode first.
++    //
++    // In our current world this probably isn't a concern but put it here anyway
++    if (!rd->qe_dst)
++    {
++        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
++            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
++            rv = AVERROR(ENOMEM);
++            goto fail;
++        }
++    }
++
++    // Send as slices
++    for (i = 0; i < rd->num_slices; i += ctx->max_slices) {
++        const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices);
++        if ((rv = send_slice(avctx, ctx, rd, &rc, i, e)) != 0)
++            goto fail;
++    }
++
++    // Set the drm_prime descriptor. NOTE(review): drm_from_format()'s result is not checked
++    drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
++    rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0));
++    rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0));
++
++    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++    return 0;
++
++fail:
++    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++    return rv;
++}
++
++static inline int
++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v)
++{
++    return v >= c->minimum && v <= c->maximum;  // inclusive test against the queried min/max
++}
++
++// Initial check & init: returns 0 only if the queried controls match this build and an SPS can be set
++static int
++probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
++{
++    const HEVCContext *h = avctx->priv_data;
++    const HEVCSPS * const sps = h->pps->sps;
++    struct v4l2_ctrl_hevc_sps ctrl_sps;
++    unsigned int i;
++
++    // Check for var slice array
++    struct v4l2_query_ext_ctrl qc[] = {
++        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS },
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_SPS },
++        { .id = V4L2_CID_STATELESS_HEVC_PPS },
++        { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX },
++#if HEVC_CTRLS_VERSION >= 2
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS },
++#endif
++    };
++    // Order & size must match!
++    static const size_t ctrl_sizes[] = {
++        sizeof(struct v4l2_ctrl_hevc_slice_params),
++        sizeof(int32_t),
++        sizeof(struct v4l2_ctrl_hevc_sps),
++        sizeof(struct v4l2_ctrl_hevc_pps),
++        sizeof(struct v4l2_ctrl_hevc_scaling_matrix),
++#if HEVC_CTRLS_VERSION >= 2
++        sizeof(struct v4l2_ctrl_hevc_decode_params),
++#endif
++    };
++    const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc);
++    // V2 builds reject driver versions >= 5.18.0; V3 builds reject versions below 5.18.0
++#if HEVC_CTRLS_VERSION == 2
++    if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0))
++        return AVERROR(EINVAL);
++#elif HEVC_CTRLS_VERSION == 3
++    if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0))
++        return AVERROR(EINVAL);
++#endif
++
++    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls);
++    i = 0;
++#if HEVC_CTRLS_VERSION >= 4
++    // Skip slice check if no slice mode
++    if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++        i = 1;
++#else
++    // Fail frame mode silently for anything prior to V4
++    if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++        return AVERROR(EINVAL);
++#endif
++    for (; i != noof_ctrls; ++i) {
++        if (qc[i].type == 0) {
++            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id);
++            return AVERROR(EINVAL);
++        }
++        if (ctrl_sizes[i] != (size_t)qc[i].elem_size) {
++            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n",
++                   HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size);
++            return AVERROR(EINVAL);
++        }
++    }
++    // Finally confirm the driver accepts an SPS built from the current stream
++    fill_sps(&ctrl_sps, sps);
++
++    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n");
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
++// Final init: pick decode mode and start-code setting, then push both to the driver
++static int
++set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
++{
++    int ret;
++
++    struct v4l2_query_ext_ctrl querys[] = {
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, },
++#if HEVC_CTRLS_VERSION >= 4
++        { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, },
++#endif
++    };
++
++    struct v4l2_ext_control ctrls[] = {
++        { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_STATELESS_HEVC_START_CODE, },
++    };
++
++    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
++    // Slice params that are not a 1-D dynamic array with a non-zero size mean one slice per request
++    ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) ||
++                       querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ?
++        1 : querys[2].dims[0];
++    av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices);
++
++#if HEVC_CTRLS_VERSION >= 4
++    ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ?
++        0 : querys[3].dims[0];
++    av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets);
++#else
++    ctx->max_offsets = 0;
++#endif
++    // Use the driver's default decode mode if it is a known one, else try frame then slice mode
++    if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED ||
++        querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)
++        ctx->decode_mode = querys[0].default_value;
++    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED))
++        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED;
++    else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED))
++        ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED;
++    else {
++        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__);
++        return AVERROR(EINVAL);
++    }
++    // Same for the start code: known default first, else try Annex B then none
++    if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE ||
++        querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)
++        ctx->start_code = querys[1].default_value;
++    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B))
++        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B;
++    else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
++        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
++    else {
++        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__);
++        return AVERROR(EINVAL);
++    }
++
++    // If we are in slice mode & START_CODE_NONE supported then pick that
++    // as it doesn't require the slightly dodgy look backwards in our raw buffer
++    if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED &&
++        ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE))
++        ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE;
++
++    ctrls[0].value = ctx->decode_mode;
++    ctrls[1].value = ctx->start_code;
++
++    ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls));
++    return !ret ? 0 : AVERROR(-ret);
++}
++
++static void v4l2_req_frame_free(void *opaque, uint8_t *data)
++{
++    AVCodecContext *avctx = opaque;
++    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data;
++
++    av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data);
++    // Buffer free callback: finish the frame (wait + drop refs) before releasing its storage
++    frame_finish(rd);
++
++    qent_dst_unref(&rd->qe_dst);
++
++    // We don't expect req or qe_src to be set (arguments are in the order the message names them)
++    if (rd->req || rd->qe_src)
++        av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->qe_src, rd->req);
++
++    av_freep(&rd->slices);
++    av_freep(&rd->slice_params);
++    av_freep(&rd->offsets);
++
++    av_free(rd);
++}
++
++static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size)
++{
++    AVCodecContext *avctx = opaque;
++//    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
++//    V4L2MediaReqDescriptor *req;
++    AVBufferRef *ref;
++    uint8_t *data;
++//    int ret;
++    // Zero-filled allocation, wrapped in a buffer whose free callback is v4l2_req_frame_free
++    data = av_mallocz(size);
++    if (!data)
++        return NULL;
++
++    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data);
++    ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0);
++    if (!ref) {
++        av_freep(&data);  // wrapper creation failed, so release the raw storage ourselves
++        return NULL;
++    }
++    return ref;
++}
++
++#if 0
++static void v4l2_req_pool_free(void *opaque)
++{
++    av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque);
++}
++
++static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc)
++{
++    av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool);
++
++    av_buffer_pool_uninit(&hwfc->pool);
++}
++#endif
++
++static int frame_params(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, AVBufferRef *hw_frames_ctx)
++{
++    AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data;
++    const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs);
++
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);  // level-gated trace, not raw stderr
++    // Describe DRM_PRIME frames whose software format and size follow the V4L2 destination format
++    hwfc->format = AV_PIX_FMT_DRM_PRIME;
++    hwfc->sw_format = pixel_format_from_format(vfmt);
++    if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) {
++        hwfc->width = vfmt->fmt.pix_mp.width;
++        hwfc->height = vfmt->fmt.pix_mp.height;
++    } else {
++        hwfc->width = vfmt->fmt.pix.width;
++        hwfc->height = vfmt->fmt.pix.height;
++    }
++#if 0
++    hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free);
++    if (!hwfc->pool)
++        return AVERROR(ENOMEM);
++
++    hwfc->free = v4l2_req_hwframe_ctx_free;
++
++    hwfc->initial_pool_size = 1;
++
++    switch (avctx->codec_id) {
++    case AV_CODEC_ID_VP9:
++        hwfc->initial_pool_size += 8;
++        break;
++    case AV_CODEC_ID_VP8:
++        hwfc->initial_pool_size += 3;
++        break;
++    default:
++        hwfc->initial_pool_size += 2;
++    }
++#endif
++    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size);
++
++    return 0;
++}
++
++static int alloc_frame(AVCodecContext * avctx, V4L2RequestContextHEVC *const ctx, AVFrame *frame)
++{
++    int rv;
++
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);  // level-gated trace, not raw stderr
++    frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor));
++    if (!frame->buf[0])
++        return AVERROR(ENOMEM);
++    frame->data[0] = frame->buf[0]->data;  // data[0] carries the request descriptor
++
++    frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx);
++    if (!frame->hw_frames_ctx) {  // av_buffer_ref() returns NULL when allocation fails
++        av_frame_unref(frame);    // also drops buf[0], as the error path below does
++        return AVERROR(ENOMEM);
++    }
++    if ((rv = ff_attach_decode_data(frame)) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n");
++        av_frame_unref(frame);
++        return rv;
++    }
++    return 0;
++}
++
++const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = {
++    .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE,
++    .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION),
++    .probe = probe,
++    .set_controls = set_controls,
++    // Per-frame and frame-allocation hooks
++    .start_frame    = v4l2_request_hevc_start_frame,
++    .decode_slice   = v4l2_request_hevc_decode_slice,
++    .end_frame      = v4l2_request_hevc_end_frame,
++    .abort_frame    = v4l2_request_hevc_abort_frame,
++    .frame_params   = frame_params,
++    .alloc_frame    = alloc_frame,
++};
++
 diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c
-index 980b306b8a72..910ac77bb6f9 100644
---- a/libavcodec/v4l2_req_media.c
+new file mode 100644
+index 000000000000..c94cc5b0f684
+--- /dev/null
 +++ b/libavcodec/v4l2_req_media.c
-@@ -33,9 +33,11 @@
- #include <string.h>
- #include <unistd.h>
- #include <linux/media.h>
+@@ -0,0 +1,1808 @@
++/*
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include <errno.h>
++#include <fcntl.h>
++#include <poll.h>
++#include <pthread.h>
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <stdbool.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++#include <linux/media.h>
 +#include <linux/mman.h>
- #include <sys/ioctl.h>
- #include <sys/select.h>
- #include <sys/ioctl.h>
++#include <sys/ioctl.h>
++#include <sys/select.h>
++#include <sys/ioctl.h>
 +#include <sys/mman.h>
- 
- #include <linux/videodev2.h>
- 
-@@ -95,6 +97,32 @@ struct media_request {
-     struct polltask * pt;
- };
- 
++
++#include <linux/videodev2.h>
++
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_utils.h"
++#include "weak_link.h"
++
++
++/* floor(log2(x)) for 1 <= x < 2^32; returns 0 for x == 0 and 31 for anything >= 2^32 */
++static unsigned int log2_size(size_t x)
++{
++    unsigned int n = 0;
++
++    if (x & ~0xffff) {
++        n += 16;
++        x >>= 16;
++    }
++    if (x & ~0xff) {
++        n += 8;
++        x >>= 8;
++    }
++    if (x & ~0xf) {
++        n += 4;
++        x >>= 4;
++    }
++    if (x & ~3) {
++        n += 2;
++        x >>= 2;
++    }
++    return (x & ~1) ? n + 1 : n;
++}
++
++static size_t round_up_size(const size_t x)
++{
++    /* Admit no size < 256. Result is 3 << n or 4 << n, computed in size_t so it cannot overflow int */
++    const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
++    const size_t three_n = (size_t)3 << n;  /* NOTE(review): x < 256 gives 768 but x == 256 gives 384 */
++    return x >= three_n ? (size_t)4 << n : three_n;
++}
++
++struct media_request;
++
++struct media_pool {
++    int fd;
++    sem_t sem;
++    pthread_mutex_t lock;
++    unsigned int pool_n;
++    struct media_request * pool_reqs;
++    struct media_request * free_reqs;
++    struct pollqueue * pq;
++};
++
++struct media_request {
++    struct media_request * next;
++    struct media_pool * mp;
++    int fd;
++    struct polltask * pt;
++};
++
 +static inline enum v4l2_memory
 +mediabufs_memory_to_v4l2(const enum mediabufs_memory m)
 +{
@@ -25341,190 +10140,1054 @@ index 980b306b8a72..910ac77bb6f9 100644
 +    return "Unknown";
 +}
 +
- 
- static inline int do_trywait(sem_t *const sem)
- {
-@@ -115,14 +143,14 @@ static inline int do_wait(sem_t *const sem)
- }
- 
- static int request_buffers(int video_fd, unsigned int type,
--                           enum v4l2_memory memory, unsigned int buffers_count)
++
++static inline int do_trywait(sem_t *const sem)
++{
++    while (sem_trywait(sem)) {
++        if (errno != EINTR)
++            return -errno;
++    }
++    return 0;
++}
++
++static inline int do_wait(sem_t *const sem)
++{
++    while (sem_wait(sem)) {
++        if (errno != EINTR)
++            return -errno;
++    }
++    return 0;
++}
++
++static int request_buffers(int video_fd, unsigned int type,
 +                           enum mediabufs_memory memory, unsigned int buffers_count)
- {
-     struct v4l2_requestbuffers buffers;
-     int rc;
- 
-     memset(&buffers, 0, sizeof(buffers));
-     buffers.type = type;
--    buffers.memory = memory;
++{
++    struct v4l2_requestbuffers buffers;
++    int rc;
++
++    memset(&buffers, 0, sizeof(buffers));
++    buffers.type = type;
 +    buffers.memory = mediabufs_memory_to_v4l2(memory);
-     buffers.count = buffers_count;
- 
-     rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers);
-@@ -324,6 +352,7 @@ struct qent_base {
-     struct qent_base *next;
-     struct qent_base *prev;
-     enum qent_status status;
++    buffers.count = buffers_count;
++
++    rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers);
++    if (rc < 0) {
++        rc = -errno;
++        request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc));
++        return rc;
++    }
++
++    return 0;
++}
++
++
++static int set_stream(int video_fd, unsigned int type, bool enable)
++{
++    enum v4l2_buf_type buf_type = type;
++    int rc;
++
++    rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF,
++           &buf_type);
++    if (rc < 0) {
++        rc = -errno;
++        request_log("Unable to %sable stream: %s\n",
++                enable ? "en" : "dis", strerror(-rc));
++        return rc;
++    }
++
++    return 0;
++}
++
++
++
++struct media_request * media_request_get(struct media_pool * const mp)
++{
++    struct media_request *req = NULL;
++
++    /* Timeout handled by poll code */
++    if (do_wait(&mp->sem))
++        return NULL;
++
++    pthread_mutex_lock(&mp->lock);
++    req = mp->free_reqs;
++    if (req) {
++        mp->free_reqs = req->next;
++        req->next = NULL;
++    }
++    pthread_mutex_unlock(&mp->lock);
++    return req;
++}
++
++int media_request_fd(const struct media_request * const req)
++{
++    return req->fd;
++}
++
++int media_request_start(struct media_request * const req)
++{
++    while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1)
++    {
++        const int err = errno;
++        if (err == EINTR)
++            continue;
++        request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err));
++        return -err;
++    }
++
++    pollqueue_add_task(req->pt, 2000);
++    return 0;
++}
++
++static void media_request_done(void *v, short revents)
++{
++    struct media_request *const req = v;
++    struct media_pool *const mp = req->mp;
++
++    /* ** Not sure what to do about timeout */
++
++    if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0)
++        request_log("Unable to reinit media request: %s\n",
++                strerror(errno));
++
++    pthread_mutex_lock(&mp->lock);
++    req->next = mp->free_reqs;
++    mp->free_reqs = req;
++    pthread_mutex_unlock(&mp->lock);
++    sem_post(&mp->sem);
++}
++
++int media_request_abort(struct media_request ** const preq)
++{
++    struct media_request * const req = *preq;
++
++    if (req == NULL)
++        return 0;
++    *preq = NULL;
++
++    media_request_done(req, 0);
++    return 0;
++}
++
++static void free_req_pool(struct media_request * const pool, const unsigned int n)
++{
++    unsigned int i;
++    for (i = 0; i != n; ++i) {
++        struct media_request * const req = pool + i;
++        if (req->pt)
++            polltask_delete(&req->pt);
++        if (req->fd != -1)
++            close(req->fd);
++    }
++    free(pool);
++}
++
++struct media_pool * media_pool_new(const char * const media_path, /* media device node to open */
++                   struct pollqueue * const pq,  /* queue used to poll each request fd (POLLPRI) */
++                   const unsigned int n)         /* number of requests to pre-allocate */
++{
++    struct media_pool * const mp = calloc(1, sizeof(*mp));
++    unsigned int i;
++    /* Returns the new pool with n requests on its free list, or NULL on any failure */
++    if (!mp)
++        goto fail0;
++
++    mp->pq = pq;
++    pthread_mutex_init(&mp->lock, NULL);
++    mp->fd = open(media_path, O_RDWR | O_NONBLOCK);
++    if (mp->fd == -1) {
++        request_log("Failed to open '%s': %s\n", media_path, strerror(errno));
++        goto fail1;
++    }
++
++    if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL)
++        goto fail3;
++    mp->pool_n = n;
++    for (i = 0; i != n; ++i) {
++        mp->pool_reqs[i].mp = mp;
++        mp->pool_reqs[i].fd = -1;
++    }
++    /* fd == -1 and a zeroed pt let free_req_pool() unwind a partly built pool */
++    for (i = 0; i != n; ++i) {
++        struct media_request * const req = mp->pool_reqs + i;
++
++        if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) {
++            request_log("Failed to alloc request %u: %s\n", i, strerror(errno));
++            goto fail4;
++        }
++
++        req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req);
++        if (!req->pt)
++            goto fail4;
++
++        req->next = mp->free_reqs;
++        mp->free_reqs = req;
++    }
++
++    sem_init(&mp->sem, 0, n);
++
++    return mp;
++
++fail4:
++    free_req_pool(mp->pool_reqs, mp->pool_n);
++fail3:
++    close(mp->fd);
++fail1:
++    pthread_mutex_destroy(&mp->lock); /* lock is initialised before open(), so the open-failure path must drop it too */
++    free(mp);
++fail0:
++    return NULL;
++}
++
++void media_pool_delete(struct media_pool ** pMp)
++{
++    struct media_pool * const mp = *pMp;
++
++    if (!mp)
++        return;
++    *pMp = NULL;
++
++    free_req_pool(mp->pool_reqs, mp->pool_n);
++    close(mp->fd);
++    sem_destroy(&mp->sem);
++    pthread_mutex_destroy(&mp->lock);
++    free(mp);
++}
++
++
++#define INDEX_UNSET (~(uint32_t)0)
++
++enum qent_status {
++    QENT_NEW = 0,       // Initial state - shouldn't last
++    QENT_FREE,          // On free chain
++    QENT_PENDING,       // User has ent
++    QENT_WAITING,       // On inuse
++    QENT_DONE,          // Frame rx
++    QENT_ERROR,         // Error
++    QENT_IMPORT
++};
++
++struct qent_base {
++    atomic_int ref_count;
++    struct qent_base *next;
++    struct qent_base *prev;
++    enum qent_status status;
 +    enum mediabufs_memory memtype;
-     uint32_t index;
-     struct dmabuf_h *dh[VIDEO_MAX_PLANES];
-     struct timeval timestamp;
-@@ -348,9 +377,9 @@ struct qe_list_head {
- };
- 
- struct buf_pool {
++    uint32_t index;
++    struct dmabuf_h *dh[VIDEO_MAX_PLANES];
++    struct timeval timestamp;
++};
++
++struct qent_src {
++    struct qent_base base;
++    int fixed_size;
++};
++
++struct qent_dst {
++    struct qent_base base;
++    bool waiting;
++    pthread_mutex_t lock;
++    pthread_cond_t cond;
++    struct ff_weak_link_client * mbc_wl;
++};
++
++struct qe_list_head {
++    struct qent_base *head;
++    struct qent_base *tail;
++};
++
++struct buf_pool {
 +    enum mediabufs_memory memtype;
-     pthread_mutex_t lock;
-     sem_t free_sem;
--    enum v4l2_buf_type buf_type;
-     struct qe_list_head free;
-     struct qe_list_head inuse;
- };
-@@ -367,9 +396,10 @@ static inline struct qent_src *base_to_src(struct qent_base *be)
- }
- 
- 
--#define QENT_BASE_INITIALIZER {\
++    pthread_mutex_t lock;
++    sem_t free_sem;
++    struct qe_list_head free;
++    struct qe_list_head inuse;
++};
++
++
++static inline struct qent_dst *base_to_dst(struct qent_base *be)
++{
++    return (struct qent_dst *)be;
++}
++
++static inline struct qent_src *base_to_src(struct qent_base *be)
++{
++    return (struct qent_src *)be;
++}
++
++
 +#define QENT_BASE_INITIALIZER(mtype) {\
-     .ref_count = ATOMIC_VAR_INIT(0),\
-     .status = QENT_NEW,\
++    .ref_count = ATOMIC_VAR_INIT(0),\
++    .status = QENT_NEW,\
 +    .memtype = (mtype),\
-     .index  = INDEX_UNSET\
- }
- 
-@@ -390,13 +420,13 @@ static void qe_src_free(struct qent_src *const be_src)
-     free(be_src);
- }
- 
--static struct qent_src * qe_src_new(void)
++    .index  = INDEX_UNSET\
++}
++
++static void qe_base_uninit(struct qent_base *const be)
++{
++    unsigned int i;
++    for (i = 0; i != VIDEO_MAX_PLANES; ++i) {
++        dmabuf_free(be->dh[i]);
++        be->dh[i] = NULL;
++    }
++}
++
++static void qe_src_free(struct qent_src *const be_src)
++{
++    if (!be_src)
++        return;
++    qe_base_uninit(&be_src->base);
++    free(be_src);
++}
++
 +static struct qent_src * qe_src_new(enum mediabufs_memory mtype)
- {
-     struct qent_src *const be_src = malloc(sizeof(*be_src));
-     if (!be_src)
-         return NULL;
-     *be_src = (struct qent_src){
--        .base = QENT_BASE_INITIALIZER
++{
++    struct qent_src *const be_src = malloc(sizeof(*be_src));
++    if (!be_src)
++        return NULL;
++    *be_src = (struct qent_src){
 +        .base = QENT_BASE_INITIALIZER(mtype)
-     };
-     return be_src;
- }
-@@ -413,13 +443,13 @@ static void qe_dst_free(struct qent_dst *const be_dst)
-     free(be_dst);
- }
- 
--static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl)
++    };
++    return be_src;
++}
++
++static void qe_dst_free(struct qent_dst *const be_dst)
++{
++    if (!be_dst)
++        return;
++
++    ff_weak_link_unref(&be_dst->mbc_wl);
++    pthread_cond_destroy(&be_dst->cond);
++    pthread_mutex_destroy(&be_dst->lock);
++    qe_base_uninit(&be_dst->base);
++    free(be_dst);
++}
++
 +static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype)
- {
-     struct qent_dst *const be_dst = malloc(sizeof(*be_dst));
-     if (!be_dst)
-         return NULL;
-     *be_dst = (struct qent_dst){
--        .base = QENT_BASE_INITIALIZER,
++{
++    struct qent_dst *const be_dst = malloc(sizeof(*be_dst));
++    if (!be_dst)
++        return NULL;
++    *be_dst = (struct qent_dst){
 +        .base = QENT_BASE_INITIALIZER(memtype),
-         .lock = PTHREAD_MUTEX_INITIALIZER,
-         .cond = PTHREAD_COND_INITIALIZER,
-         .mbc_wl = ff_weak_link_ref(wl)
-@@ -553,14 +583,14 @@ static struct qent_base *queue_tryget_free(struct buf_pool *const bp)
-     return buf;
- }
- 
--static struct qent_base * queue_find_extract_fd(struct buf_pool *const bp, const int fd)
++        .lock = PTHREAD_MUTEX_INITIALIZER,
++        .cond = PTHREAD_COND_INITIALIZER,
++        .mbc_wl = ff_weak_link_ref(wl)
++    };
++    return be_dst;
++}
++
++static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be)
++{
++    if (ql->tail)
++        ql->tail->next = be;
++    else
++        ql->head = be;
++    be->prev = ql->tail;
++    be->next = NULL;
++    ql->tail = be;
++}
++
++static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be)
++{
++    if (!be)
++        return NULL;
++
++    if (be->next)
++        be->next->prev = be->prev;
++    else
++        ql->tail = be->prev;
++    if (be->prev)
++        be->prev->next = be->next;
++    else
++        ql->head = be->next;
++    be->next = NULL;
++    be->prev = NULL;
++    return be;
++}
++
++
++static void bq_put_free(struct buf_pool *const bp, struct qent_base * be)
++{
++    ql_add_tail(&bp->free, be);
++}
++
++static struct qent_base * bq_get_free(struct buf_pool *const bp)
++{
++    return ql_extract(&bp->free, bp->free.head);
++}
++
++static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be)
++{
++    return ql_extract(&bp->inuse, be);
++}
++
++static struct qent_base * bq_get_inuse(struct buf_pool *const bp)
++{
++    return ql_extract(&bp->inuse, bp->inuse.head);
++}
++
++static void bq_free_all_free_src(struct buf_pool *const bp)
++{
++    struct qent_base *be;
++    while ((be = bq_get_free(bp)) != NULL)
++        qe_src_free(base_to_src(be));
++}
++
++static void bq_free_all_inuse_src(struct buf_pool *const bp)
++{
++    struct qent_base *be;
++    while ((be = bq_get_inuse(bp)) != NULL)
++        qe_src_free(base_to_src(be));
++}
++
++static void bq_free_all_free_dst(struct buf_pool *const bp)
++{
++    struct qent_base *be;
++    while ((be = bq_get_free(bp)) != NULL)
++        qe_dst_free(base_to_dst(be));
++}
++
++static void queue_put_free(struct buf_pool *const bp, struct qent_base *be)
++{
++    unsigned int i;
++
++    pthread_mutex_lock(&bp->lock);
++    /* Clear out state vars */
++    be->timestamp.tv_sec = 0;
++    be->timestamp.tv_usec = 0;
++    be->status = QENT_FREE;
++    for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i)
++        dmabuf_len_set(be->dh[i], 0);
++    bq_put_free(bp, be);
++    pthread_mutex_unlock(&bp->lock);
++    sem_post(&bp->free_sem);
++}
++
++static bool queue_is_inuse(const struct buf_pool *const bp)
++{
++    return bp->inuse.tail != NULL;
++}
++
++static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be)
++{
++    if (!be)
++        return;
++    pthread_mutex_lock(&bp->lock);
++    ql_add_tail(&bp->inuse, be);
++    be->status = QENT_WAITING;
++    pthread_mutex_unlock(&bp->lock);
++}
++
++static struct qent_base *queue_get_free(struct buf_pool *const bp)
++{
++    struct qent_base *buf;
++
++    if (do_wait(&bp->free_sem))
++        return NULL;
++    pthread_mutex_lock(&bp->lock);
++    buf = bq_get_free(bp);
++    pthread_mutex_unlock(&bp->lock);
++    return buf;
++}
++
++static struct qent_base *queue_tryget_free(struct buf_pool *const bp)
++{
++    struct qent_base *buf;
++
++    if (do_trywait(&bp->free_sem))
++        return NULL;
++    pthread_mutex_lock(&bp->lock);
++    buf = bq_get_free(bp);
++    pthread_mutex_unlock(&bp->lock);
++    return buf;
++}
++
 +static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index)
- {
-     struct qent_base *be;
- 
-     pthread_mutex_lock(&bp->lock);
-     /* Expect 1st in Q, but allow anywhere */
-     for (be = bp->inuse.head; be; be = be->next) {
--        if (dmabuf_fd(be->dh[0]) == fd) {
++{
++    struct qent_base *be;
++
++    pthread_mutex_lock(&bp->lock);
++    /* Expect 1st in Q, but allow anywhere */
++    for (be = bp->inuse.head; be; be = be->next) {
 +        if (be->index == index) {
-             bq_extract_inuse(bp, be);
-             break;
-         }
-@@ -602,6 +632,8 @@ struct mediabufs_ctl {
-     struct pollqueue * pq;
-     struct ff_weak_link_master * this_wlm;
- 
++            bq_extract_inuse(bp, be);
++            break;
++        }
++    }
++    pthread_mutex_unlock(&bp->lock);
++
++    return be;
++}
++
++static void queue_delete(struct buf_pool *const bp)
++{
++    sem_destroy(&bp->free_sem);
++    pthread_mutex_destroy(&bp->lock);
++    free(bp);
++}
++
++static struct buf_pool* queue_new(const int vfd)
++{
++    struct buf_pool *bp = calloc(1, sizeof(*bp));
++    if (!bp)
++        return NULL;
++    pthread_mutex_init(&bp->lock, NULL);
++    sem_init(&bp->free_sem, 0, 0);
++    return bp;
++}
++
++
++struct mediabufs_ctl {
++    atomic_int ref_count;  /* 0 is single ref for easier atomics */
++    void * dc;
++    int vfd;
++    bool stream_on;
++    bool polling;
++    bool dst_fixed;             // Dst Q is fixed size
++    pthread_mutex_t lock;
++    struct buf_pool * src;
++    struct buf_pool * dst;
++    struct polltask * pt;
++    struct pollqueue * pq;
++    struct ff_weak_link_master * this_wlm;
++
 +    enum mediabufs_memory src_memtype;
 +    enum mediabufs_memory dst_memtype;
-     struct v4l2_format src_fmt;
-     struct v4l2_format dst_fmt;
-     struct v4l2_capability capability;
-@@ -614,7 +646,7 @@ static int qe_v4l2_queue(struct qent_base *const be,
- {
-     struct v4l2_buffer buffer = {
-         .type = fmt->type,
--        .memory = V4L2_MEMORY_DMABUF,
++    struct v4l2_format src_fmt;
++    struct v4l2_format dst_fmt;
++    struct v4l2_capability capability;
++};
++
++static int qe_v4l2_queue(struct qent_base *const be,
++               const int vfd, struct media_request *const mreq,
++               const struct v4l2_format *const fmt,
++               const bool is_dst, const bool hold_flag)
++{
++    struct v4l2_buffer buffer = {
++        .type = fmt->type,
 +        .memory = mediabufs_memory_to_v4l2(be->memtype),
-         .index = be->index
-     };
-     struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
-@@ -628,7 +660,10 @@ static int qe_v4l2_queue(struct qent_base *const be,
-             /* *** Really need a pixdesc rather than a format so we can fill in data_offset */
-             planes[i].length = dmabuf_size(be->dh[i]);
-             planes[i].bytesused = dmabuf_len(be->dh[i]);
--            planes[i].m.fd = dmabuf_fd(be->dh[i]);
++        .index = be->index
++    };
++    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        unsigned int i;
++        for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) {
++            if (is_dst)
++                dmabuf_len_set(be->dh[i], 0);
++
++            /* *** Really need a pixdesc rather than a format so we can fill in data_offset */
++            planes[i].length = dmabuf_size(be->dh[i]);
++            planes[i].bytesused = dmabuf_len(be->dh[i]);
 +            if (be->memtype == MEDIABUFS_MEMORY_DMABUF)
 +                planes[i].m.fd = dmabuf_fd(be->dh[i]);
 +            else
 +                planes[i].m.mem_offset = 0;
-         }
-         buffer.m.planes = planes;
-         buffer.length = i;
-@@ -639,7 +674,10 @@ static int qe_v4l2_queue(struct qent_base *const be,
- 
-         buffer.bytesused = dmabuf_len(be->dh[0]);
-         buffer.length = dmabuf_size(be->dh[0]);
--        buffer.m.fd = dmabuf_fd(be->dh[0]);
++        }
++        buffer.m.planes = planes;
++        buffer.length = i;
++    }
++    else {
++        if (is_dst)
++            dmabuf_len_set(be->dh[0], 0);
++
++        buffer.bytesused = dmabuf_len(be->dh[0]);
++        buffer.length = dmabuf_size(be->dh[0]);
 +        if (be->memtype == MEDIABUFS_MEMORY_DMABUF)
 +            buffer.m.fd = dmabuf_fd(be->dh[0]);
 +        else
 +            buffer.m.offset = 0;
-     }
- 
-     if (!is_dst && mreq) {
-@@ -668,14 +706,13 @@ static struct qent_base * qe_dequeue(struct buf_pool *const bp,
-                      const int vfd,
-                      const struct v4l2_format * const f)
- {
--    int fd;
-     struct qent_base *be;
-     int rc;
-     const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type);
-     struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
-     struct v4l2_buffer buffer = {
-         .type =  f->type,
--        .memory = V4L2_MEMORY_DMABUF
++    }
++
++    if (!is_dst && mreq) {
++        buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD;
++        buffer.request_fd = media_request_fd(mreq);
++        if (hold_flag)
++            buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
++    }
++
++    if (is_dst)
++        be->timestamp = (struct timeval){0,0};
++
++    buffer.timestamp = be->timestamp;
++
++    while (ioctl(vfd, VIDIOC_QBUF, &buffer)) {
++        const int err = errno;
++        if (err != EINTR) {
++            request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err));
++            return -err;
++        }
++    }
++    return 0;
++}
++
++static struct qent_base * qe_dequeue(struct buf_pool *const bp,
++                     const int vfd,
++                     const struct v4l2_format * const f)
++{
++    struct qent_base *be;
++    int rc;
++    const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type);
++    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++    struct v4l2_buffer buffer = {
++        .type =  f->type,
 +        .memory = mediabufs_memory_to_v4l2(bp->memtype)
-     };
-     if (mp) {
-         buffer.length = f->fmt.pix_mp.num_planes;
-@@ -690,10 +727,9 @@ static struct qent_base * qe_dequeue(struct buf_pool *const bp,
-         return NULL;
-     }
- 
--    fd = mp ? planes[0].m.fd : buffer.m.fd;
--    be = queue_find_extract_fd(bp, fd);
++    };
++    if (mp) {
++        buffer.length = f->fmt.pix_mp.num_planes;
++        buffer.m.planes = planes;
++    }
++
++    while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 &&
++           errno == EINTR)
++        /* Loop */;
++    if (rc) {
++        request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno));
++        return NULL;
++    }
++
 +    be = queue_find_extract_index(bp, buffer.index);
-     if (!be) {
--        request_log("Failed to find fd %d in Q\n", fd);
++    if (!be) {
 +        request_log("Failed to find index %d in Q\n", buffer.index);
-         return NULL;
-     }
- 
-@@ -1104,7 +1140,7 @@ static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, stru
- 
-     struct v4l2_create_buffers cbuf = {
-         .count = n,
--        .memory = V4L2_MEMORY_DMABUF,
++        return NULL;
++    }
++
++    if (mp) {
++        unsigned int i;
++        for (i = 0; i != buffer.length; ++i)
++            dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0);
++    }
++    else
++        dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0);
++
++    be->timestamp = buffer.timestamp;
++    be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE;
++    return be;
++}
++
++static void qe_dst_done(struct qent_dst * dst_be)
++{
++    pthread_mutex_lock(&dst_be->lock);
++    dst_be->waiting = false;
++    pthread_cond_broadcast(&dst_be->cond);
++    pthread_mutex_unlock(&dst_be->lock);
++
++    qent_dst_unref(&dst_be);
++}
++
++static bool qe_dst_waiting(struct qent_dst *const dst_be)
++{
++    bool waiting;
++    pthread_mutex_lock(&dst_be->lock);
++    waiting = dst_be->waiting;
++    dst_be->waiting = true;
++    pthread_mutex_unlock(&dst_be->lock);
++    return waiting;
++}
++
++
++static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc)
++{
++    return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst);
++}
++
++static void mediabufs_poll_cb(void * v, short revents)
++{
++    struct mediabufs_ctl *mbc = v;
++    struct qent_src *src_be = NULL;
++    struct qent_dst *dst_be = NULL;
++
++    if (!revents)
++        request_err(mbc->dc, "%s: Timeout\n", __func__);
++
++    pthread_mutex_lock(&mbc->lock);
++    mbc->polling = false;
++
++    if ((revents & POLLOUT) != 0)
++        src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt));
++    if ((revents & POLLIN) != 0)
++        dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt));
++
++    /* Reschedule */
++    if (mediabufs_wants_poll(mbc)) {
++        mbc->polling = true;
++        pollqueue_add_task(mbc->pt, 2000);
++    }
++    pthread_mutex_unlock(&mbc->lock);
++
++    if (src_be)
++        queue_put_free(mbc->src, &src_be->base);
++    if (dst_be)
++        qe_dst_done(dst_be);
++}
++
++int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp)
++{
++    struct qent_base *const be = &be_src->base;
++
++    be->timestamp = *timestamp;
++    return 0;
++}
++
++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst)
++{
++    return be_dst->base.timestamp;
++}
++
++static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc)
++{
++    if (!be->dh[0] || len > dmabuf_size(be->dh[0])) {
++        size_t newsize = round_up_size(len);
++        request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize);
++        if (!dbsc) {
++            request_log("%s: No dmbabuf_ctrl for realloc\n", __func__);
++            return -ENOMEM;
++        }
++        if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) {
++            request_log("%s: Realloc %zd failed\n", __func__, newsize);
++            return -ENOMEM;
++        }
++    }
++    return 0;
++}
++
++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc)
++{
++    struct qent_base *const be = &be_src->base;
++    return qent_base_realloc(be, len, dbsc);
++}
++
++
++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc)
++{
++    void * dst;
++    struct qent_base *const be = &be_src->base;
++    int rv;
++
++    // Realloc doesn't copy so don't alloc if offset != 0
++    if ((rv = qent_base_realloc(be, offset + len,
++                                be_src->fixed_size || offset ? NULL : dbsc)) != 0)
++        return rv;
++
++    dmabuf_write_start(be->dh[0]);
++    dst = dmabuf_map(be->dh[0]);
++    if (!dst)
++        return -1;
++    memcpy((char*)dst + offset, src, len);
++    dmabuf_len_set(be->dh[0], len);
++    dmabuf_write_end(be->dh[0]);
++    return 0;
++}
++
++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane)
++{
++    const struct qent_base *const be = &be_dst->base;
++
++    return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? NULL : be->dh[plane];
++}
++
++int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane)
++{
++    return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane)));
++}
++
++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
++                struct media_request **const pmreq,
++                struct qent_src **const psrc_be,
++                struct qent_dst *const dst_be,
++                const bool is_final)
++{
++    struct media_request * mreq = *pmreq;
++    struct qent_src *const src_be = *psrc_be;
++
++    // Req & src are always both "consumed"
++    *pmreq = NULL;
++    *psrc_be = NULL;
++
++    pthread_mutex_lock(&mbc->lock);
++
++    if (!src_be)
++        goto fail1;
++
++    if (dst_be) {
++        if (qe_dst_waiting(dst_be)) {
++            request_info(mbc->dc, "Request buffer already waiting on start\n");
++            goto fail1;
++        }
++        dst_be->base.timestamp = (struct timeval){0,0};
++        if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false))
++            goto fail1;
++
++        qent_dst_ref(dst_be);
++        queue_put_inuse(mbc->dst, &dst_be->base);
++    }
++
++    if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final))
++        goto fail1;
++    queue_put_inuse(mbc->src, &src_be->base);
++
++    if (!mbc->polling && mediabufs_wants_poll(mbc)) {
++        mbc->polling = true;
++        pollqueue_add_task(mbc->pt, 2000);
++    }
++    pthread_mutex_unlock(&mbc->lock);
++
++    if (media_request_start(mreq))
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    return MEDIABUFS_STATUS_SUCCESS;
++
++fail1:
++    media_request_abort(&mreq);
++    if (src_be)
++        queue_put_free(mbc->src, &src_be->base);
++
++// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q
++    if (dst_be) {
++        dst_be->base.status = QENT_ERROR;
++        qe_dst_done(dst_be);
++    }
++    pthread_mutex_unlock(&mbc->lock);
++    return MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++
++static int qe_alloc_from_fmt(struct qent_base *const be,
++                   struct dmabufs_ctl *const dbsc,
++                   const struct v4l2_format *const fmt)
++{
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        unsigned int i;
++        for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) {
++            be->dh[i] = dmabuf_realloc(dbsc, be->dh[i],
++                fmt->fmt.pix_mp.plane_fmt[i].sizeimage);
++            /* On failure tidy up and die */
++            if (!be->dh[i]) {
++                while (i--) {
++                    dmabuf_free(be->dh[i]);
++                    be->dh[i] = NULL;
++                }
++                return -1;
++            }
++        }
++    }
++    else {
++//      be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage);
++        size_t size = fmt->fmt.pix.sizeimage;
++        be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size);
++        if (!be->dh[0])
++            return -1;
++    }
++    return 0;
++}
++
++static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd,
++            const enum v4l2_buf_type buftype,
++            uint32_t pixfmt,
++            const unsigned int width, const unsigned int height,
++                               const size_t bufsize)
++{
++    *fmt = (struct v4l2_format){.type = buftype};
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) {
++        fmt->fmt.pix_mp.width = width;
++        fmt->fmt.pix_mp.height = height;
++        fmt->fmt.pix_mp.pixelformat = pixfmt;
++        if (bufsize) {
++            fmt->fmt.pix_mp.num_planes = 1;
++            fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize;
++        }
++    }
++    else {
++        fmt->fmt.pix.width = width;
++        fmt->fmt.pix.height = height;
++        fmt->fmt.pix.pixelformat = pixfmt;
++        fmt->fmt.pix.sizeimage = bufsize;
++    }
++
++    while (ioctl(fd, VIDIOC_S_FMT, fmt))
++        if (errno != EINTR)
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    // Treat anything where we don't get at least what we asked for as a fail
++    if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) {
++        if (fmt->fmt.pix_mp.width < width ||
++            fmt->fmt.pix_mp.height < height ||
++            fmt->fmt.pix_mp.pixelformat != pixfmt) {
++            return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
++        }
++    }
++    else {
++        if (fmt->fmt.pix.width < width ||
++            fmt->fmt.pix.height < height ||
++            fmt->fmt.pix.pixelformat != pixfmt) {
++            return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
++        }
++    }
++
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt,
++                   const int fd,
++                   const unsigned int type_v4l2,
++                   const uint32_t flags_must,
++                   const uint32_t flags_not,
++                   const unsigned int width,
++                   const unsigned int height,
++                   mediabufs_dst_fmt_accept_fn *const accept_fn,
++                   void *const accept_v)
++{
++    unsigned int i;
++
++    for (i = 0;; ++i) {
++        struct v4l2_fmtdesc fmtdesc = {
++            .index = i,
++            .type = type_v4l2
++        };
++        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
++            if (errno != EINTR)
++                return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
++        }
++        if ((fmtdesc.flags & flags_must) != flags_must ||
++            (fmtdesc.flags & flags_not))
++            continue;
++        if (!accept_fn(accept_v, &fmtdesc))
++            continue;
++
++        if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat,
++                width, height, 0) == MEDIABUFS_STATUS_SUCCESS)
++            return MEDIABUFS_STATUS_SUCCESS;
++    }
++    return 0;
++}
++
++
++/* Wait for qent done */
++
++MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst)
++{
++    struct qent_base *const be = &be_dst->base;
++    enum qent_status estat;
++
++    pthread_mutex_lock(&be_dst->lock);
++    while (be_dst->waiting &&
++           !pthread_cond_wait(&be_dst->cond, &be_dst->lock))
++        /* Loop */;
++    estat = be->status;
++    pthread_mutex_unlock(&be_dst->lock);
++
++    return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS :
++        estat == QENT_ERROR ? MEDIABUFS_ERROR_DECODING_ERROR :
++            MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no)
++{
++    struct qent_base *const be = &be_dst->base; /* dh[] holds VIDEO_MAX_PLANES plane handles */
++    return buf_no >= VIDEO_MAX_PLANES ? NULL : dmabuf_map(be->dh[buf_no]); /* NULL for an out-of-range plane instead of reading past dh[] */
++}
++
++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst)
++{
++    struct qent_base *const be = &be_dst->base;
++    unsigned int i;
++    for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) {
++        if (dmabuf_read_start(be->dh[i])) {
++            while (i--)
++                dmabuf_read_end(be->dh[i]);
++            return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++        }
++    }
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst)
++{
++    struct qent_base *const be = &be_dst->base;
++    unsigned int i;
++    MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS;
++
++    for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) {
++        if (dmabuf_read_end(be->dh[i]))
++            status = MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++    return status;
++}
++
++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst)
++{
++    if (be_dst)
++        atomic_fetch_add(&be_dst->base.ref_count, 1);
++    return be_dst;
++}
++
++void qent_dst_unref(struct qent_dst ** const pbe_dst)
++{
++    struct qent_dst * const be_dst = *pbe_dst;
++    struct mediabufs_ctl * mbc;
++    if (!be_dst)
++        return;
++    *pbe_dst = NULL;
++
++    if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0)
++        return;
++
++    if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) {
++        queue_put_free(mbc->dst, &be_dst->base);
++        ff_weak_link_unlock(be_dst->mbc_wl);
++    }
++    else {
++        qe_dst_free(be_dst);
++    }
++}
++
++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
++                unsigned int plane,   /* index into the entry's dh[] plane array */
++                int fd, size_t size)  /* passed straight to dmabuf_import() */
++{
++    struct qent_base *const be = &be_dst->base;
++    struct dmabuf_h * dh;
++    /* Accept only an entry in QENT_IMPORT state with an in-range, still-empty plane slot */
++    if (be->status != QENT_IMPORT || plane >= VIDEO_MAX_PLANES || be->dh[plane])
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    dh = dmabuf_import(fd, size);
++    if (!dh)
++        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++
++    be->dh[plane] = dh;
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++// Returns noof buffers created (sets qes[i]->base.index for each), -ve errno for error
++static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[])
++{
++    unsigned int i;
++
++    struct v4l2_create_buffers cbuf = {
++        .count = n,
 +        .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype),
-         .format = mbc->dst_fmt,
-     };
- 
-@@ -1125,12 +1161,97 @@ static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, stru
-     return cbuf.count;
- }
- 
++        .format = mbc->dst_fmt,
++    };
++
++    while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) {
++        const int err = errno; /* keep errno positive: a negated value can never equal EINTR */
++        if (err != EINTR) {
++            request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__);
++            return -err; /* negative, so it cannot be mistaken for a buffer count */
++        }
++    }
++
++    if (cbuf.count != n)
++        request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n);
++
++    for (i = 0; i != cbuf.count; ++i)
++        qes[i]->base.index = cbuf.index + i;
++
++    return cbuf.count;
++}
++
 +static MediaBufsStatus
 +qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt,
 +                   const unsigned int n, const bool x_dmabuf)
@@ -25561,8 +11224,10 @@ index 980b306b8a72..910ac77bb6f9 100644
 +                    .plane = i,
 +                    .flags = O_RDWR, // *** Arguably O_RDONLY would be fine
 +                };
-+                if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0)
++                if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) {
 +                    be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length);
++                    close(xbuf.fd); // dmabuf_import dups the fd so close this one
++                }
 +            }
 +            else {
 +                be->dh[i] = dmabuf_import_mmap(
@@ -25610,35 +11275,36 @@ index 980b306b8a72..910ac77bb6f9 100644
 +    return 0;
 +}
 +
- struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc)
- {
-     struct qent_dst * be_dst;
- 
-     if (mbc == NULL) {
--        be_dst = qe_dst_new(NULL);
++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc)
++{
++    struct qent_dst * be_dst;
++
++    if (mbc == NULL) {
 +        be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF);
-         if (be_dst)
-             be_dst->base.status = QENT_IMPORT;
-         return be_dst;
-@@ -1144,7 +1265,7 @@ struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struc
-     else {
-         be_dst = base_to_dst(queue_tryget_free(mbc->dst));
-         if (!be_dst) {
--            be_dst = qe_dst_new(mbc->this_wlm);
++        if (be_dst)
++            be_dst->base.status = QENT_IMPORT;
++        return be_dst;
++    }
++
++    if (mbc->dst_fixed) {
++        be_dst = base_to_dst(queue_get_free(mbc->dst));
++        if (!be_dst)
++            return NULL;
++    }
++    else {
++        be_dst = base_to_dst(queue_tryget_free(mbc->dst));
++        if (!be_dst) {
 +            be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype);
-             if (!be_dst)
-                 return NULL;
- 
-@@ -1155,12 +1276,21 @@ struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struc
-         }
-     }
- 
--    if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) {
--        /* Given  how create buf works we can't uncreate it on alloc failure
--         * all we can do is put it on the free Q
--        */
--        queue_put_free(mbc->dst, &be_dst->base);
--        return NULL;
++            if (!be_dst)
++                return NULL;
++
++            if (create_dst_bufs(mbc, 1, &be_dst) != 1) {
++                qe_dst_free(be_dst);
++                return NULL;
++            }
++        }
++    }
++
 +    if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) {
 +        if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) {
 +            request_err(mbc->dc, "Failed to export as dmabuf\n");
@@ -25654,36 +11320,107 @@ index 980b306b8a72..910ac77bb6f9 100644
 +            queue_put_free(mbc->dst, &be_dst->base);
 +            return NULL;
 +        }
-     }
- 
-     be_dst->base.status = QENT_PENDING;
-@@ -1208,7 +1338,7 @@ MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
- 
- // ** This is a mess if we get partial alloc but without any way to remove
- //    individual V4L2 Q members we are somewhat stuffed
--MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed)
++    }
++
++    be_dst->base.status = QENT_PENDING;
++    atomic_store(&be_dst->base.ref_count, 0);
++    return be_dst;
++}
++
++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc)
++{
++    return &mbc->dst_fmt;
++}
++
++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
++               const unsigned int width,
++               const unsigned int height,
++               mediabufs_dst_fmt_accept_fn *const accept_fn,
++               void *const accept_v)
++{
++    MediaBufsStatus status;
++    unsigned int i;
++    const enum v4l2_buf_type buf_type = mbc->dst_fmt.type;
++    static const struct {
++        unsigned int flags_must;
++        unsigned int flags_not;
++    } trys[] = {
++        {0, V4L2_FMT_FLAG_EMULATED},
++        {V4L2_FMT_FLAG_EMULATED, 0},
++    };
++    for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) {
++        status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd,
++                                buf_type,
++                                trys[i].flags_must,
++                                trys[i].flags_not,
++                                width, height, accept_fn, accept_v);
++        if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE)
++            return status;
++    }
++
++    if (status != MEDIABUFS_STATUS_SUCCESS)
++        return status;
++
++    /* Try to create a buffer - don't alloc */
++    return status;
++}
++
++// ** This is a mess if we get partial alloc but without any way to remove
++//    individual V4L2 Q members we are somewhat stuffed
 +MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype)
- {
-     unsigned int i;
-     int a = 0;
-@@ -1218,10 +1348,12 @@ MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, cons
-     if (n > 32)
-         return MEDIABUFS_ERROR_ALLOCATION_FAILED;
- 
++{
++    unsigned int i;
++    int a = 0;
++    unsigned int qc;
++    struct qent_dst * qes[32];
++
++    if (n > 32)
++        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++
 +    mbc->dst->memtype = memtype;
 +
-     // Create qents first as it is hard to get rid of the V4L2 buffers on error
-     for (qc = 0; qc != n; ++qc)
-     {
--        if ((qes[qc] = qe_dst_new(mbc->this_wlm)) == NULL)
++    // Create qents first as it is hard to get rid of the V4L2 buffers on error
++    for (qc = 0; qc != n; ++qc)
++    {
 +        if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL)
-             goto fail;
-     }
- 
-@@ -1260,19 +1392,61 @@ void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src *
-     queue_put_free(mbc->src, &qe_src->base);
- }
- 
++            goto fail;
++    }
++
++    if ((a = create_dst_bufs(mbc, n, qes)) < 0)
++        goto fail;
++
++    for (i = 0; i != a; ++i)
++        queue_put_free(mbc->dst, &qes[i]->base);
++
++    if (a != n)
++        goto fail;
++
++    mbc->dst_fixed = fixed;
++    return MEDIABUFS_STATUS_SUCCESS;
++
++fail:
++    for (i = (a < 0 ? 0 : a); i != qc; ++i)
++        qe_dst_free(qes[i]);
++
++    return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++}
++
++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc)
++{
++    struct qent_base * buf = queue_get_free(mbc->src);
++    buf->status = QENT_PENDING;
++    return base_to_src(buf);
++}
++
++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src)
++{
++    struct qent_src *const qe_src = *pqe_src;
++    if (!qe_src)
++        return;
++    *pqe_src = NULL;
++    queue_put_free(mbc->src, &qe_src->base);
++}
++
 +static MediaBufsStatus
 +chk_memory_type(struct mediabufs_ctl *const mbc,
 +    const struct v4l2_format * const f,
@@ -25725,37 +11462,38 @@ index 980b306b8a72..910ac77bb6f9 100644
 +    return chk_memory_type(mbc, &mbc->dst_fmt, memtype);
 +}
 +
- /* src format must have been set up before this */
- MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc,
-                   struct dmabufs_ctl * const dbsc,
--                  unsigned int n)
++/* src format must have been set up before this */
++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc,
++                  struct dmabufs_ctl * const dbsc,
 +                  unsigned int n, const enum mediabufs_memory memtype)
- {
-     unsigned int i;
-     struct v4l2_requestbuffers req = {
-         .count = n,
-         .type = mbc->src_fmt.type,
--        .memory = V4L2_MEMORY_DMABUF
++{
++    unsigned int i;
++    struct v4l2_requestbuffers req = {
++        .count = n,
++        .type = mbc->src_fmt.type,
 +        .memory = mediabufs_memory_to_v4l2(memtype)
-     };
- 
-     bq_free_all_free_src(mbc->src);
++    };
 +
-     while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) {
-         if (errno != EINTR) {
-             request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__);
-@@ -1286,21 +1460,36 @@ MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc,
-     }
- 
-     for (i = 0; i != n; ++i) {
--        struct qent_src *const be_src = qe_src_new();
++    bq_free_all_free_src(mbc->src);
++
++    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) {
++        if (errno != EINTR) {
++            request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__);
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++        }
++    }
++
++    if (n > req.count) {
++        request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n);
++        n = req.count;
++    }
++
++    for (i = 0; i != n; ++i) {
 +        struct qent_src *const be_src = qe_src_new(memtype);
-         if (!be_src) {
-             request_err(mbc->dc, "Failed to create src be %d\n", i);
-             goto fail;
-         }
--        if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) {
--            qe_src_free(be_src);
++        if (!be_src) {
++            request_err(mbc->dc, "Failed to create src be %d\n", i);
++            goto fail;
++        }
 +        switch (memtype) {
 +        case MEDIABUFS_MEMORY_MMAP:
 +            if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) {
@@ -25773,48 +11511,395 @@ index 980b306b8a72..910ac77bb6f9 100644
 +            break;
 +        default:
 +            request_err(mbc->dc, "Unexpected memorty type\n");
-             goto fail;
-         }
-         be_src->base.index = i;
--        be_src->fixed_size = !mediabufs_src_resizable(mbc);
- 
-         queue_put_free(mbc->src, &be_src->base);
-     }
- 
++            goto fail;
++        }
++        be_src->base.index = i;
++
++        queue_put_free(mbc->src, &be_src->base);
++    }
++
 +    mbc->src->memtype = memtype;
-     return MEDIABUFS_STATUS_SUCCESS;
- 
- fail:
-@@ -1437,9 +1626,13 @@ int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_
- 
- int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc)
- {
++    return MEDIABUFS_STATUS_SUCCESS;
++
++fail:
++    bq_free_all_free_src(mbc->src);
++    req.count = 0;
++    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 &&
++           errno == EINTR)
++        /* Loop */;
++
++    return MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++
++
++/*
++ * Set stuff order:
++ *  Set src fmt
++ *  Set parameters (sps) on vfd
++ *  Negotiate dst format (dst_fmt_set)
++ *  Create src buffers
++ *  Alloc a dst buffer or Create dst slots
++*/
++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc)
++{
++    if (mbc->stream_on)
++        return MEDIABUFS_STATUS_SUCCESS;
++
++    if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) {
++        request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type);
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) {
++        request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type);
++        set_stream(mbc->vfd, mbc->src_fmt.type, false);
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    mbc->stream_on = true;
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc)
++{
++    MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS;
++
++    if (!mbc->stream_on)
++        return MEDIABUFS_STATUS_SUCCESS;
++
++    if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) {
++        request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type);
++        status = MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) {
++        request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type);
++        status = MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    mbc->stream_on = false;
++    return status;
++}
++
++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n)
++{
++    struct v4l2_ext_controls controls = {
++        .controls = control_array,
++        .count = n
++    };
++
++    if (mreq) {
++        controls.which = V4L2_CTRL_WHICH_REQUEST_VAL;
++        controls.request_fd = media_request_fd(mreq);
++    }
++
++    while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls))
++    {
++        const int err = errno;
++        if (err != EINTR) {
++            request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err));
++            return -err;
++        }
++    }
++
++    return 0;
++}
++
++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
++                struct media_request * const mreq,
++                unsigned int id, void *data,
++                unsigned int size)
++{
++    struct v4l2_ext_control control = {
++        .id = id,
++        .ptr = data,
++        .size = size
++    };
++
++    int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1);
++    return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
++                                      enum v4l2_buf_type buf_type,
++                   const uint32_t pixfmt,
++                   const uint32_t width, const uint32_t height,
++                                      const size_t bufsize)
++{
++    MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize);
++    if (rv != MEDIABUFS_STATUS_SUCCESS)
++        request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height);
++
++    return rv;
++}
++
++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n)
++{
++    int rv = 0;
++    while (n--) {
++        while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) {
++            const int err = errno;
++            if (err != EINTR) {
++                // Often used for probing - errors are to be expected
++                request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err);
++                ctrls->type = 0; // 0 is invalid
++                rv = -err;
++                break;
++            }
++        }
++        ++ctrls;
++    }
++    return rv;
++}
++
++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc)
++{
 +#if 1
 +    return 0;
 +#else
-     // Single planar OUTPUT can only take exact size buffers
-     // Multiplanar will take larger than negotiated
-     return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type);
++    // Single planar OUTPUT can only take exact size buffers
++    // Multiplanar will take larger than negotiated
++    return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type);
 +#endif
- }
- 
- static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc)
++}
++
++static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc)
++{
++    if (!mbc)
++        return;
++
++    // Break the weak link first
++    ff_weak_link_break(&mbc->this_wlm);
++
++    polltask_delete(&mbc->pt);
++
++    mediabufs_stream_off(mbc);
++
++    // Empty v4l2 buffer stash
++    request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0);
++    request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0);
++
++    bq_free_all_free_src(mbc->src);
++    bq_free_all_inuse_src(mbc->src);
++    bq_free_all_free_dst(mbc->dst);
++
++    {
++        struct qent_dst *dst_be;
++        while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) {
++            dst_be->base.timestamp = (struct timeval){0};
++            dst_be->base.status = QENT_ERROR;
++            qe_dst_done(dst_be);
++        }
++    }
++
++    queue_delete(mbc->dst);
++    queue_delete(mbc->src);
++    close(mbc->vfd);
++    pthread_mutex_destroy(&mbc->lock);
++
++    free(mbc);
++}
++
++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc)
++{
++    atomic_fetch_add(&mbc->ref_count, 1);
++    return mbc;
++}
++
++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc)
++{
++    struct mediabufs_ctl *const mbc = *pmbc;
++    int n;
++
++    if (!mbc)
++        return;
++    *pmbc = NULL;
++    n = atomic_fetch_sub(&mbc->ref_count, 1);
++    if (n)
++        return;
++    mediabufs_ctl_delete(mbc);
++}
++
++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc)
++{
++    return mbc->capability.version;
++}
++
++static int set_capabilities(struct mediabufs_ctl *const mbc)
++{
++    uint32_t caps;
++
++    if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) {
++        int err = errno;
++        request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err));
++        return -err;
++    }
++
++    caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ?
++            mbc->capability.device_caps :
++            mbc->capability.capabilities;
++
++    if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) {
++        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
++        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
++    }
++    else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) {
++        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
++        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
++    }
++    else {
++        request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps);
++        return -EINVAL;
++    }
++
++    return 0;
++}
++
++/* One of these per context */
++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq)
++{
++    struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc));
++
++    if (!mbc)
++        return NULL;
++
++    mbc->dc = dc;
++    // Default mono planar
++    mbc->pq = pq;
++    pthread_mutex_init(&mbc->lock, NULL);
++
++    /* Pick a default  - could we scan for this? */
++    if (vpath == NULL)
++        vpath = "/dev/media0";
++
++    while ((mbc->vfd = open(vpath, O_RDWR)) == -1)
++    {
++        const int err = errno;
++        if (err != EINTR) {
++            request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err));
++            goto fail0;
++        }
++    }
++
++    if (set_capabilities(mbc)) {
++        request_err(dc, "Bad capabilities for video dev '%s'\n", vpath);
++        goto fail1;
++    }
++
++    mbc->src = queue_new(mbc->vfd);
++    if (!mbc->src)
++        goto fail1;
++    mbc->dst = queue_new(mbc->vfd);
++    if (!mbc->dst)
++        goto fail2;
++    mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc);
++    if (!mbc->pt)
++        goto fail3;
++    mbc->this_wlm = ff_weak_link_new(mbc);
++    if (!mbc->this_wlm)
++        goto fail4;
++
++    /* Cannot add polltask now - polling with nothing pending
++     * generates infinite error polls
++    */
++    return mbc;
++
++fail4:
++    polltask_delete(&mbc->pt);
++fail3:
++    queue_delete(mbc->dst);
++fail2:
++    queue_delete(mbc->src);
++fail1:
++    close(mbc->vfd);
++fail0:
++    free(mbc);
++    request_info(dc, "%s: FAILED\n", __func__);
++    return NULL;
++}
++
++
++
 diff --git a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h
-index 0307a831defd..890947b2e210 100644
---- a/libavcodec/v4l2_req_media.h
+new file mode 100644
+index 000000000000..0f1c79fb4ee1
+--- /dev/null
 +++ b/libavcodec/v4l2_req_media.h
-@@ -43,6 +43,7 @@ typedef enum media_buf_status {
-     MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE,
-     MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT,
-     MEDIABUFS_ERROR_ALLOCATION_FAILED,
+@@ -0,0 +1,171 @@
++/*
++e.h
++*
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_V4L2_REQ_MEDIA_H
++#define AVCODEC_V4L2_REQ_MEDIA_H
++
++#include <stdbool.h>
++#include <stdint.h>
++
++struct v4l2_format;
++struct v4l2_fmtdesc;
++struct v4l2_query_ext_ctrl;
++
++struct pollqueue;
++struct media_request;
++struct media_pool;
++
++typedef enum media_buf_status {
++    MEDIABUFS_STATUS_SUCCESS = 0,
++    MEDIABUFS_ERROR_OPERATION_FAILED,
++    MEDIABUFS_ERROR_DECODING_ERROR,
++    MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE,
++    MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT,
++    MEDIABUFS_ERROR_ALLOCATION_FAILED,
 +    MEDIABUFS_ERROR_UNSUPPORTED_MEMORY,
- } MediaBufsStatus;
- 
- struct media_pool * media_pool_new(const char * const media_path,
-@@ -70,6 +71,15 @@ struct qent_dst;
- struct dmabuf_h;
- struct dmabufs_ctl;
- 
++} MediaBufsStatus;
++
++struct media_pool * media_pool_new(const char * const media_path,
++                   struct pollqueue * const pq,
++                   const unsigned int n);
++void media_pool_delete(struct media_pool ** pmp);
++
++// Obtain a media request
++// Will block if none available - has a 2sec timeout
++struct media_request * media_request_get(struct media_pool * const mp);
++int media_request_fd(const struct media_request * const req);
++
++// Start this request
++// Request structure is returned to pool once done
++int media_request_start(struct media_request * const req);
++
++// Return an *unstarted* media_request to the pool
++// May later be upgraded to allow for aborting a started req
++int media_request_abort(struct media_request ** const preq);
++
++
++struct mediabufs_ctl;
++struct qent_src;
++struct qent_dst;
++struct dmabuf_h;
++struct dmabufs_ctl;
++
 +// 1-1 mammping to V4L2 type - just defined separetely to avoid some include versioning difficulties
 +enum mediabufs_memory {
 +   MEDIABUFS_MEMORY_UNSET            = 0,
@@ -25824,60 +11909,834 @@ index 0307a831defd..890947b2e210 100644
 +   MEDIABUFS_MEMORY_DMABUF           = 4,
 +};
 +
- int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp);
- struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst);
- 
-@@ -93,6 +103,8 @@ MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
-                 unsigned int plane,
-                 int fd, size_t size);
- 
++int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp);
++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst);
++
++// prealloc
++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc);
++// dbsc may be NULL if realloc not required
++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc);
++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane);
++int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane);
++MediaBufsStatus qent_dst_wait(struct qent_dst *const be);
++void qent_dst_delete(struct qent_dst *const be);
++// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead
++void qent_dst_unref(struct qent_dst ** const pbe_dst);
++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst);
++
++const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no);
++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be);
++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be);
++/* Import an fd unattached to any mediabuf */
++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
++                unsigned int plane,
++                int fd, size_t size);
++
 +const char * mediabufs_memory_name(const enum mediabufs_memory m);
 +
- MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
-                 struct media_request **const pmreq,
-                 struct qent_src **const psrc_be,
-@@ -106,7 +118,7 @@ struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc,
- // Create dst slots without alloc
- // If fixed true then qent_alloc will only get slots from this pool and will
- // block until a qent has been unrefed
--MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed);
++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
++                struct media_request **const pmreq,
++                struct qent_src **const psrc_be,
++                struct qent_dst *const dst_be,
++                const bool is_final);
++// Get / alloc a dst buffer & associate with a slot
++// If the dst pool is empty then behaviour depends on the fixed flag passed to
++// dst_slots_create.  Default is !fixed = unlimited alloc
++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc,
++                           struct dmabufs_ctl *const dbsc);
++// Create dst slots without alloc
++// If fixed true then qent_alloc will only get slots from this pool and will
++// block until a qent has been unrefed
 +MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype);
- 
- MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc);
- MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc);
-@@ -140,7 +152,12 @@ MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
- 
- MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw,
-                   struct dmabufs_ctl * const dbsc,
--                  unsigned int n);
++
++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc);
++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc);
++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc);
++
++typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc);
++
++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
++               const unsigned int width,
++               const unsigned int height,
++               mediabufs_dst_fmt_accept_fn *const accept_fn,
++               void *const accept_v);
++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc);
++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src);
++
++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq,
++                                struct v4l2_ext_control control_array[], unsigned int n);
++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
++                struct media_request * const mreq,
++                unsigned int id, void *data,
++                unsigned int size);
++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n);
++
++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc);
++
++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
++                                      enum v4l2_buf_type buf_type,
++                                      const uint32_t pixfmt,
++                                      const uint32_t width, const uint32_t height,
++                                      const size_t bufsize);
++
++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw,
++                  struct dmabufs_ctl * const dbsc,
 +                  unsigned int n,
 +                  const enum mediabufs_memory memtype);
 +
 +// Want to have appropriate formats set first
 +MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype);
 +MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype);
- 
- #define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
- unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc);
++
++#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c))
++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc);
++
++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc,
++                     const char *vpath, struct pollqueue *const pq);
++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc);
++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc);
++
++
++#endif
+diff --git a/libavcodec/v4l2_req_pollqueue.c b/libavcodec/v4l2_req_pollqueue.c
+new file mode 100644
+index 000000000000..4b4984e5b064
+--- /dev/null
++++ b/libavcodec/v4l2_req_pollqueue.c
+@@ -0,0 +1,385 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#include <errno.h>
++#include <limits.h>
++#include <poll.h>
++#include <pthread.h>
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <stdbool.h>
++#include <stdlib.h>
++#include <stdint.h>
++#include <stdio.h>
++#include <string.h>
++#include <unistd.h>
++#include <sys/eventfd.h>
++
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_utils.h"
++
++
++struct pollqueue;
++
++enum polltask_state {
++    POLLTASK_UNQUEUED = 0,
++    POLLTASK_QUEUED,
++    POLLTASK_RUNNING,
++    POLLTASK_Q_KILL,
++    POLLTASK_RUN_KILL,
++};
++
++struct polltask {
++    struct polltask *next;
++    struct polltask *prev;
++    struct pollqueue *q;
++    enum polltask_state state;
++
++    int fd;
++    short events;
++
++    void (*fn)(void *v, short revents);
++    void * v;
++
++    uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */
++    sem_t kill_sem;
++};
++
++struct pollqueue {
++    atomic_int ref_count;   /* Number of references minus one: unref frees when it finds 0 */
++    pthread_mutex_t lock;   /* Held while touching the task list, task states and no_prod */
++
++    struct polltask *head;  /* First task in the queue */
++    struct polltask *tail;  /* Last task in the queue; new tasks are appended here */
++
++    bool kill;              /* Set by pollqueue_free to make the poll thread exit */
++    bool no_prod;           /* True while the poll thread is dispatching: wake-up writes are skipped */
++    int prod_fd;            /* eventfd written to in order to wake the poll thread */
++    struct polltask *prod_pt;  /* Task that polls prod_fd */
++    pthread_t worker;       /* The poll thread */
++};
++
++struct polltask *polltask_new(struct pollqueue *const pq,
++                              const int fd, const short events,
++                  void (*const fn)(void *v, short revents),
++                  void *const v)
++{   /* Create an unqueued task that will call fn(v, revents) for fd/events on pq; NULL on failure */
++    struct polltask *pt;
++
++    if (!events)  /* A task that waits for nothing is rejected */
++        return NULL;
++
++    pt = malloc(sizeof(*pt));
++    if (!pt)
++        return NULL;
++
++    *pt = (struct polltask){
++        .next = NULL,
++        .prev = NULL,
++        .q = pollqueue_ref(pq),  /* Reference is dropped again in polltask_delete */
++        .fd = fd,
++        .events = events,
++        .fn = fn,
++        .v = v
++    };
++
++    sem_init(&pt->kill_sem, 0, 0);  /* Starts at 0 so that polltask_delete blocks until it is posted */
++
++    return pt;
++}
++
++static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt)
++{   /* Unlink pt from pq's doubly linked list, fixing up head/tail, and clear pt's links */
++    if (pt->prev)
++        pt->prev->next = pt->next;
++    else  /* pt was the head */
++        pq->head = pt->next;
++    if (pt->next)
++        pt->next->prev = pt->prev;
++    else  /* pt was the tail */
++        pq->tail = pt->prev;
++    pt->next = NULL;
++    pt->prev = NULL;
++}
++
++static void polltask_free(struct polltask * const pt)
++{   /* Destroy pt's semaphore and free it; does not unlink pt or drop its queue reference */
++    sem_destroy(&pt->kill_sem);
++    free(pt);
++}
++
++static int pollqueue_prod(const struct pollqueue *const pq)
++{   /* Wake the poll thread by adding 1 to the eventfd; returns the write() result */
++    static const uint64_t one = 1;
++    return write(pq->prod_fd, &one, sizeof(one));
++}
++
++void polltask_delete(struct polltask **const ppt)
++{   /* Cancel *ppt, wait for the poll thread to finish with it, free it and NULL *ppt; NULL *ppt is a no-op */
++    struct polltask *const pt = *ppt;
++    struct pollqueue * pq;
++    enum polltask_state state;
++    bool prodme;
++
++    if (!pt)
++        return;
++
++    pq = pt->q;
++    pthread_mutex_lock(&pq->lock);
++    state = pt->state;  /* State before the kill request decides whether we must wait below */
++    pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL;
++    prodme = !pq->no_prod;
++    pthread_mutex_unlock(&pq->lock);
++
++    if (state != POLLTASK_UNQUEUED) {  /* Queued or running: the poll thread still has hold of pt */
++        if (prodme)
++            pollqueue_prod(pq);
++        while (sem_wait(&pt->kill_sem) && errno == EINTR)  /* NOTE(review): looks like this never returns if called from pt's own callback - confirm callers avoid that */
++            /* loop */;
++    }
++
++    // Leave zapping the ref until we have DQed the PT as might well be
++    // legitimately used in it
++    *ppt = NULL;
++    polltask_free(pt);
++    pollqueue_unref(&pq);
++}
++
++static uint64_t pollqueue_now(int timeout)
++{   /* CLOCK_MONOTONIC now + timeout, in ms; returns 0 only if the clock cannot be read */
++    struct timespec now;
++    uint64_t now_ms;
++
++    if (clock_gettime(CLOCK_MONOTONIC, &now))
++        return 0;
++    now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout;
++    return now_ms ? now_ms : (uint64_t)1;  /* 0 means "never" in polltask.timeout, so avoid returning it */
++}
++
++void pollqueue_add_task(struct polltask *const pt, const int timeout)
++{   /* Append pt to its queue with a timeout in ms (< 0 => none) and wake the poll thread; no-op if pt is being deleted */
++    bool prodme = false;
++    struct pollqueue * const pq = pt->q;
++
++    pthread_mutex_lock(&pq->lock);
++    if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) {
++        if (pq->tail)
++            pq->tail->next = pt;
++        else  /* Queue was empty */
++            pq->head = pt;
++        pt->prev = pq->tail;
++        pt->next = NULL;
++        pt->state = POLLTASK_QUEUED;
++        pt->timeout = timeout < 0 ? 0 : pollqueue_now(timeout);  /* Stored as an absolute deadline */
++        pq->tail = pt;
++        prodme = !pq->no_prod;  /* No wake-up needed while the poll thread is dispatching */
++    }
++    pthread_mutex_unlock(&pq->lock);
++    if (prodme)
++        pollqueue_prod(pq);
++}
++
++static void *poll_thread(void *v)
++{   /* Worker: repeatedly poll() every queued task and run the callbacks of those that are ready or timed out */
++    struct pollqueue *const pq = v;
++    struct pollfd *a = NULL;
++    size_t asize = 0;
++
++    pthread_mutex_lock(&pq->lock);
++    do {
++        unsigned int i;
++        unsigned int n = 0;
++        struct polltask *pt;
++        struct polltask *pt_next;
++        uint64_t now = pollqueue_now(0);
++        int timeout = -1;  /* poll() timeout in ms; stays -1 (wait forever) if no task has a deadline */
++        int rv;
++
++        for (pt = pq->head; pt; pt = pt_next) {  /* Pass 1: reap killed tasks, build a[] in queue order */
++            int64_t t;
++
++            pt_next = pt->next;
++
++            if (pt->state == POLLTASK_Q_KILL) {
++                pollqueue_rem_task(pq, pt);
++                sem_post(&pt->kill_sem);  /* Releases the polltask_delete waiting on this task */
++                continue;
++            }
++
++            if (n >= asize) {
++                asize = asize ? asize * 2 : 4;
++                a = realloc(a, asize * sizeof(*a));  /* NOTE(review): on failure the old array is leaked as a becomes NULL */
++                if (!a) {
++                    request_log("Failed to realloc poll array to %zd\n", asize);
++                    goto fail_locked;
++                }
++            }
++
++            a[n++] = (struct pollfd){
++                .fd = pt->fd,
++                .events = pt->events
++            };
++
++            t = (int64_t)(pt->timeout - now);  /* ms until this task's deadline; negative if already past */
++            if (pt->timeout && t < INT_MAX &&
++                (timeout < 0 || (int)t < timeout))
++                timeout = (t < 0) ? 0 : (int)t;  /* Keep the soonest deadline */
++        }
++        pthread_mutex_unlock(&pq->lock);
++
++        if ((rv = poll(a, n, timeout)) == -1) {
++            if (errno != EINTR) {  /* EINTR is not treated as an error */
++                request_log("Poll error: %s\n", strerror(errno));
++                goto fail_unlocked;
++            }
++        }
++
++        pthread_mutex_lock(&pq->lock);
++        now = pollqueue_now(0);
++
++        /* Prodding in this loop is pointless and might lead to
++         * infinite looping
++        */
++        pq->no_prod = true;
++        for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) {  /* Pass 2: walk the first n tasks in step with a[] */
++            pt_next = pt->next;
++
++            /* Pending? */
++            if (a[i].revents ||
++                (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) {
++                pollqueue_rem_task(pq, pt);
++                if (pt->state == POLLTASK_QUEUED)
++                    pt->state = POLLTASK_RUNNING;
++                if (pt->state == POLLTASK_Q_KILL)  /* Delete requested since pass 1: still run the callback */
++                    pt->state = POLLTASK_RUN_KILL;
++                pthread_mutex_unlock(&pq->lock);
++
++                /* This can add new entries to the Q but as
++                 * those are added to the tail our existing
++                 * chain remains intact
++                */
++                pt->fn(pt->v, a[i].revents);  /* revents is 0 when the task fired on timeout only */
++
++                pthread_mutex_lock(&pq->lock);
++                if (pt->state == POLLTASK_RUNNING)
++                    pt->state = POLLTASK_UNQUEUED;
++                if (pt->state == POLLTASK_RUN_KILL)
++                    sem_post(&pt->kill_sem);  /* polltask_delete may free pt from here on */
++            }
++        }
++        pq->no_prod = false;
++
++    } while (!pq->kill);
++
++fail_locked:
++    pthread_mutex_unlock(&pq->lock);
++fail_unlocked:
++    free(a);
++    return NULL;
++}
++
++static void prod_fn(void *v, short revents)
++{   /* Callback of the wake-up eventfd task: drain the eventfd and requeue the task unless shutting down */
++    struct pollqueue *const pq = v;
++    char buf[8];
++    if (revents)
++        read(pq->prod_fd, buf, 8);  /* Consumes the pending wake-up; the result is ignored */
++    if (!pq->kill)
++        pollqueue_add_task(pq->prod_pt, -1);  /* Re-arm with no timeout */
++}
++
++struct pollqueue * pollqueue_new(void)
++{   /* Create a queue and start its poll thread; returns NULL on failure. Release with pollqueue_unref */
++    struct pollqueue *pq = malloc(sizeof(*pq));
++    if (!pq)
++        return NULL;
++    *pq = (struct pollqueue){
++        .ref_count = ATOMIC_VAR_INIT(0),
++        .lock = PTHREAD_MUTEX_INITIALIZER,
++        .head = NULL,
++        .tail = NULL,
++        .kill = false,
++        .prod_fd = -1
++    };
++
++    pq->prod_fd = eventfd(0, EFD_NONBLOCK);
++    if (pq->prod_fd == -1)  /* eventfd() reports failure as -1; 1 would be a valid descriptor */
++        goto fail1;
++    pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq);
++    if (!pq->prod_pt)
++        goto fail2;
++    pollqueue_add_task(pq->prod_pt, -1);
++    if (pthread_create(&pq->worker, NULL, poll_thread, pq))
++        goto fail3;
++    // Reset ref count which will have been inced by polltask_new (0 => one ref held)
++    atomic_store(&pq->ref_count, 0);
++    return pq;
++
++fail3:
++    polltask_free(pq->prod_pt);
++fail2:
++    close(pq->prod_fd);
++fail1:
++    free(pq);
++    return NULL;
++}
++
++static void pollqueue_free(struct pollqueue *const pq)
++{   /* Stop and join the poll thread, then release everything owned by pq and pq itself */
++    void *rv;
++
++    pthread_mutex_lock(&pq->lock);
++    pq->kill = true;
++    pollqueue_prod(pq);  /* Wake the thread so that it notices kill */
++    pthread_mutex_unlock(&pq->lock);
++
++    pthread_join(pq->worker, &rv);
++    polltask_free(pq->prod_pt);  /* Freed directly: polltask_delete would drop a queue reference */
++    pthread_mutex_destroy(&pq->lock);
++    close(pq->prod_fd);
++    free(pq);
++}
++
++struct pollqueue * pollqueue_ref(struct pollqueue *const pq)
++{   /* Take an additional reference on pq; returns pq for convenience */
++    atomic_fetch_add(&pq->ref_count, 1);
++    return pq;
++}
++
++void pollqueue_unref(struct pollqueue **const ppq)
++{   /* Drop the caller's reference and NULL *ppq; the queue is freed with its last reference. NULL *ppq is a no-op */
++    struct pollqueue * const pq = *ppq;
++
++    if (!pq)
++        return;
++    *ppq = NULL;
++
++    if (atomic_fetch_sub(&pq->ref_count, 1) != 0)  /* Old value 0 => that was the last reference */
++        return;
++
++    pollqueue_free(pq);
++}
++
++
++
+diff --git a/libavcodec/v4l2_req_pollqueue.h b/libavcodec/v4l2_req_pollqueue.h
+new file mode 100644
+index 000000000000..9634f33d48fd
+--- /dev/null
++++ b/libavcodec/v4l2_req_pollqueue.h
+@@ -0,0 +1,42 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_V4L2_REQ_POLLQUEUE_H
++#define AVCODEC_V4L2_REQ_POLLQUEUE_H
++
++struct polltask;
++struct pollqueue;
++
++struct polltask *polltask_new(struct pollqueue *const pq,
++			      const int fd, const short events,
++			      void (*const fn)(void *v, short revents),
++			      void *const v);
++void polltask_delete(struct polltask **const ppt);
++
++void pollqueue_add_task(struct polltask *const pt, const int timeout);
++struct pollqueue * pollqueue_new(void);
++void pollqueue_unref(struct pollqueue **const ppq);
++struct pollqueue * pollqueue_ref(struct pollqueue *const pq);
++
++#endif /* AVCODEC_V4L2_REQ_POLLQUEUE_H */
+diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h
+new file mode 100644
+index 000000000000..a6160c5e1c3b
+--- /dev/null
++++ b/libavcodec/v4l2_req_utils.h
+@@ -0,0 +1,51 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_V4L2_REQ_UTILS_H
++#define AVCODEC_V4L2_REQ_UTILS_H
++
++#include <stdint.h>
++#include "libavutil/log.h"
++
++#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
++
++#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__)
++#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__)
++#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__)
++#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__)
++
++static inline char safechar(char c) {  // Returns c if it is printable, non-space ASCII, otherwise '.'
++    return c > 0x20 && c < 0x7f ? c : '.';
++}
++
++static inline const char * strfourcc(char tbuf[5], uint32_t fcc) {  // Renders fcc as 4 chars, lowest byte first; returns tbuf
++    tbuf[0] = safechar((fcc >>  0) & 0xff);
++    tbuf[1] = safechar((fcc >>  8) & 0xff);
++    tbuf[2] = safechar((fcc >> 16) & 0xff);
++    tbuf[3] = safechar((fcc >> 24) & 0xff);
++    tbuf[4] = '\0';  // Always NUL terminated
++    return tbuf;
++}
++
++#endif
 diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index cd79aad5631a..5cf17dd5e3fb 100644
---- a/libavcodec/v4l2_request_hevc.c
+new file mode 100644
+index 000000000000..94c647380364
+--- /dev/null
 +++ b/libavcodec/v4l2_request_hevc.c
-@@ -144,6 +144,8 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-     const struct decdev * decdev;
-     const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 1).src_pix_fmt_v4l2;  // Assuming constant for all APIs but avoiding V4L2 includes
-     size_t src_size;
+@@ -0,0 +1,410 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++#include "config.h"
++#include "decode.h"
++#include "hevc/hevcdec.h"
++#include "hwaccel_internal.h"
++#include "hwconfig.h"
++#include "internal.h"
++
++#include "v4l2_request_hevc.h"
++
++#include "libavutil/hwcontext_drm.h"
++#include "libavutil/mem.h"
++#include "libavutil/pixdesc.h"
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_utils.h"
++
++static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
++{   /* Size to request for a coded bitstream buffer for a w x h picture of bit depth 8 + bits_minus8 */
++    const size_t wxh = w * h;
++    size_t bits_alloc;
++
++    /* Annex A gives a min compression of 2 @ lvl 3.1
++     * (wxh <= 983040) and min 4 thereafter but avoid
++     * the oddity of 983041 having a lower limit than
++     * 983040.
++     * Multiply by 3/2 for 4:2:0
++     */
++    bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
++        wxh < 983040 * 2 ? 983040 * 3 / 4 :
++        wxh * 3 / 8;
++    /* Allow for bit depth */
++    bits_alloc += (bits_alloc * bits_minus8) / 8;
++    /* Add a few bytes (16k) for overhead */
++    bits_alloc += 0x4000;
++    return bits_alloc;
++}
++
++static int v4l2_req_hevc_start_frame(AVCodecContext *avctx,
++                                     av_unused const uint8_t *buffer,
++                                     av_unused uint32_t size)
++{   // Forwards to start_frame of the API version held in ctx->fns
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++    V4L2RequestContextHEVC *const ctx = priv->cctx;
++    return ctx->fns->start_frame(avctx, ctx, buffer, size);
++}
++
++static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
++{   // Forwards to decode_slice of the API version held in ctx->fns
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++    V4L2RequestContextHEVC *const ctx = priv->cctx;
++    return ctx->fns->decode_slice(avctx, ctx, buffer, size);
++}
++
++static int v4l2_req_hevc_end_frame(AVCodecContext *avctx)
++{   // Forwards to end_frame of the API version held in ctx->fns
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++    V4L2RequestContextHEVC *const ctx = priv->cctx;
++    return ctx->fns->end_frame(avctx, ctx);
++}
++
++static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx)
++{   // Forwards to abort_frame of the API version held in ctx->fns
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++    V4L2RequestContextHEVC *const ctx = priv->cctx;
++    ctx->fns->abort_frame(avctx, ctx);
++}
++
++static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
++{   // Forwards to frame_params of the API version held in ctx->fns
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++    V4L2RequestContextHEVC *const ctx = priv->cctx;
++    return ctx->fns->frame_params(avctx, ctx, hw_frames_ctx);
++}
++
++static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame)
++{   // Forwards to alloc_frame of the API version held in ctx->fns
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++    V4L2RequestContextHEVC *const ctx = priv->cctx;
++    return ctx->fns->alloc_frame(avctx, ctx, frame);
++}
++
++
++static void
++cctx_free(void * v, uint8_t * data)
++{   // Free callback given to av_buffer_create for the shared context: data is the context, v is unused
++    V4L2RequestContextHEVC *const ctx = (V4L2RequestContextHEVC *)data;
++
++    mediabufs_ctl_unref(&ctx->mbufs);
++    media_pool_delete(&ctx->mpool);
++    pollqueue_unref(&ctx->pq);
++    dmabufs_ctl_unref(&ctx->dbufs);
++    devscan_delete(&ctx->devscan);
++
++    decode_q_uninit(&ctx->decode_q);
++
++    av_free(ctx);  // ctx was allocated with av_mallocz
++}
++
++static int v4l2_request_hevc_uninit(AVCodecContext *avctx)
++{   // Hwaccel uninit: forget the shared context and drop this codec context's reference to it; always returns 0
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++
++    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++//    decode_q_wait(&ctx->decode_q, NULL);  // Wait for all other threads to be out of decode
++
++    priv->cctx = NULL;
++    av_buffer_unref(&priv->cctx_buf);  // cctx_free is this buffer's free callback
++
++//    if (avctx->hw_frames_ctx) {
++//        AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
++//        av_buffer_pool_flush(hwfc->pool);
++//    }
++    return 0;
++}
++
++static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc)
++{   // Destination format filter: returns 1 if fmtdesc suits the current SPS bit depth, 0 otherwise; v is the AVCodecContext
++    AVCodecContext *const avctx = v;
++    const HEVCContext *const h = avctx->priv_data;
++    const HEVCPPS * const pps = h->pps;
++    const HEVCSPS * const sps = pps->sps;
++
++    if (sps->bit_depth == 8) {  // 8-bit: column-128 NV12 or plain NV12
++        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 ||
++            fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) {
++            return 1;
++        }
++    }
++    else if (sps->bit_depth == 10) {  // 10-bit: only the column-128 10-bit format
++        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
++            return 1;
++        }
++    }
++    return 0;  // Any other bit depth or format is rejected
++}
++
++static int v4l2_request_hevc_init(AVCodecContext *avctx)
++{
++    const HEVCContext *h = avctx->priv_data;
++    V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data;
++    V4L2RequestContextHEVC * ctx;
++    const HEVCPPS * const pps = h->pps;
++    const HEVCSPS * const sps = pps->sps;
++    int ret;
++    const struct decdev * decdev;
++    const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2;  // Assuming constant for all APIs but avoiding V4L2 includes
++    size_t src_size;
 +    enum mediabufs_memory src_memtype;
 +    enum mediabufs_memory dst_memtype;
- 
-     av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
- 
-@@ -174,8 +176,14 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-            decdev_media_path(decdev), decdev_video_path(decdev));
- 
-     if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) {
--        av_log(avctx, AV_LOG_ERROR, "Unable to open dmabufs\n");
--        goto fail0;
++
++    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    // Give up immediately if this is something that we have no code to deal with
++    if (sps->chroma_format_idc != 1) {
++        av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", sps->chroma_format_idc);
++        return AVERROR_PATCHWELCOME;
++    }
++    if (!(sps->bit_depth == 10 || sps->bit_depth == 8) ||
++        sps->bit_depth != sps->bit_depth_chroma) {
++        av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", sps->bit_depth, sps->bit_depth_chroma);
++        return AVERROR_PATCHWELCOME;
++    }
++
++    if ((ctx = av_mallocz(sizeof(*ctx))) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to allocate context");
++        return AVERROR(ENOMEM);
++    }
++    if ((priv->cctx_buf = av_buffer_create((uint8_t*)ctx, sizeof(*ctx), cctx_free, NULL, 0)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to allocate context buffer");
++        av_free(ctx);
++        return AVERROR(ENOMEM);
++    }
++    priv->cctx = ctx;
++
++    if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) {
++        av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n");
++        ret = AVERROR(-ret);
++        goto fail0;
++    }
++    ret = AVERROR(ENOMEM);  // Assume mem fail by default for these
++
++    if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL)
++    {
++        av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n");
++        ret = AVERROR(ENODEV);
++        goto fail0;
++    }
++    av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n",
++           decdev_media_path(decdev), decdev_video_path(decdev));
++
++    if ((ctx->pq = pollqueue_new()) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n");
++        goto fail1;
++    }
++
++    if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n");
++        goto fail2;
++    }
++
++    if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n");
++        goto fail3;
++    }
++
++    // Version test for functional Pi5 HEVC iommu.
++    // rpivid kernel patch was merged in 6.1.57
++    // *** Remove when it is unlikely that there are any broken kernels left
++    if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57))
++        ctx->dbufs = dmabufs_ctl_new_vidbuf_cached();
++    else
++        ctx->dbufs = dmabufs_ctl_new();
++
++    if (ctx->dbufs == NULL) {
 +        av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n");
 +        src_memtype = MEDIABUFS_MEMORY_MMAP;
 +        dst_memtype = MEDIABUFS_MEMORY_MMAP;
@@ -25886,24 +12745,26 @@ index cd79aad5631a..5cf17dd5e3fb 100644
 +        av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n");
 +        src_memtype = MEDIABUFS_MEMORY_DMABUF;
 +        dst_memtype = MEDIABUFS_MEMORY_DMABUF;
-     }
- 
-     if ((ctx->pq = pollqueue_new()) == NULL) {
-@@ -196,8 +204,9 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-     // Ask for an initial bitbuf size of max size / 4
-     // We will realloc if we need more
-     // Must use sps->h/w as avctx contains cropped size
++    }
++
++    // Ask for an initial bitbuf size of max size / 4
++    // We will realloc if we need more
++    // Must use sps->h/w as avctx contains cropped size
 +retry_src_memtype:
-     src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8);
--    if (mediabufs_src_resizable(ctx->mbufs))
++    src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8);
 +    if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs))
-         src_size /= 4;
-     // Kludge for conformance tests which break Annex A limits
-     else if (src_size < 0x40000)
-@@ -210,6 +219,15 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-         goto fail4;
-     }
- 
++        src_size /= 4;
++    // Kludge for conformance tests which break Annex A limits
++    else if (src_size < 0x40000)
++        src_size = 0x40000;
++
++    if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt,
++                              sps->width, sps->height, src_size)) {
++        char tbuf1[5];
++        av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
++        goto fail4;
++    }
++
 +    if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) {
 +        if (src_memtype == MEDIABUFS_MEMORY_DMABUF) {
 +            src_memtype = MEDIABUFS_MEMORY_MMAP;
@@ -25913,22 +12774,43 @@ index cd79aad5631a..5cf17dd5e3fb 100644
 +        goto fail4;
 +    }
 +
-     if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) {
-         av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n");
-         ctx->fns = &V2(ff_v4l2_req_hevc, 4);
-@@ -238,7 +256,7 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-         goto fail4;
-     }
- 
--    if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6)) {
++    if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0)
++        ctx->fns = &V2(ff_v4l2_req_hevc, 4);
++#if CONFIG_V4L2_REQ_HEVC_VX
++    else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0)
++        ctx->fns = &V2(ff_v4l2_req_hevc, 3);
++    else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0)
++        ctx->fns = &V2(ff_v4l2_req_hevc, 2);
++    else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0)
++        ctx->fns = &V2(ff_v4l2_req_hevc, 1);
++#endif
++    else {
++        av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
++        ret = AVERROR(EINVAL);
++        goto fail4;
++    }
++
++    av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n",
++           ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs));
++
++    if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) {
++        char tbuf1[5];
++        av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
++        goto fail4;
++    }
++
 +    if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) {
-         av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n");
-         goto fail4;
-     }
-@@ -250,8 +268,17 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-                sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering,
-                avctx->thread_count, avctx->extra_hw_frames);
- 
++        av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n");
++        goto fail4;
++    }
++
++    {
++        unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering +
++            avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6);
++        av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots,
++               sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering,
++               avctx->thread_count, avctx->extra_hw_frames);
++
 +        if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) {
 +            if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) {
 +                av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n");
@@ -25938,488 +12820,2295 @@ index cd79aad5631a..5cf17dd5e3fb 100644
 +            dst_memtype = MEDIABUFS_MEMORY_MMAP;
 +        }
 +
-         // extra_hw_frames is -1 if unset
--        if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0))) {
++        // extra_hw_frames is -1 if unset
 +        if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) {
-             av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n");
-             goto fail4;
-         }
-@@ -277,9 +304,10 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-     // Set our s/w format
-     avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;
- 
--    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s\n",
-+    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s\n",
-            ctx->fns->name,
--           decdev_media_path(decdev), decdev_video_path(decdev));
-+           decdev_media_path(decdev), decdev_video_path(decdev),
-+           mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype));
- 
-     return 0;
- 
-
-From 3087f58ab5abfac2a3d50359db08431a89d446df Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 22 Aug 2022 12:35:40 +0000
-Subject: [PATCH 064/186] Set buffer lengths on DQ
-
----
- libavcodec/v4l2_req_media.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c
-index 910ac77bb6f9..1a9944774a48 100644
---- a/libavcodec/v4l2_req_media.c
-+++ b/libavcodec/v4l2_req_media.c
-@@ -733,6 +733,14 @@ static struct qent_base * qe_dequeue(struct buf_pool *const bp,
-         return NULL;
-     }
- 
-+    if (mp) {
-+        unsigned int i;
-+        for (i = 0; i != buffer.length; ++i)
-+            dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0);
-+    }
-+    else
-+        dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0);
-+
-     be->timestamp = buffer.timestamp;
-     be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE;
-     return be;
-
-From d761ce983b4738df798b02636433bfc342e387c1 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 22 Aug 2022 17:11:24 +0000
-Subject: [PATCH 065/186] Fix compile if videodev2.h defines V4L2 HEVC request
- API
-
-If videodev2.h does define the HEVC request API it is really hard to
-set old variations of the controls so if it does then we only compile
-against the system includes and remove the back compatability.
----
- configure                      | 9 +++++++++
- libavcodec/Makefile            | 4 ++--
- libavcodec/hevc-ctrls-v4.h     | 2 ++
- libavcodec/v4l2_req_hevc_vx.c  | 5 -----
- libavcodec/v4l2_request_hevc.c | 6 ++++--
- 5 files changed, 17 insertions(+), 9 deletions(-)
-
-diff --git a/configure b/configure
-index f3991452e4a5..055944934476 100755
---- a/configure
-+++ b/configure
-@@ -1946,6 +1946,7 @@ FEATURE_LIST="
-     swscale_alpha
-     vout_drm
-     vout_egl
-+    v4l2_req_hevc_vx
- "
- 
- # this list should be kept in linking order
-@@ -6912,6 +6913,14 @@ fi
- 
- check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
- check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;"
-+disable v4l2_req_hevc_vx
-+if enabled hevc_v4l2request_hwaccel; then
-+    enable v4l2_req_hevc_vx
-+fi
-+if enabled hevc_v4l2_request; then
-+    disable v4l2_req_hevc_vx
-+fi
-+
- check_headers sys/videoio.h
- test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
- 
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index d433a712366f..11f183c9b9ba 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -999,8 +999,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)       += dxva2_hevc.o
- OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
- OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
- OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec.o
--OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o\
--                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o  v4l2_req_hevc_v4.o
-+OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o
-+OBJS-$(CONFIG_V4L2_REQ_HEVC_VX)           += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o
- OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
- OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o h265_profile_level.o
- OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
-diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h
-index 7e05f6e7c39b..7829d8208435 100644
---- a/libavcodec/hevc-ctrls-v4.h
-+++ b/libavcodec/hevc-ctrls-v4.h
-@@ -53,6 +53,8 @@
- #include <linux/const.h>
- #include <linux/types.h>
- 
-+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
-+
- #define V4L2_CID_STATELESS_HEVC_SPS		(V4L2_CID_CODEC_STATELESS_BASE + 400)
- #define V4L2_CID_STATELESS_HEVC_PPS		(V4L2_CID_CODEC_STATELESS_BASE + 401)
- #define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_STATELESS_BASE + 402)
-diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c
-index 5d083016f89a..e1bd5c6a1f09 100644
---- a/libavcodec/v4l2_req_hevc_vx.c
-+++ b/libavcodec/v4l2_req_hevc_vx.c
-@@ -40,11 +40,6 @@
- #define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B          V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B
- #endif
- 
--// Should be in videodev2 but we might not have a good enough one
--#ifndef V4L2_PIX_FMT_HEVC_SLICE
--#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
--#endif
--
- #include "v4l2_request_hevc.h"
- 
- #include "libavutil/hwcontext_drm.h"
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index 5cf17dd5e3fb..614a1b4d99e4 100644
---- a/libavcodec/v4l2_request_hevc.c
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -17,7 +17,7 @@
-  */
- 
- 
--
-+#include "config.h"
- #include "decode.h"
- #include "hevcdec.h"
- #include "hwconfig.h"
-@@ -142,7 +142,7 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-     const HEVCSPS * const sps = h->ps.sps;
-     int ret;
-     const struct decdev * decdev;
--    const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 1).src_pix_fmt_v4l2;  // Assuming constant for all APIs but avoiding V4L2 includes
-+    const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2;  // Assuming constant for all APIs but avoiding V4L2 includes
-     size_t src_size;
-     enum mediabufs_memory src_memtype;
-     enum mediabufs_memory dst_memtype;
-@@ -232,6 +232,7 @@ retry_src_memtype:
-         av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n");
-         ctx->fns = &V2(ff_v4l2_req_hevc, 4);
-     }
-+#if CONFIG_V4L2_REQ_HEVC_VX
-     else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) {
-         av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n");
-         ctx->fns = &V2(ff_v4l2_req_hevc, 3);
-@@ -244,6 +245,7 @@ retry_src_memtype:
-         av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n");
-         ctx->fns = &V2(ff_v4l2_req_hevc, 1);
-     }
-+#endif
-     else {
-         av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
-         ret = AVERROR(EINVAL);
-
-From 8fff782ad6a053a67e3621ffaa06dfa6d6b6bba6 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 12 Sep 2022 17:59:22 +0100
-Subject: [PATCH 066/186] v4l2_m2m_enc: Send headers in in pkt side_data
-
-If GLOBAL_HEADERS are requested then we can't provide them at init time
-so send as NEW_EXTRADATA side data in a similar way to some AV1
-encoders.
----
- libavcodec/v4l2_m2m_enc.c | 33 +++++++++++++++++++++++----------
- 1 file changed, 23 insertions(+), 10 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index 05ff6ba72655..099ad23928d3 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -544,14 +544,12 @@ dequeue:
-         av_freep(&avctx->extradata);
-         avctx->extradata_size = 0;
- 
--        if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) != NULL)
--            memcpy(data, avpkt->data, len);
-+        if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
-+            goto fail_no_mem;
- 
-+        memcpy(data, avpkt->data, len);
-         av_packet_unref(avpkt);
- 
--        if (data == NULL)
--            return AVERROR(ENOMEM);
--
-         // We need to copy the header, but keep local if not global
-         if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) {
-             avctx->extradata = data;
-@@ -567,18 +565,28 @@ dequeue:
-     }
- 
-     // First frame must be key so mark as such even if encoder forgot
--    if (capture->first_buf == 2)
-+    if (capture->first_buf == 2) {
-         avpkt->flags |= AV_PKT_FLAG_KEY;
- 
-+        // Add any extradata to the 1st packet we emit as we cannot create it at init
-+        if (avctx->extradata_size > 0 && avctx->extradata) {
-+            void * const side = av_packet_new_side_data(avpkt,
-+                                           AV_PKT_DATA_NEW_EXTRADATA,
-+                                           avctx->extradata_size);
-+            if (!side)
-+                goto fail_no_mem;
-+
-+            memcpy(side, avctx->extradata, avctx->extradata_size);
++            av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n");
++            goto fail4;
 +        }
 +    }
 +
-     // Add SPS/PPS to the start of every key frame if non-global headers
-     if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) {
-         const size_t newlen = s->extdata_size + avpkt->size;
-         AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE);
- 
--        if (buf == NULL) {
--            av_packet_unref(avpkt);
--            return AVERROR(ENOMEM);
--        }
-+        if (buf == NULL)
-+            goto fail_no_mem;
- 
-         memcpy(buf->data, s->extdata_data, s->extdata_size);
-         memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size);
-@@ -592,6 +600,11 @@ dequeue:
- //    av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret);
-     capture->first_buf = 0;
-     return 0;
++    if (mediabufs_stream_on(ctx->mbufs)) {
++        av_log(avctx, AV_LOG_ERROR, "Failed stream on\n");
++        goto fail4;
++    }
 +
-+fail_no_mem:
-+    ret = AVERROR(ENOMEM);
-+    av_packet_unref(avpkt);
++    if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n");
++        goto fail4;
++    }
++
++    if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed set controls\n");
++        goto fail5;
++    }
++
++    decode_q_init(&ctx->decode_q);
++
++    // Set our s/w format
++    avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;
++
++    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n",
++           ctx->fns->name,
++           decdev_media_path(decdev), decdev_video_path(decdev),
++           mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype),
++           av_get_pix_fmt_name(avctx->sw_pix_fmt));
++
++    return 0;
++
++fail5:
++    av_buffer_unref(&avctx->hw_frames_ctx);
++fail4:
++fail3:
++fail2:
++fail1:
++fail0:
++    priv->cctx = NULL;
++    av_buffer_unref(&priv->cctx_buf);
 +    return ret;
- }
- 
- static av_cold int v4l2_encode_init(AVCodecContext *avctx)
-
-From 9d4bafaf9c0f149c2ad4b4b26d5c55a4c2deaaa0 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 14 Sep 2022 15:44:10 +0000
-Subject: [PATCH 067/186] matroskaenc: Allow H264 SPS/PPS headers in packet
- sidedata
-
----
- libavformat/matroskaenc.c | 26 ++++++++++++++++++++++----
- 1 file changed, 22 insertions(+), 4 deletions(-)
-
-diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
-index 113541bd9a20..61e4c976ef76 100644
---- a/libavformat/matroskaenc.c
-+++ b/libavformat/matroskaenc.c
-@@ -77,6 +77,10 @@
- 
- #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \
-                       ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER)
++}
 +
-+/* Reserved size for H264 headers if not extant at init time */
-+#define MAX_H264_HEADER_SIZE 1024
++static int
++v4l2_request_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
++{
++    V4L2RequestPrivHEVC * const spriv = src->internal->hwaccel_priv_data;
++    V4L2RequestPrivHEVC * const dpriv = dst->internal->hwaccel_priv_data;
++    int rv;
 +
- #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \
-                               !(mkv)->is_live)
++    av_log(dst, AV_LOG_DEBUG, "<<< %s (%s)\n", __func__, dpriv->cctx_buf ? "old" : "new");
++
++    if ((rv = av_buffer_replace(&dpriv->cctx_buf, spriv->cctx_buf)) != 0)
++        return rv;
++
++    dpriv->cctx = spriv->cctx;
++    return 0;
++}
++
++static void
++v4l2_request_free_frame_priv(FFRefStructOpaque hwctx, void *data)
++{
++    fprintf(stderr, "%s\n", __func__);
++}
++
++const FFHWAccel ff_hevc_v4l2request_hwaccel = {
++    .p = {
++        .name           = "hevc_v4l2request",
++        .type           = AVMEDIA_TYPE_VIDEO,
++        .id             = AV_CODEC_ID_HEVC,
++        .pix_fmt        = AV_PIX_FMT_DRM_PRIME,
++    },
++    .alloc_frame    = v4l2_req_hevc_alloc_frame,
++    .start_frame    = v4l2_req_hevc_start_frame,
++    .decode_slice   = v4l2_req_hevc_decode_slice,
++    .end_frame      = v4l2_req_hevc_end_frame,
++    .abort_frame    = v4l2_req_hevc_abort_frame,
++    .init           = v4l2_request_hevc_init,
++    .uninit         = v4l2_request_hevc_uninit,
++    .free_frame_priv = v4l2_request_free_frame_priv,
++    .frame_priv_data_size  = 128,
++    .update_thread_context = v4l2_request_update_thread_context,
++    .priv_data_size = sizeof(V4L2RequestPrivHEVC),
++    .frame_params   = v4l2_req_hevc_frame_params,
++    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
++};
+diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h
+new file mode 100644
+index 000000000000..9b41cbe9ceb3
+--- /dev/null
++++ b/libavcodec/v4l2_request_hevc.h
+@@ -0,0 +1,131 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_V4L2_REQUEST_HEVC_H
++#define AVCODEC_V4L2_REQUEST_HEVC_H
++
++#include <stdint.h>
++#include <drm_fourcc.h>
++#include "v4l2_req_decode_q.h"
++
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++// P030 should be defined in drm_fourcc.h and hopefully will be sometime
++// in the future but until then...
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
++#endif
++
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++#include <linux/videodev2.h>
++#ifndef V4L2_CID_CODEC_BASE
++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in videodev2.h and hopefully will be sometime in the future but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
++#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY
++#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY	0x0800
++#endif
++
++#define VCAT(name, version) name##_v##version
++#define V2(n,v) VCAT(n, v)
++#define V(n) V2(n, HEVC_CTRLS_VERSION)
++
++#define S2(x) #x
++#define STR(x) S2(x)
++
++// 1 per decoder
++struct v4l2_req_decode_fns;
++
++typedef struct V4L2RequestContextHEVC {
++//    V4L2RequestContext base;
++    const struct v4l2_req_decode_fns * fns;
++
++    unsigned int timestamp;  // ?? maybe uint64_t
++
++    int decode_mode;
++    int start_code;
++    unsigned int max_slices;    // 0 => not wanted (frame mode)
++    unsigned int max_offsets;   // 0 => not wanted
++
++    req_decode_q decode_q;
++
++    struct devscan *devscan;
++    struct dmabufs_ctl *dbufs;
++    struct pollqueue *pq;
++    struct media_pool * mpool;
++    struct mediabufs_ctl *mbufs;
++} V4L2RequestContextHEVC;
++
++typedef struct V4L2RequestPrivHEVC {
++    V4L2RequestContextHEVC * cctx;  // Common context
++    AVBufferRef * cctx_buf;         // Buf for cctx
++} V4L2RequestPrivHEVC;
++
++typedef struct v4l2_req_decode_fns {
++    int src_pix_fmt_v4l2;
++    const char * name;
++
++    // Init setup
++    int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
++    int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
++
++    // Passthrough of hwaccel fns
++    int (*start_frame)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, const uint8_t *buf, uint32_t buf_size);
++    int (*decode_slice)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, const uint8_t *buf, uint32_t buf_size);
++    int (*end_frame)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx);
++    void (*abort_frame)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx);
++    int (*frame_params)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, AVBufferRef *hw_frames_ctx);
++    int (*alloc_frame)(AVCodecContext * avctx, V4L2RequestContextHEVC *const ctx, AVFrame *frame);
++} v4l2_req_decode_fns;
++
++
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4);
++
++#endif
+diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c
+new file mode 100644
+index 000000000000..71f6cc356720
+--- /dev/null
++++ b/libavcodec/weak_link.c
+@@ -0,0 +1,127 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#include <stdlib.h>
++#include <pthread.h>
++#include <stdatomic.h>
++#include "weak_link.h"
++
++struct ff_weak_link_master {
++    atomic_int ref_count;    /* 0 is single ref for easier atomics */
++    pthread_rwlock_t lock;
++    void * ptr;
++};
++
++static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c)
++{
++    return (struct ff_weak_link_master *)c;
++}
++
++struct ff_weak_link_master * ff_weak_link_new(void * p)
++{
++    struct ff_weak_link_master * w = malloc(sizeof(*w));
++    if (!w)
++        return NULL;
++    atomic_init(&w->ref_count, 0);
++    w->ptr = p;
++    if (pthread_rwlock_init(&w->lock, NULL)) {
++        free(w);
++        return NULL;
++    }
++    return w;
++}
++
++static void weak_link_do_unref(struct ff_weak_link_master * const w)
++{
++    int n = atomic_fetch_sub(&w->ref_count, 1);
++    if (n)
++        return;
++
++    pthread_rwlock_destroy(&w->lock);
++    free(w);
++}
++
++// Unref & break link
++void ff_weak_link_break(struct ff_weak_link_master ** ppLink)
++{
++    struct ff_weak_link_master * const w = *ppLink;
++    if (!w)
++        return;
++
++    *ppLink = NULL;
++    pthread_rwlock_wrlock(&w->lock);
++    w->ptr = NULL;
++    pthread_rwlock_unlock(&w->lock);
++
++    weak_link_do_unref(w);
++}
++
++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w)
++{
++    if (!w)
++        return NULL;
++    atomic_fetch_add(&w->ref_count, 1);
++    return (struct ff_weak_link_client*)w;
++}
++
++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink)
++{
++    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
++    if (!w)
++        return;
++
++    *ppLink = NULL;
++    weak_link_do_unref(w);
++}
++
++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink)
++{
++    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
++
++    if (!w)
++        return NULL;
++
++    if (pthread_rwlock_rdlock(&w->lock))
++        goto broken;
++
++    if (w->ptr)
++        return w->ptr;
++
++    pthread_rwlock_unlock(&w->lock);
++
++broken:
++    *ppLink = NULL;
++    weak_link_do_unref(w);
++    return NULL;
++}
++
++// Ignores a NULL c (so can be on the return path of both broken & live links)
++void ff_weak_link_unlock(struct ff_weak_link_client * c)
++{
++    struct ff_weak_link_master * const w = weak_link_x(c);
++    if (w)
++        pthread_rwlock_unlock(&w->lock);
++}
++
++
+diff --git a/libavcodec/weak_link.h b/libavcodec/weak_link.h
+new file mode 100644
+index 000000000000..5c66b29f9b61
+--- /dev/null
++++ b/libavcodec/weak_link.h
+@@ -0,0 +1,46 @@
++/*
++    Copyright (C) 2024  John Cox john.cox@raspberrypi.com
++
++    Permission is hereby granted, free of charge, to any person
++    obtaining a copy of this software and associated documentation
++    files (the "Software"), to deal in the Software without
++    restriction, including without limitation the rights to use, copy,
++    modify, merge, publish, distribute, sublicense, and/or sell copies
++    of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
++    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
++    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
++    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
++    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef AVCODEC_WEAK_LINK_H
++#define AVCODEC_WEAK_LINK_H
++
++struct ff_weak_link_master;
++struct ff_weak_link_client;
++
++struct ff_weak_link_master * ff_weak_link_new(void * p);
++void ff_weak_link_break(struct ff_weak_link_master ** ppLink);
++
++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w);
++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink);
++
++// Returns NULL if link broken - in this case it will also zap
++//   *ppLink and unref the weak_link.
++// Returns NULL if *ppLink is NULL (so a link once broken stays broken)
++//
++// The above does mean that there is a race if this is called simultaneously
++// by two threads using the same weak_link_client (so don't do that)
++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink);
++void ff_weak_link_unlock(struct ff_weak_link_client * c);
++
++#endif
+diff --git a/libavdevice/Makefile b/libavdevice/Makefile
+index c30449201d47..d39dadb1dee1 100644
+--- a/libavdevice/Makefile
++++ b/libavdevice/Makefile
+@@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV)              += sndio_enc.o sndio.o
+ OBJS-$(CONFIG_V4L2_INDEV)                += v4l2.o v4l2-common.o timefilter.o
+ OBJS-$(CONFIG_V4L2_OUTDEV)               += v4l2enc.o v4l2-common.o
+ OBJS-$(CONFIG_VFWCAP_INDEV)              += vfwcap.o
++OBJS-$(CONFIG_VOUT_DRM_OUTDEV)           += drm_vout.o
++OBJS-$(CONFIG_VOUT_EGL_OUTDEV)           += egl_vout.o
+ OBJS-$(CONFIG_XCBGRAB_INDEV)             += xcbgrab.o
+ OBJS-$(CONFIG_XV_OUTDEV)                 += xv.o
  
-@@ -1121,8 +1125,12 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn
-     case AV_CODEC_ID_WAVPACK:
-         return put_wv_codecpriv(dyn_cp, extradata, extradata_size);
-     case AV_CODEC_ID_H264:
--        return ff_isom_write_avcc(dyn_cp, extradata,
--                                  extradata_size);
-+        if (par->extradata_size)
-+            return ff_isom_write_avcc(dyn_cp, extradata,
-+                                      extradata_size);
-+        else
-+            *size_to_reserve = MAX_H264_HEADER_SIZE;
-+        break;
-     case AV_CODEC_ID_HEVC:
-         return ff_isom_write_hvcc(dyn_cp, extradata,
-                                   extradata_size, 0);
-@@ -2731,8 +2739,8 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt)
-         }
-         break;
- #endif
--    // FIXME: Remove the following once libaom starts propagating proper extradata during init()
--    //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208
-+    // FIXME: Remove the following once libaom starts propagating extradata during init()
-+    //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012
-     case AV_CODEC_ID_AV1:
-         if (side_data_size && mkv->track.bc && !par->extradata_size) {
-             // If the reserved space doesn't suffice, only write
-@@ -2744,6 +2752,16 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt)
-         } else if (!par->extradata_size)
-             return AVERROR_INVALIDDATA;
-         break;
-+    // H264 V4L2 has a similar issue
-+    case AV_CODEC_ID_H264:
-+        if (side_data_size && mkv->track.bc && !par->extradata_size) {
-+            ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size,
-+                                          par, mkv->track.bc, track, 0);
-+            if (ret < 0)
-+                return ret;
-+        } else if (!par->extradata_size)
-+            return AVERROR_INVALIDDATA;
-+        break;
-     default:
-         if (side_data_size)
-             av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index);
-
-From 969917342459c78f480f327ea682d8880357a2df Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 14 Sep 2022 15:55:15 +0000
-Subject: [PATCH 068/186] movenc: Allow H264 SPS/PPS headers in packet sidedata
-
----
- libavformat/movenc.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/libavformat/movenc.c b/libavformat/movenc.c
-index c4fcb5f8b1b3..891adbf7b26c 100644
---- a/libavformat/movenc.c
-+++ b/libavformat/movenc.c
-@@ -6343,6 +6343,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
-     if (trk->par->codec_id == AV_CODEC_ID_MP4ALS ||
-             trk->par->codec_id == AV_CODEC_ID_AAC ||
-             trk->par->codec_id == AV_CODEC_ID_AV1 ||
-+            trk->par->codec_id == AV_CODEC_ID_H264 ||
-             trk->par->codec_id == AV_CODEC_ID_FLAC) {
-         size_t side_size;
-         uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
-
-From a78c7c1a9afc53f0ef71d251cb06789763babb26 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 26 Sep 2022 12:45:05 +0100
-Subject: [PATCH 069/186] Allow ffmpeg to select codec internal hwfmts if
- no_cvt_hw
-
-This allows the selection of DRM_PRIME from v4l2m2m without forcing it
-in the decoder.
-
-Not utterly sure this is the right method for 5.1 but it does work
----
- fftools/ffmpeg.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
-index 04bea4ef4fe9..0de534618323 100644
---- a/fftools/ffmpeg.c
-+++ b/fftools/ffmpeg.c
-@@ -2766,12 +2766,15 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat
-             break;
+diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
+index 9b9a9146c7d7..1b68c16e4f19 100644
+--- a/libavdevice/alldevices.c
++++ b/libavdevice/alldevices.c
+@@ -56,6 +56,8 @@ extern const FFOutputFormat ff_sndio_muxer;
+ extern const FFInputFormat  ff_v4l2_demuxer;
+ extern const FFOutputFormat ff_v4l2_muxer;
+ extern const FFInputFormat  ff_vfwcap_demuxer;
++extern const FFOutputFormat ff_vout_drm_muxer;
++extern const FFOutputFormat ff_vout_egl_muxer;
+ extern const FFInputFormat  ff_xcbgrab_demuxer;
+ extern const FFOutputFormat ff_xv_muxer;
  
-         if (ist->hwaccel_id == HWACCEL_GENERIC ||
--            ist->hwaccel_id == HWACCEL_AUTO) {
-+            ist->hwaccel_id == HWACCEL_AUTO ||
-+            no_cvt_hw) {
-             for (i = 0;; i++) {
-                 config = avcodec_get_hw_config(s->codec, i);
-                 if (!config)
-                     break;
--                if (!(config->methods &
-+                if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL))
-+                    av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p);
-+                else if (!(config->methods &
-                       AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
-                     continue;
-                 if (config->pix_fmt == *p)
-
-From 72c4c2e860365d46301c688d8586b2f4f023ac8d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 1 Sep 2022 11:42:41 +0000
-Subject: [PATCH 070/186] vf_deinterlace_v4l2m2m: Add a v4l2m2m scaler
-
-The logic for running an isp based scaler is pretty much identical to
-that for the deinterlacer so add to the deinterlacer. This requires
-some rework of the setup code to avoid assumptions that are true for
-deinterlace but not scale but the reworked code requires few switches
-based on operation.
----
- libavfilter/allfilters.c             |    1 +
- libavfilter/vf_deinterlace_v4l2m2m.c | 1123 ++++++++++++++++++++------
- 2 files changed, 877 insertions(+), 247 deletions(-)
-
+diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c
+new file mode 100644
+index 000000000000..6d11e98d7521
+--- /dev/null
++++ b/libavdevice/drm_vout.c
+@@ -0,0 +1,684 @@
++/*
++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++// *** This module is a work in progress and its utility is strictly
++//     limited to testing.
++
++#include "libavutil/opt.h"
++#include "libavutil/frame.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavformat/mux.h"
++#include "avdevice.h"
++
++#include "pthread.h"
++#include <semaphore.h>
++#include <unistd.h>
++
++#include <xf86drm.h>
++#include <xf86drmMode.h>
++#include <drm_fourcc.h>
++
++#define TRACE_ALL 0
++
++#define DRM_MODULE "vc4"
++
++#define ERRSTR strerror(errno)
++
++struct drm_setup {
++   int conId;
++   uint32_t crtcId;
++   int crtcIdx;
++   uint32_t planeId;
++   unsigned int out_fourcc;
++   struct {
++       int x, y, width, height;
++   } compose;
++};
++
++typedef struct drm_aux_s {
++    unsigned int fb_handle;
++    uint32_t bo_handles[AV_DRM_MAX_PLANES];
++    AVFrame * frame;
++} drm_aux_t;
++
++// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS
++// we get initial flicker probably due to dodgy drm timing
++#define AUX_SIZE 3
++typedef struct drm_display_env_s
++{
++    AVClass *class;
++
++    int drm_fd;
++    uint32_t con_id;
++    struct drm_setup setup;
++    enum AVPixelFormat avfmt;
++
++    int show_all;
++    const char * drm_module;
++
++    unsigned int ano;
++    drm_aux_t aux[AUX_SIZE];
++
++    pthread_t q_thread;
++    sem_t q_sem_in;
++    sem_t q_sem_out;
++    int q_terminate;
++    AVFrame * q_next;
++
++} drm_display_env_t;
++
++
++static int drm_vout_write_trailer(AVFormatContext *s)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++
++    return 0;
++}
++
++static int drm_vout_write_header(AVFormatContext *s)
++{
++    const AVCodecParameters * const par = s->streams[0]->codecpar;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++    if (   s->nb_streams > 1
++        || par->codec_type != AVMEDIA_TYPE_VIDEO
++        || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
++        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
++static int find_plane(struct AVFormatContext * const avctx,
++                      const int drmfd, const int crtcidx, const uint32_t format,
++                      uint32_t * const pplane_id)
++{
++   drmModePlaneResPtr planes;
++   drmModePlanePtr plane;
++   drmModeObjectPropertiesPtr props = NULL;
++   drmModePropertyPtr prop = NULL;
++   unsigned int i;
++   unsigned int j;
++   int ret = -1;
++   // Returns 0 with *pplane_id set to a plane usable on crtcidx that lists format, else -1
++   planes = drmModeGetPlaneResources(drmfd);
++   if (!planes)
++   {
++       av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR);
++       return -1;
++   }
++
++   for (i = 0; i < planes->count_planes; ++i) {
++      plane = drmModeGetPlane(drmfd, planes->planes[i]);
++      if (!plane)   // check the plane just fetched; planes was validated above
++      {
++          av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR);
++          goto fail;   // ret is still -1; never continue with *pplane_id unset
++      }
++
++      if (!(plane->possible_crtcs & (1 << crtcidx))) {
++         drmModeFreePlane(plane);
++         continue;
++      }
++
++      for (j = 0; j < plane->count_formats; ++j) {
++         if (plane->formats[j] == format)
++            break;
++      }
++
++      if (j == plane->count_formats) {
++         drmModeFreePlane(plane);
++         continue;
++      }
++
++      *pplane_id = plane->plane_id;
++      drmModeFreePlane(plane);
++      break;
++   }
++
++   if (i == planes->count_planes) {
++       ret = -1;
++       goto fail;
++   }
++
++    props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE);
++    if (!props)
++        goto fail;
++    for (i = 0; i != props->count_props; ++i) {
++        if (prop)
++            drmModeFreeProperty(prop);
++        prop = drmModeGetProperty(drmfd, props->props[i]);
++        if (!prop)
++            goto fail;
++        if (strcmp("zpos", prop->name) == 0) {   // values[1] is presumably the range maximum - confirm
++            if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0)
++                av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]);
++            else
++                av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n");
++            break;
++        }
++    }
++
++    ret = 0;
++fail:
++    if (props)
++        drmModeFreeObjectProperties(props);
++    if (prop)
++        drmModeFreeProperty(prop);
++    drmModeFreePlaneResources(planes);
++    return ret;
++}
++
++static void da_uninit(drm_display_env_t * const de, drm_aux_t * da)
++{
++    if (da->fb_handle != 0) {
++        drmModeRmFB(de->drm_fd, da->fb_handle);
++        da->fb_handle = 0;
++    }
++
++    for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) {
++        if (da->bo_handles[i]) {
++            struct drm_gem_close gem_close = {.handle = da->bo_handles[i]};
++            drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
++            da->bo_handles[i] = 0;
++        }
++    }
++    av_frame_free(&da->frame);
++}
++
++static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame)
++{
++    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
++    drm_aux_t * da = de->aux + de->ano;
++    const uint32_t format = desc->layers[0].format;
++    int ret = 0;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd);
++#endif
++
++    if (de->setup.out_fourcc != format) {
++        if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) {
++            av_frame_free(&frame);
++            av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format);
++            return -1;
++        }
++        de->setup.out_fourcc = format;
++    }
++
++    {
++        drmVBlank vbl = {
++            .request = {
++                .type = DRM_VBLANK_RELATIVE,
++                .sequence = 0
++            }
++        };
++
++        while (drmWaitVBlank(de->drm_fd, &vbl)) {
++            if (errno != EINTR) {
++//                av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR);
++                break;
++            }
++        }
++    }
++
++    da_uninit(de, da);
++
++    {
++        uint32_t pitches[4] = {0};
++        uint32_t offsets[4] = {0};
++        uint64_t modifiers[4] = {0};
++        uint32_t bo_handles[4] = {0};
++        int has_mods = 0;
++        int i, j, n;
++
++        da->frame = frame;
++
++        for (i = 0; i < desc->nb_objects; ++i) {
++            if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) {
++                av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR);
++                return -1;
++            }
++            if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR &&
++                desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID)
++                has_mods = 1;
++        }
++
++        n = 0;
++        for (i = 0; i < desc->nb_layers; ++i) {
++            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
++                const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
++                const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
++                pitches[n] = p->pitch;
++                offsets[n] = p->offset;
++                modifiers[n] = obj->format_modifier;
++                bo_handles[n] = da->bo_handles[p->object_index];
++                ++n;
++            }
++        }
++
++#if 1 && TRACE_ALL
++        av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
++               " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
++               av_frame_cropped_width(frame),
++               av_frame_cropped_height(frame),
++               desc->layers[0].format,
++               bo_handles[0],
++               bo_handles[1],
++               bo_handles[2],
++               bo_handles[3],
++               pitches[0],
++               pitches[1],
++               pitches[2],
++               pitches[3],
++               offsets[0],
++               offsets[1],
++               offsets[2],
++               offsets[3],
++               (long long)modifiers[0],
++               (long long)modifiers[1],
++               (long long)modifiers[2],
++               (long long)modifiers[3]
++               );
++#endif
++
++        if (drmModeAddFB2WithModifiers(de->drm_fd,
++                                       av_frame_cropped_width(frame),
++                                       av_frame_cropped_height(frame),
++                                       desc->layers[0].format, bo_handles,
++                                       pitches, offsets,
++                                       has_mods ? modifiers : NULL,
++                                       &da->fb_handle,
++                                       has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) {
++            av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR);
++            return -1;
++        }
++    }
++
++    ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId,
++                              da->fb_handle, 0,
++                de->setup.compose.x, de->setup.compose.y,
++                de->setup.compose.width,
++                de->setup.compose.height,
++                0, 0,
++                av_frame_cropped_width(frame) << 16,
++                av_frame_cropped_height(frame) << 16);
++
++    if (ret != 0) {
++        av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR);
++    }
++
++    de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1;
++
++    return ret;
++}
++
++static int do_sem_wait(sem_t * const sem, const int nowait)
++{
++    while (nowait ? sem_trywait(sem) : sem_wait(sem)) {
++        if (errno != EINTR)
++            return -errno;
++    }
++    return 0;
++}
++
++static void * display_thread(void * v)
++{
++    AVFormatContext * const s = v;
++    drm_display_env_t * const de = s->priv_data;
++    int i;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++#endif
++
++    sem_post(&de->q_sem_out);
++
++    for (;;) {
++        AVFrame * frame;
++
++        do_sem_wait(&de->q_sem_in, 0);
++
++        if (de->q_terminate)
++            break;
++
++        frame = de->q_next;
++        de->q_next = NULL;
++        sem_post(&de->q_sem_out);
++
++        do_display(s, de, frame);
++    }
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++#endif
++
++    for (i = 0; i != AUX_SIZE; ++i)
++        da_uninit(de, de->aux + i);
++
++    av_frame_free(&de->q_next);
++
++    return NULL;
++}
++
++static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
++{
++    const AVFrame * const src_frame = (AVFrame *)pkt->data;
++    AVFrame * frame;
++    drm_display_env_t * const de = s->priv_data;
++    int ret;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++
++    if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) {
++        av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts);
++        return 0;
++    }
++
++    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
++        frame = av_frame_alloc();
++        av_frame_ref(frame, src_frame);
++    }
++    else if (src_frame->format == AV_PIX_FMT_VAAPI) {
++        frame = av_frame_alloc();
++        frame->format = AV_PIX_FMT_DRM_PRIME;
++        if (av_hwframe_map(frame, src_frame, 0) != 0)
++        {
++            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
++            av_frame_free(&frame);
++            return AVERROR(EINVAL);
++        }
++    }
++    else {
++        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
++        return AVERROR(EINVAL);
++    }
++
++    ret = do_sem_wait(&de->q_sem_out, !de->show_all);
++    if (ret) {
++        av_frame_free(&frame);
++    }
++    else {
++        de->q_next = frame;
++        sem_post(&de->q_sem_in);
++    }
++
++    return 0;
++}
++
++static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
++                          unsigned flags)
++{
++    av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
++    return AVERROR_PATCHWELCOME;
++}
++
++static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type);
++#endif
++    switch(type) {
++    case AV_APP_TO_DEV_WINDOW_REPAINT:
++        return 0;
++    default:
++        break;
++    }
++    return AVERROR(ENOSYS);
++}
++
++static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId)
++{
++   int ret = -1;
++   int i;
++   drmModeRes *res = drmModeGetResources(drmfd);
++   drmModeConnector *c;
++   // Fills s (connector, crtc id/index, compose rect) and *pConId; returns 0 on success, -1 on failure
++   if(!res)
++   {
++      av_log(avctx, AV_LOG_ERROR, "drmModeGetResources failed: %s\n", ERRSTR);
++      return -1;
++   }
++
++   if (res->count_crtcs <= 0)
++   {
++      av_log(avctx, AV_LOG_ERROR, "drm: no crts\n");
++      goto fail_res;
++   }
++
++   if (!s->conId) {
++      av_log(avctx, AV_LOG_INFO,
++         "No connector ID specified.  Choosing default from list:\n");
++
++      for (i = 0; i < res->count_connectors; i++) {
++         drmModeConnector *con =
++            drmModeGetConnector(drmfd, res->connectors[i]);
++         drmModeEncoder *enc = NULL;
++         drmModeCrtc *crtc = NULL;
++         if (!con) continue;   // connector lookup failed; nothing to inspect or free
++         if (con->encoder_id) {
++            enc = drmModeGetEncoder(drmfd, con->encoder_id);
++            if (enc && enc->crtc_id) {
++               crtc = drmModeGetCrtc(drmfd, enc->crtc_id);
++            }
++         }
++
++         if (!s->conId && crtc) {
++            s->conId = con->connector_id;
++            s->crtcId = crtc->crtc_id;
++         }
++
++         av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n",
++                con->connector_id,
++                crtc ? crtc->crtc_id : 0,
++                con->connector_type,
++                crtc ? crtc->width : 0,
++                crtc ? crtc->height : 0,
++                (s->conId == (int)con->connector_id ?
++            " (chosen)" : ""));
++
++          if (crtc)
++              drmModeFreeCrtc(crtc);
++          if (enc)
++              drmModeFreeEncoder(enc);
++          if (con)
++              drmModeFreeConnector(con);
++      }
++
++      if (!s->conId) {
++         av_log(avctx, AV_LOG_ERROR,
++            "No suitable enabled connector found.\n");
++         goto fail_res;   // ret is -1; release res instead of leaking it
++      }
++   }
++
++   s->crtcIdx = -1;
++
++   for (i = 0; i < res->count_crtcs; ++i) {
++      if (s->crtcId == res->crtcs[i]) {
++         s->crtcIdx = i;
++         break;
++      }
++   }
++
++   if (s->crtcIdx == -1)
++   {
++       av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId);
++       goto fail_res;
++   }
++
++   if (res->count_connectors <= 0)
++   {
++       av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n");
++       goto fail_res;
++   }
++
++   c = drmModeGetConnector(drmfd, s->conId);
++   if (!c)
++   {
++       av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR);
++       goto fail_res;
++   }
++
++   if (!c->count_modes)
++   {
++       av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n");
++       goto fail_conn;
++   }
++
++   {
++      drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId);
++      s->compose.x = crtc->x;
++      s->compose.y = crtc->y;
++      s->compose.width = crtc->width;
++      s->compose.height = crtc->height;
++      drmModeFreeCrtc(crtc);
++   }
++
++   if (pConId)
++      *pConId = c->connector_id;
++   ret = 0;
++
++fail_conn:
++   drmModeFreeConnector(c);
++
++fail_res:
++   drmModeFreeResources(res);
++
++   return ret;
++}
++
++// deinit is called if init fails so no need to clean up explicitly here
++static int drm_vout_init(struct AVFormatContext * s)
++{
++    drm_display_env_t * const de = s->priv_data;
++    int rv;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    de->drm_fd = -1;
++    de->con_id = 0;
++    de->setup = (struct drm_setup){0};
++    de->q_terminate = 0;
++
++    if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0)
++    {
++        rv = AVERROR(errno);
++        av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv));
++        return rv;
++    }
++
++    if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0)
++    {
++        av_log(s, AV_LOG_ERROR, "failed to find valid mode\n");
++        rv = AVERROR(EINVAL);
++        goto fail_close;
++    }
++
++    sem_init(&de->q_sem_in, 0, 0);
++    sem_init(&de->q_sem_out, 0, 0);
++    if (pthread_create(&de->q_thread, NULL, display_thread, s)) {
++        rv = AVERROR(errno);
++        av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv));
++        goto fail_close;
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++
++    return 0;
++
++fail_close:
++    close(de->drm_fd);
++    de->drm_fd = -1;
++    av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__);
++
++    return rv;
++}
++
++static void drm_vout_deinit(struct AVFormatContext * s)
++{
++    drm_display_env_t * const de = s->priv_data;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++    if (de->drm_fd >= 0) {   // init leaves drm_fd < 0 on every failure path, so >= 0 means sems + thread exist
++        de->q_terminate = 1;
++        sem_post(&de->q_sem_in);   // wake the display thread so it sees q_terminate
++        pthread_join(de->q_thread, NULL);
++        sem_destroy(&de->q_sem_in);
++        sem_destroy(&de->q_sem_out);
++    }
++    for (unsigned int i = 0; i != AUX_SIZE; ++i)
++        da_uninit(de, de->aux + i);
++
++    av_frame_free(&de->q_next);
++
++    if (de->drm_fd >= 0) {
++        close(de->drm_fd);
++        de->drm_fd = -1;
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++}
++
++
++#define OFFSET(x) offsetof(drm_display_env_t, x)
++static const AVOption options[] = {
++    { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++    { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
++    { NULL }
++};
++
++static const AVClass drm_vout_class = {
++    .class_name = "drm vid outdev",
++    .item_name  = av_default_item_name,
++    .option     = options,
++    .version    = LIBAVUTIL_VERSION_INT,
++    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
++};
++
++FFOutputFormat ff_vout_drm_muxer = {
++    .p = {
++        .name           = "vout_drm",
++        .long_name      = NULL_IF_CONFIG_SMALL("Drm video output device"),
++        .audio_codec    = AV_CODEC_ID_NONE,
++        .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,
++        .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
++        .priv_class     = &drm_vout_class,
++    },
++    .priv_data_size = sizeof(drm_display_env_t),
++    .write_header   = drm_vout_write_header,
++    .write_packet   = drm_vout_write_packet,
++    .write_uncoded_frame = drm_vout_write_frame,
++    .write_trailer  = drm_vout_write_trailer,
++    .control_message = drm_vout_control_message,
++    .init           = drm_vout_init,
++    .deinit         = drm_vout_deinit,
++};
++
+diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c
+new file mode 100644
+index 000000000000..0c8c629852fb
+--- /dev/null
++++ b/libavdevice/egl_vout.c
+@@ -0,0 +1,784 @@
++/*
++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++// *** This module is a work in progress and its utility is strictly
++//     limited to testing.
++//     Amongst other issues it doesn't wait for the pic to be displayed before
++//     returning the buffer so flickering does occur.
++
++#include <epoxy/gl.h>
++#include <epoxy/egl.h>
++
++#include "libavutil/opt.h"
++#include "libavutil/avassert.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/imgutils.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavformat/mux.h"
++#include "avdevice.h"
++
++#include "pthread.h"
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <unistd.h>
++
++#include <X11/Xlib.h>
++#include <X11/Xutil.h>
++
++#include "libavutil/rpi_sand_fns.h"
++
++#define TRACE_ALL 0
++
++struct egl_setup {
++    int conId;
++
++    Display *dpy;
++    EGLDisplay egl_dpy;
++    EGLContext ctx;
++    EGLSurface surf;
++    Window win;
++
++    uint32_t crtcId;
++    int crtcIdx;
++    uint32_t planeId;
++    struct {
++        int x, y, width, height;
++    } compose;
++};
++
++typedef struct egl_aux_s {
++    int fd;
++    GLuint texture;
++
++} egl_aux_t;
++
++typedef struct egl_display_env_s {
++    AVClass *class;
++
++    struct egl_setup setup;
++    enum AVPixelFormat avfmt;
++
++    int show_all;
++    int window_width, window_height;
++    int window_x, window_y;
++    int fullscreen;
++
++    egl_aux_t aux[32];
++
++    pthread_t q_thread;
++    pthread_mutex_t q_lock;
++    sem_t display_start_sem;
++    sem_t q_sem;
++    int q_terminate;
++    AVFrame *q_this;
++    AVFrame *q_next;
++
++} egl_display_env_t;
++
++
++/**
++ * Remove window border/decorations.
++ */
++static void
++no_border(Display *dpy, Window w)
++{
++    static const unsigned MWM_HINTS_DECORATIONS = (1 << 1);
++    static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5;
++
++    typedef struct {
++        unsigned long       flags;
++        unsigned long       functions;
++        unsigned long       decorations;
++        long                inputMode;
++        unsigned long       status;
++    } PropMotifWmHints;
++
++    PropMotifWmHints motif_hints = {0};   /* all 5 elements are sent below, so none may be indeterminate */
++    Atom prop, proptype;
++    unsigned long flags = 0;
++
++    /* setup the property */
++    motif_hints.flags = MWM_HINTS_DECORATIONS;
++    motif_hints.decorations = flags;
++
++    /* get the atom for the property */
++    prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True);
++    if (!prop) {
++        /* something went wrong! */
++        return;
++    }
++
++    /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */
++    proptype = prop;
++
++    XChangeProperty(dpy, w,                         /* display, window */
++                    prop, proptype,                 /* property, type */
++                    32,                             /* format: 32-bit datums */
++                    PropModeReplace,                /* mode */
++                    (unsigned char *)&motif_hints, /* data */
++                    PROP_MOTIF_WM_HINTS_ELEMENTS    /* nelements */
++                   );
++}
++
++
++/*
++ * Create an RGB, double-buffered window.
++ * Return the window and context handles.
++ */
++static int
++make_window(struct AVFormatContext *const s,
++            egl_display_env_t *const de,
++            Display *dpy, EGLDisplay egl_dpy, const char *name,
++            Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet)
++{
++    int scrnum = DefaultScreen(dpy);
++    XSetWindowAttributes attr;
++    unsigned long mask;
++    Window root = RootWindow(dpy, scrnum);
++    Window win;
++    EGLContext ctx;
++    const int fullscreen = de->fullscreen;
++    EGLConfig config;
++    int x = de->window_x;
++    int y = de->window_y;
++    int width = de->window_width ? de->window_width : 1280;
++    int height = de->window_height ? de->window_height : 720;
++
++    /* Returns 0 with *winRet / *ctxRet / *surfRet filled in, or -1 on any setup failure */
++    if (fullscreen) {
++        int scrnum = DefaultScreen(dpy);
++
++        x = 0; y = 0;
++        width = DisplayWidth(dpy, scrnum);
++        height = DisplayHeight(dpy, scrnum);
++    }
++
++    {
++        EGLint num_configs;
++        static const EGLint attribs[] = {
++            EGL_RED_SIZE, 1,
++            EGL_GREEN_SIZE, 1,
++            EGL_BLUE_SIZE, 1,
++            EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
++            EGL_NONE
++        };
++
++        if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) {
++            av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n");
++            return -1;
++        }
++    }
++
++    {
++        EGLint vid;
++        if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) {
++            av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n");
++            return -1;
++        }
++
++        {
++            XVisualInfo visTemplate = {
++                .visualid = vid,
++            };
++            int num_visuals;
++            XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask,
++                                                  &visTemplate, &num_visuals);
++            if (!visinfo) { av_log(s, AV_LOG_ERROR, "Error: XGetVisualInfo() failed\n"); return -1; }
++            /* window attributes */
++            attr.background_pixel = 0;
++            attr.border_pixel = 0;
++            attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone);
++            attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask;
++            /* XXX this is a bad way to get a borderless window! */
++            mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask;
++
++            win = XCreateWindow(dpy, root, x, y, width, height,
++                                0, visinfo->depth, InputOutput,
++                                visinfo->visual, mask, &attr);
++            XFree(visinfo);
++        }
++    }
++
++    if (fullscreen)
++        no_border(dpy, win);
++
++    /* set hints and properties */
++    {
++        XSizeHints sizehints;
++        sizehints.x = x;
++        sizehints.y = y;
++        sizehints.width  = width;
++        sizehints.height = height;
++        sizehints.flags = USSize | USPosition;
++        XSetNormalHints(dpy, win, &sizehints);
++        XSetStandardProperties(dpy, win, name, name,
++                               None, (char **)NULL, 0, &sizehints);
++    }
++
++    eglBindAPI(EGL_OPENGL_ES_API);
++
++    {
++        static const EGLint ctx_attribs[] = {
++            EGL_CONTEXT_CLIENT_VERSION, 2,
++            EGL_NONE
++        };
++        ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs);
++        if (!ctx) {
++            av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
++            return -1;
++        }
++    }
++
++
++    XMapWindow(dpy, win);
++
++    {
++        EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL);
++        if (!surf) {
++            av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n");
++            return -1;
++        }
++
++        if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) {
++            av_log(s, AV_LOG_ERROR, "Error: eglMakeCurrent failed\n");
++            return -1;
++        }
++
++        *winRet = win;
++        *ctxRet = ctx;
++        *surfRet = surf;
++    }
++
++    return 0;
++}
++
++static GLint
++compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source)
++{
++    GLuint s = glCreateShader(target);
++
++    if (s == 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n");
++        return 0;
++    }
++
++    glShaderSource(s, 1, (const GLchar **)&source, NULL);
++    glCompileShader(s);
++
++    {
++        GLint ok;
++        glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
++
++        if (!ok) {   // on failure: log the compiler output and return 0 (callers treat 0 as error)
++            GLchar *info;
++            GLint size;
++
++            glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size);
++            info = malloc(size);
++
++            glGetShaderInfoLog(s, size, NULL, info);
++            av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source);
++            free(info);   // the log buffer was previously leaked
++            return 0;
++        }
++    }
++
++    return s;
++}
++
++static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs)
++{
++    GLuint prog = glCreateProgram();
++
++    if (prog == 0) {
++        av_log(s, AV_LOG_ERROR, "Failed to create program\n");
++        return 0;
++    }
++
++    glAttachShader(prog, vs);
++    glAttachShader(prog, fs);
++    glLinkProgram(prog);
++
++    {
++        GLint ok;
++        glGetProgramiv(prog, GL_LINK_STATUS, &ok);
++        if (!ok) {
++            /* Some drivers return a size of 1 for an empty log.  This is the size
++             * of a log that contains only a terminating NUL character.
++             */
++            GLint size;
++            GLchar *info = NULL;
++            glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size);
++            if (size > 1) {
++                info = malloc(size);
++                glGetProgramInfoLog(prog, size, NULL, info);
++            }
++            av_log(s, AV_LOG_ERROR, "Failed to link: %s\n",
++                   (info != NULL) ? info : "<empty log>");
++            free(info);   /* free(NULL) is a no-op; the log buffer was previously leaked */
++            return 0;
++        }
++    }
++
++    return prog;
++}
++
++static int
++gl_setup(struct AVFormatContext *const s)
++{
++    const char *vs =
++        "attribute vec4 pos;\n"
++        "varying vec2 texcoord;\n"
++        "\n"
++        "void main() {\n"
++        "  gl_Position = pos;\n"
++        "  texcoord.x = (pos.x + 1.0) / 2.0;\n"
++        "  texcoord.y = (-pos.y + 1.0) / 2.0;\n"
++        "}\n";
++    const char *fs =
++        "#extension GL_OES_EGL_image_external : enable\n"
++        "precision mediump float;\n"
++        "uniform samplerExternalOES s;\n"
++        "varying vec2 texcoord;\n"
++        "void main() {\n"
++        "  gl_FragColor = texture2D(s, texcoord);\n"
++        "}\n";
++
++    GLuint vs_s;
++    GLuint fs_s;
++    GLuint prog;
++
++    if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) ||
++        !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) ||
++        !(prog = link_program(s, vs_s, fs_s)))
++        return -1;
++
++    glUseProgram(prog);
++
++    {
++        static const float verts[] = {
++            -1, -1,
++            1, -1,
++            1,  1,
++            -1,  1,
++        };
++        glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts);
++    }
++
++    glEnableVertexAttribArray(0);
++    return 0;
++}
++
++static int egl_vout_write_trailer(AVFormatContext *s)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++
++    return 0;
++}
++
++static int egl_vout_write_header(AVFormatContext *s)
++{
++    const AVCodecParameters *const par = s->streams[0]->codecpar;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++    if (s->nb_streams > 1
++        || par->codec_type != AVMEDIA_TYPE_VIDEO
++        || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
++        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
++
++static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame)
++{
++    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0];
++    egl_aux_t *da = NULL;
++    unsigned int i;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
++#endif
++
++    for (i = 0; i != 32; ++i) {
++        if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) {
++            da = de->aux + i;
++            break;
++        }
++    }
++
++    if (da == NULL) {
++        av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__);
++        return AVERROR(EINVAL);
++    }
++
++    if (da->texture == 0) {
++        EGLint attribs[50];
++        EGLint *a = attribs;
++        int i, j;
++        static const EGLint anames[] = {
++            EGL_DMA_BUF_PLANE0_FD_EXT,
++            EGL_DMA_BUF_PLANE0_OFFSET_EXT,
++            EGL_DMA_BUF_PLANE0_PITCH_EXT,
++            EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
++            EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
++            EGL_DMA_BUF_PLANE1_FD_EXT,
++            EGL_DMA_BUF_PLANE1_OFFSET_EXT,
++            EGL_DMA_BUF_PLANE1_PITCH_EXT,
++            EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT,
++            EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
++            EGL_DMA_BUF_PLANE2_FD_EXT,
++            EGL_DMA_BUF_PLANE2_OFFSET_EXT,
++            EGL_DMA_BUF_PLANE2_PITCH_EXT,
++            EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT,
++            EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT,
++        };
++        const EGLint *b = anames;
++
++        *a++ = EGL_WIDTH;
++        *a++ = av_frame_cropped_width(frame);
++        *a++ = EGL_HEIGHT;
++        *a++ = av_frame_cropped_height(frame);
++        *a++ = EGL_LINUX_DRM_FOURCC_EXT;
++        *a++ = desc->layers[0].format;
++
++        for (i = 0; i < desc->nb_layers; ++i) {
++            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
++                const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j;
++                const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index;
++                *a++ = *b++;
++                *a++ = obj->fd;
++                *a++ = *b++;
++                *a++ = p->offset;
++                *a++ = *b++;
++                *a++ = p->pitch;
++                if (obj->format_modifier == 0) {
++                    b += 2;
++                }
++                else {
++                    *a++ = *b++;
++                    *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF);
++                    *a++ = *b++;
++                    *a++ = (EGLint)(obj->format_modifier >> 32);
++                }
++            }
++        }
++
++        *a = EGL_NONE;
++
++#if TRACE_ALL
++        for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) {
++            av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]);
++        }
++#endif
++        {
++            const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy,
++                                                     EGL_NO_CONTEXT,
++                                                     EGL_LINUX_DMA_BUF_EXT,
++                                                     NULL, attribs);
++            if (!image) {
++                av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd);
++                return -1;
++            }
++
++            glGenTextures(1, &da->texture);
++            glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
++            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
++            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
++            glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
++
++            eglDestroyImageKHR(de->setup.egl_dpy, image);
++        }
++
++        da->fd = desc->objects[0].fd;
++    }
++
++    glClearColor(0.5, 0.5, 0.5, 0.5);
++    glClear(GL_COLOR_BUFFER_BIT);
++
++    glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
++    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
++    eglSwapBuffers(de->setup.egl_dpy, de->setup.surf);
++
++    glDeleteTextures(1, &da->texture);
++    da->texture = 0;
++    da->fd = -1;
++
++    return 0;
++}
++
++static void* display_thread(void *v)
++{
++    AVFormatContext *const s = v;
++    egl_display_env_t *const de = s->priv_data;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
++#endif
++    {
++        EGLint egl_major, egl_minor;
++
++        de->setup.dpy = XOpenDisplay(NULL);
++        if (!de->setup.dpy) {
++            av_log(s, AV_LOG_ERROR, "Couldn't open X display\n");
++            goto fail;
++        }
++
++        de->setup.egl_dpy = eglGetDisplay(de->setup.dpy);
++        if (!de->setup.egl_dpy) {
++            av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n");
++            goto fail;
++        }
++
++        if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) {
++            av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n");
++            goto fail;
++        }
++
++        av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor);
++
++        if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) {
++            av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n");
++            goto fail;
++        }
++    }
++
++    if (!de->window_width || !de->window_height) {
++        de->window_width = 1280;
++        de->window_height = 720;
++    }
++    if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout",
++                    &de->setup.win, &de->setup.ctx, &de->setup.surf)) {
++        av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__);
++        goto fail;
++    }
++
++    if (gl_setup(s)) {
++        av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__);
++        goto fail;
++    }
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__);
++#endif
++    sem_post(&de->display_start_sem);
++
++    for (;;) {
++        AVFrame *frame;
++
++        while (sem_wait(&de->q_sem) != 0) {
++            av_assert0(errno == EINTR);
++        }
++
++        if (de->q_terminate)
++            break;
++
++        pthread_mutex_lock(&de->q_lock);
++        frame = de->q_next;
++        de->q_next = NULL;
++        pthread_mutex_unlock(&de->q_lock);
++
++        do_display(s, de, frame);
++
++        av_frame_free(&de->q_this);
++        de->q_this = frame;
++    }
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, ">>> %s\n", __func__);
++#endif
++
++    return NULL;
++
++fail:
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__);
++#endif
++    de->q_terminate = 1;
++    sem_post(&de->display_start_sem);
++
++    return NULL;
++}
++
++static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
++{
++    const AVFrame *const src_frame = (AVFrame *)pkt->data;
++    AVFrame *frame;
++    egl_display_env_t *const de = s->priv_data;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++
++    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
++        frame = av_frame_alloc();
++        av_frame_ref(frame, src_frame);
++    }
++    else if (src_frame->format == AV_PIX_FMT_VAAPI) {
++        frame = av_frame_alloc();
++        frame->format = AV_PIX_FMT_DRM_PRIME;
++        if (av_hwframe_map(frame, src_frame, 0) != 0) {
++            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
++            av_frame_free(&frame);
++            return AVERROR(EINVAL);
++        }
++    }
++    else {
++        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
++        return AVERROR(EINVAL);
++    }
++
++    // Really hacky sync
++    while (de->show_all && de->q_next) {
++        usleep(3000);
++    }
++
++    pthread_mutex_lock(&de->q_lock);
++    {
++        AVFrame *const t = de->q_next;
++        de->q_next = frame;
++        frame = t;
++    }
++    pthread_mutex_unlock(&de->q_lock);
++
++    if (frame == NULL)
++        sem_post(&de->q_sem);
++    else
++        av_frame_free(&frame);
++
++    return 0;
++}
++
++static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
++                                unsigned flags)
++{
++    av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
++    return AVERROR_PATCHWELCOME;
++}
++
++static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type);
++#endif
++    switch (type) {
++    case AV_APP_TO_DEV_WINDOW_REPAINT:
++        return 0;
++    default:
++        break;
++    }
++    return AVERROR(ENOSYS);
++}
++
++// deinit is called if init fails so no need to clean up explicitly here
++static int egl_vout_init(struct AVFormatContext *s)
++{
++    egl_display_env_t *const de = s->priv_data;
++    unsigned int i;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    de->setup = (struct egl_setup) { 0 };
++
++    for (i = 0; i != 32; ++i) {
++        de->aux[i].fd = -1;
++    }
++
++    de->q_terminate = 0;
++    pthread_mutex_init(&de->q_lock, NULL);
++    sem_init(&de->q_sem, 0, 0);
++    sem_init(&de->display_start_sem, 0, 0);
++    av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0);
++
++    sem_wait(&de->display_start_sem);
++    if (de->q_terminate) {
++        av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__);
++        return -1;
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++
++    return 0;
++}
++
++static void egl_vout_deinit(struct AVFormatContext *s)
++{
++    egl_display_env_t *const de = s->priv_data;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    de->q_terminate = 1;
++    sem_post(&de->q_sem);
++    pthread_join(de->q_thread, NULL);
++    sem_destroy(&de->q_sem);
++    pthread_mutex_destroy(&de->q_lock);
++
++    av_frame_free(&de->q_next);
++    av_frame_free(&de->q_this);
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++}
++
++#define OFFSET(x) offsetof(egl_display_env_t, x)
++static const AVOption options[] = {
++    { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++    { "window_size",  "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
++    { "window_x",     "set window x offset",    OFFSET(window_x),     AV_OPT_TYPE_INT,    { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++    { "window_y",     "set window y offset",    OFFSET(window_y),     AV_OPT_TYPE_INT,    { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++    { "fullscreen",   "set fullscreen display", OFFSET(fullscreen),   AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++    { NULL }
++
++};
++
++static const AVClass egl_vout_class = {
++    .class_name = "egl vid outdev",
++    .item_name  = av_default_item_name,
++    .option     = options,
++    .version    = LIBAVUTIL_VERSION_INT,
++    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
++};
++
++FFOutputFormat ff_vout_egl_muxer = {
++    .p = {
++        .name           = "vout_egl",
++        .long_name      = NULL_IF_CONFIG_SMALL("Egl video output device"),
++        .audio_codec    = AV_CODEC_ID_NONE,
++        .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,
++        .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
++        .priv_class     = &egl_vout_class,
++    },
++    .priv_data_size = sizeof(egl_display_env_t),
++    .write_header   = egl_vout_write_header,
++    .write_packet   = egl_vout_write_packet,
++    .write_uncoded_frame = egl_vout_write_frame,
++    .write_trailer  = egl_vout_write_trailer,
++    .control_message = egl_vout_control_message,
++    .init           = egl_vout_init,
++    .deinit         = egl_vout_deinit,
++};
++
+diff --git a/libavfilter/Makefile b/libavfilter/Makefile
+index 91487afb2185..6dd422a9358c 100644
+--- a/libavfilter/Makefile
++++ b/libavfilter/Makefile
+@@ -272,6 +272,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER)                += vf_neighbor.o
+ OBJS-$(CONFIG_DEFLICKER_FILTER)              += vf_deflicker.o
+ OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER)        += vf_vpp_qsv.o
+ OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER)      += vf_deinterlace_vaapi.o vaapi_vpp.o
++OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER)    += vf_deinterlace_v4l2m2m.o
+ OBJS-$(CONFIG_DEJUDDER_FILTER)               += vf_dejudder.o
+ OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
+ OBJS-$(CONFIG_DENOISE_VAAPI_FILTER)          += vf_misc_vaapi.o vaapi_vpp.o
+@@ -536,6 +537,7 @@ OBJS-$(CONFIG_TRANSPOSE_VT_FILTER)           += vf_transpose_vt.o
+ OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER)       += vf_transpose_vulkan.o vulkan.o vulkan_filter.o
+ OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
+ OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
++OBJS-$(CONFIG_UNSAND_FILTER)                 += vf_unsand.o
+ OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
+ OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
+                                                 opencl/unsharp.o
 diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
-index 357ff61ca803..d504fa1bc8de 100644
+index 9819f0f95b7f..a4ef3091157d 100644
 --- a/libavfilter/allfilters.c
 +++ b/libavfilter/allfilters.c
-@@ -421,6 +421,7 @@ extern const AVFilter ff_vf_scale;
+@@ -256,6 +256,7 @@ extern const AVFilter ff_vf_derain;
+ extern const AVFilter ff_vf_deshake;
+ extern const AVFilter ff_vf_deshake_opencl;
+ extern const AVFilter ff_vf_despill;
++extern const AVFilter ff_vf_deinterlace_v4l2m2m;
+ extern const AVFilter ff_vf_detelecine;
+ extern const AVFilter ff_vf_dilation;
+ extern const AVFilter ff_vf_dilation_opencl;
+@@ -434,6 +435,7 @@ extern const AVFilter ff_vf_scale;
  extern const AVFilter ff_vf_scale_cuda;
  extern const AVFilter ff_vf_scale_npp;
  extern const AVFilter ff_vf_scale_qsv;
 +extern const AVFilter ff_vf_scale_v4l2m2m;
  extern const AVFilter ff_vf_scale_vaapi;
+ extern const AVFilter ff_vf_scale_vt;
  extern const AVFilter ff_vf_scale_vulkan;
- extern const AVFilter ff_vf_scale2ref;
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index 1a3bef5bcba6..2df39ec0f19f 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -52,31 +52,36 @@
- #include "avfilter.h"
- #include "formats.h"
- #include "internal.h"
-+#include "scale_eval.h"
- #include "video.h"
+@@ -507,6 +509,7 @@ extern const AVFilter ff_vf_trim;
+ extern const AVFilter ff_vf_unpremultiply;
+ extern const AVFilter ff_vf_unsharp;
+ extern const AVFilter ff_vf_unsharp_opencl;
++extern const AVFilter ff_vf_unsand;
+ extern const AVFilter ff_vf_untile;
+ extern const AVFilter ff_vf_uspp;
+ extern const AVFilter ff_vf_v360;
+diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c
+index 5811720c61ff..13e1f3446585 100644
+--- a/libavfilter/buffersink.c
++++ b/libavfilter/buffersink.c
+@@ -60,6 +60,11 @@ typedef struct BufferSinkContext {
+     int sample_rates_size;
  
+     AVFrame *peeked_frame;
++
++    union {
++        av_buffersink_alloc_video_frame * video;
++    } alloc_cb;
++    void * alloc_v;
+ } BufferSinkContext;
+ 
+ #define NB_ITEMS(list) (list ## _size / sizeof(*list))
+@@ -129,6 +134,22 @@ int attribute_align_arg av_buffersink_get_samples(AVFilterContext *ctx,
+     return get_frame_internal(ctx, frame, 0, nb_samples);
+ }
+ 
++static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h)
++{
++    AVFilterContext * const ctx = link->dst;
++    BufferSinkContext * const bs = ctx->priv;
++    return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) :
++        ff_default_get_video_buffer(link, w, h);
++}
++
++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v)
++{
++    BufferSinkContext * const bs = ctx->priv;
++    bs->alloc_cb.video = cb;
++    bs->alloc_v = v;
++    return 0;
++}
++
+ static av_cold int common_init(AVFilterContext *ctx)
+ {
+     BufferSinkContext *buf = ctx->priv;
+@@ -355,6 +376,14 @@ static const AVOption abuffersink_options[] = {
+ AVFILTER_DEFINE_CLASS(buffersink);
+ AVFILTER_DEFINE_CLASS(abuffersink);
+ 
++static const AVFilterPad avfilter_vsink_buffer_inputs[] = {
++    {
++        .name = "default",
++        .type = AVMEDIA_TYPE_VIDEO,
++        .get_buffer = {.video = alloc_video_buffer},
++    },
++};
++
+ const AVFilter ff_vsink_buffer = {
+     .name          = "buffersink",
+     .description   = NULL_IF_CONFIG_SMALL("Buffer video frames, and make them available to the end of the filter graph."),
+@@ -363,7 +392,7 @@ const AVFilter ff_vsink_buffer = {
+     .init          = common_init,
+     .uninit        = uninit,
+     .activate      = activate,
+-    FILTER_INPUTS(ff_video_default_filterpad),
++    FILTER_INPUTS(avfilter_vsink_buffer_inputs),
+     .outputs       = NULL,
+     FILTER_QUERY_FUNC(vsink_query_formats),
+ };
+diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h
+index 361d60367933..47a296cf5edb 100644
+--- a/libavfilter/buffersink.h
++++ b/libavfilter/buffersink.h
+@@ -166,6 +166,9 @@ int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame);
+  */
+ int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples);
+ 
++typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h);
++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v);
++
+ /**
+  * @}
+  */
+diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c
+index b5682006f05c..e42ff26d5182 100644
+--- a/libavfilter/buffersrc.c
++++ b/libavfilter/buffersrc.c
+@@ -210,7 +210,7 @@ int attribute_align_arg av_buffersrc_add_frame_flags(AVFilterContext *ctx, AVFra
+ 
+         switch (ctx->outputs[0]->type) {
+         case AVMEDIA_TYPE_VIDEO:
+-            CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height,
++            CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame),
+                                      frame->format, frame->colorspace,
+                                      frame->color_range, frame->pts);
+             break;
+diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
+index d41a25f8ea99..233b17a21bde 100644
+--- a/libavfilter/vf_bwdif.c
++++ b/libavfilter/vf_bwdif.c
+@@ -115,19 +115,28 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
+     YADIFContext *yadif = &bwdif->yadif;
+     ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff };
+     int i;
++    int last_plane = -1;
+ 
+     for (i = 0; i < yadif->csp->nb_components; i++) {
+         int w = dstpic->width;
+         int h = dstpic->height;
++        const AVComponentDescriptor * const comp = yadif->csp->comp + i;
++
++        // If the last plane was the same as this plane assume we've dealt
++        // with all the pels already
++        if (last_plane == comp->plane)
++            continue;
++        last_plane = comp->plane;
+ 
+         if (i == 1 || i == 2) {
+             w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
+             h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
+         }
+ 
+-        td.w     = w;
+-        td.h     = h;
+-        td.plane = i;
++        // comp step is in bytes but td.w is in pels
++        td.w       = w * comp->step / ((comp->depth + 7) / 8);
++        td.h       = h;
++        td.plane   = comp->plane;
+ 
+         ff_filter_execute(ctx, filter_slice, &td, NULL,
+                           FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx)));
+@@ -151,6 +160,7 @@ static const enum AVPixelFormat pix_fmts[] = {
+     AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
+     AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
+     AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
++    AV_PIX_FMT_NV12,
+     AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
+     AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+     AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
+diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
+new file mode 100644
+index 000000000000..a5f3a776f824
+--- /dev/null
++++ b/libavfilter/vf_deinterlace_v4l2m2m.c
+@@ -0,0 +1,2120 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * deinterlace video filter - V4L2 M2M
++ */
++
++#include <drm_fourcc.h>
++
++#include <linux/videodev2.h>
++
++#include <dirent.h>
++#include <fcntl.h>
++#include <poll.h>
++#include <stdatomic.h>
++#include <stdio.h>
++#include <string.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++#include <unistd.h>
++
++#include "config.h"
++
++#include "libavutil/avassert.h"
++#include "libavutil/avstring.h"
++#include "libavutil/common.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavutil/internal.h"
++#include "libavutil/mathematics.h"
++#include "libavutil/mem.h"
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/time.h"
++
++#define FF_INTERNAL_FIELDS 1
++#include "framequeue.h"
++#include "filters.h"
++#include "avfilter.h"
++#include "formats.h"
++#include "scale_eval.h"
++#include "video.h"
++
 +#ifndef DRM_FORMAT_P030
 +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
 +#endif
 +
- typedef struct V4L2Queue V4L2Queue;
- typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared;
- 
--typedef struct V4L2PlaneInfo {
--    int bytesperline;
--    size_t length;
--} V4L2PlaneInfo;
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in drm_fourcc.h; hopefully they will be sometime in the future, but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
++typedef struct V4L2Queue V4L2Queue;
++typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared;
++
 +typedef enum filter_type_v4l2_e
 +{
 +    FILTER_V4L2_DEINTERLACE = 1,
 +    FILTER_V4L2_SCALE,
 +} filter_type_v4l2_t;
- 
- typedef struct V4L2Buffer {
-     int enqueued;
-     int reenqueue;
--    int fd;
-     struct v4l2_buffer buffer;
-     AVFrame frame;
-     struct v4l2_plane planes[VIDEO_MAX_PLANES];
-     int num_planes;
--    V4L2PlaneInfo plane_info[VIDEO_MAX_PLANES];
-     AVDRMFrameDescriptor drm_frame;
-     V4L2Queue *q;
- } V4L2Buffer;
- 
- typedef struct V4L2Queue {
-     struct v4l2_format format;
++
++typedef struct V4L2Buffer {
++    int enqueued;
++    int reenqueue;
++    struct v4l2_buffer buffer;
++    AVFrame frame;
++    struct v4l2_plane planes[VIDEO_MAX_PLANES];
++    int num_planes;
++    AVDRMFrameDescriptor drm_frame;
++    V4L2Queue *q;
++} V4L2Buffer;
++
++typedef struct V4L2Queue {
++    struct v4l2_format format;
 +    struct v4l2_selection sel;
-     int num_buffers;
-     V4L2Buffer *buffers;
-     DeintV4L2M2MContextShared *ctx;
-@@ -111,11 +116,18 @@ typedef struct pts_track_s
- 
- typedef struct DeintV4L2M2MContextShared {
-     void * logctx;  // For logging - will be NULL when done
++    int eos;
++    int num_buffers;
++    V4L2Buffer *buffers;
++    const char * name;
++    DeintV4L2M2MContextShared *ctx;
++} V4L2Queue;
++
++typedef struct pts_stats_s
++{
++    void * logctx;
++    const char * name;  // For debug
++    unsigned int last_count;
++    unsigned int last_interval;
++    int64_t last_pts;
++} pts_stats_t;
++
++#define PTS_TRACK_SIZE 32
++typedef struct pts_track_el_s
++{
++    uint32_t n;
++    unsigned int interval;
++    AVFrame * props;
++} pts_track_el_t;
++
++typedef struct pts_track_s
++{
++    uint32_t n;
++    uint32_t last_n;
++    int got_2;
++    void * logctx;
++    pts_stats_t stats;
++    pts_track_el_t a[PTS_TRACK_SIZE];
++} pts_track_t;
++
++typedef enum drain_state_e
++{
++    DRAIN_NONE = 0,     // Not draining
++    DRAIN_TIMEOUT,      // Drain until normal timeout setup yields no frame
++    DRAIN_LAST,         // Drain with long timeout last_frame in received on output expected
++    DRAIN_EOS,          // Drain with long timeout EOS expected
++    DRAIN_DONE          // Drained
++} drain_state_t;
++
++typedef struct DeintV4L2M2MContextShared {
++    void * logctx;  // For logging - will be NULL when done
 +    filter_type_v4l2_t filter_type;
- 
-     int fd;
-     int done;
-     int width;
-     int height;
++
++    int fd;
++    int done;   // fd closed - awaiting all refs dropped
++    int width;
++    int height;
++
++    int drain;          // EOS received (inlink status)
++    drain_state_t drain_state;
++    int64_t drain_pts;  // PTS associated with inlink status
++
++    unsigned int frames_rx;
++    unsigned int frames_tx;
 +
 +    // from options
 +    int output_width;
 +    int output_height;
 +    enum AVPixelFormat output_format;
 +
-     int orig_width;
-     int orig_height;
-     atomic_uint refcount;
-@@ -134,8 +146,60 @@ typedef struct DeintV4L2M2MContext {
-     const AVClass *class;
- 
-     DeintV4L2M2MContextShared *shared;
++    int has_enc_stop;
++    // We expect to get exactly the same number of frames out as we put in
++    // We can drain by matching input to output
++    int one_to_one;
++
++    int orig_width;
++    int orig_height;
++    atomic_uint refcount;
++
++    AVBufferRef *hw_frames_ctx;
++
++    unsigned int field_order;
++
++    pts_track_t track;
++
++    V4L2Queue output;
++    V4L2Queue capture;
++} DeintV4L2M2MContextShared;
++
++typedef struct DeintV4L2M2MContext {
++    const AVClass *class;
++
++    DeintV4L2M2MContextShared *shared;
 +
 +    char * w_expr;
 +    char * h_expr;
@@ -26438,8 +15127,47 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    enum AVColorTransferCharacteristic colour_transfer;
 +    enum AVColorSpace colour_matrix;
 +    enum AVChromaLocation chroma_location;
- } DeintV4L2M2MContext;
- 
++} DeintV4L2M2MContext;
++
++
++static inline void frame_set_progressive(AVFrame* frame)
++{
++#if FF_API_INTERLACED_FRAME
++FF_DISABLE_DEPRECATION_WARNINGS
++    frame->interlaced_frame = 0;
++    frame->top_field_first =  0;
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++    frame->flags &= ~(AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED);
++}
++
++static inline int frame_is_interlaced(const AVFrame* const frame)
++{
++#if FF_API_INTERLACED_FRAME
++FF_DISABLE_DEPRECATION_WARNINGS
++    return frame->interlaced_frame || (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0;
++FF_ENABLE_DEPRECATION_WARNINGS
++#else
++    return (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0;
++#endif
++}
++
++static inline int frame_is_tff(const AVFrame* const frame)
++{
++#if FF_API_INTERLACED_FRAME
++FF_DISABLE_DEPRECATION_WARNINGS
++    return frame->top_field_first || (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0;
++FF_ENABLE_DEPRECATION_WARNINGS
++#else
++    return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0;
++#endif
++}
++
++static inline int drain_frame_expected(const drain_state_t d)
++{
++    return d == DRAIN_EOS || d == DRAIN_LAST;
++}
++
 +// These just list the ones we know we can cope with
 +static uint32_t
 +fmt_av_to_v4l2(const enum AVPixelFormat avfmt)
@@ -26449,9 +15177,11 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +        return V4L2_PIX_FMT_YUV420;
 +    case AV_PIX_FMT_NV12:
 +        return V4L2_PIX_FMT_NV12;
++#if CONFIG_SAND
 +    case AV_PIX_FMT_RPI4_8:
 +    case AV_PIX_FMT_SAND128:
 +        return V4L2_PIX_FMT_NV12_COL128;
++#endif
 +    default:
 +        break;
 +    }
@@ -26466,21 +15196,188 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +        return AV_PIX_FMT_YUV420P;
 +    case V4L2_PIX_FMT_NV12:
 +        return AV_PIX_FMT_NV12;
++#if CONFIG_SAND
 +    case V4L2_PIX_FMT_NV12_COL128:
 +        return AV_PIX_FMT_RPI4_8;
++#endif
 +    default:
 +        break;
 +    }
 +    return AV_PIX_FMT_NONE;
 +}
 +
- static unsigned int pts_stats_interval(const pts_stats_t * const stats)
- {
-     return stats->last_interval;
-@@ -301,6 +365,39 @@ static int pts_track_init(pts_track_t * const trk, void *logctx)
-     return 0;
- }
- 
++static unsigned int pts_stats_interval(const pts_stats_t * const stats)
++{
++    return stats->last_interval;
++}
++
++// Pick 64 for max last count - that is >1sec at 60fps
++#define STATS_LAST_COUNT_MAX 64
++#define STATS_INTERVAL_MAX (1 << 30)
++static void pts_stats_add(pts_stats_t * const stats, int64_t pts)
++{
++    if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) {
++        if (stats->last_count < STATS_LAST_COUNT_MAX)
++            ++stats->last_count;
++        return;
++    }
++
++    if (stats->last_pts != AV_NOPTS_VALUE) {
++        const int64_t interval = pts - stats->last_pts;
++
++        if (interval < 0 || interval >= STATS_INTERVAL_MAX ||
++            stats->last_count >= STATS_LAST_COUNT_MAX) {
++            if (stats->last_interval != 0)
++                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n",
++                       __func__, stats->name, interval, stats->last_count);
++            stats->last_interval = 0;
++        }
++        else {
++            const int64_t frame_time = interval / (int64_t)stats->last_count;
++
++            if (frame_time != stats->last_interval)
++                av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n",
++                       __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time);
++            stats->last_interval = frame_time;
++        }
++    }
++
++    stats->last_pts = pts;
++    stats->last_count = 1;
++}
++
++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name)
++{
++    *stats = (pts_stats_t){
++        .logctx = logctx,
++        .name = name,
++        .last_count = 1,
++        .last_interval = 0,
++        .last_pts = AV_NOPTS_VALUE
++    };
++}
++
++static inline uint32_t pts_track_next_n(pts_track_t * const trk)
++{
++    if (++trk->n == 0)
++        trk->n = 1;
++    return trk->n;
++}
++
++static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst)
++{
++    uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000);
++    pts_track_el_t * t;
++
++    // As a first guess assume that n==0 means last frame
++    if (n == 0) {
++        n = trk->last_n;
++        if (n == 0)
++            goto fail;
++    }
++
++    t = trk->a + (n & (PTS_TRACK_SIZE - 1));
++
++    if (t->n != n) {
++        av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n);
++        goto fail;
++    }
++
++    // 1st frame is simple - just believe it
++    if (n != trk->last_n) {
++        trk->last_n = n;
++        trk->got_2 = 0;
++        return av_frame_copy_props(dst, t->props);
++    }
++
++    // Only believe in a single interpolated frame
++    if (trk->got_2)
++        goto fail;
++    trk->got_2 = 1;
++
++    av_frame_copy_props(dst, t->props);
++
++
++    // If we can't guess - don't
++    if (t->interval == 0) {
++        dst->best_effort_timestamp = AV_NOPTS_VALUE;
++        dst->pts = AV_NOPTS_VALUE;
++        dst->pkt_dts = AV_NOPTS_VALUE;
++    }
++    else {
++        if (dst->best_effort_timestamp != AV_NOPTS_VALUE)
++            dst->best_effort_timestamp += t->interval / 2;
++        if (dst->pts != AV_NOPTS_VALUE)
++            dst->pts += t->interval / 2;
++        if (dst->pkt_dts != AV_NOPTS_VALUE)
++            dst->pkt_dts += t->interval / 2;
++    }
++
++    return 0;
++
++fail:
++    trk->last_n = 0;
++    trk->got_2 = 0;
++    dst->pts = AV_NOPTS_VALUE;
++    dst->pkt_dts = AV_NOPTS_VALUE;
++    return 0;
++}
++
++// We are only ever expecting in-order frames so nothing more clever is required
++static unsigned int
++pts_track_count(const pts_track_t * const trk)
++{
++    return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1);
++}
++
++static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src)
++{
++    const uint32_t n = pts_track_next_n(trk);
++    pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1));
++
++    pts_stats_add(&trk->stats, src->pts);
++
++    t->n = n;
++    t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last
++    av_frame_unref(t->props);
++    av_frame_copy_props(t->props, src);
++
++    // We now know what the previous interval was, rather than having to guess,
++    // so set it.  There is a better than decent chance that this is before
++    // we use it.
++    if (t->interval != 0) {
++        pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1));
++        prev_t->interval = t->interval;
++    }
++
++    // In case deinterlace interpolates frames use every other usec
++    return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2};
++}
++
++static void pts_track_uninit(pts_track_t * const trk)
++{
++    unsigned int i;
++    for (i = 0; i != PTS_TRACK_SIZE; ++i) {
++        trk->a[i].n = 0;
++        av_frame_free(&trk->a[i].props);
++    }
++}
++
++static int pts_track_init(pts_track_t * const trk, void *logctx)
++{
++    unsigned int i;
++    trk->n = 1;
++    pts_stats_init(&trk->stats, logctx, "track");
++    for (i = 0; i != PTS_TRACK_SIZE; ++i) {
++        trk->a[i].n = 0;
++        if ((trk->a[i].props = av_frame_alloc()) == NULL) {
++            pts_track_uninit(trk);
++            return AVERROR(ENOMEM);
++        }
++    }
++    return 0;
++}
++
 +static inline uint32_t
 +fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n)
 +{
@@ -26505,6 +15402,12 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
 +}
 +
++static inline uint32_t
++buf_bytesused0(const struct v4l2_buffer * const buf)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? buf->m.planes[0].bytesused : buf->bytesused;
++}
++
 +static void
 +init_format(V4L2Queue * const q, const uint32_t format_type)
 +{
@@ -26514,36 +15417,29 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    q->sel.type    = format_type;
 +}
 +
- static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx)
- {
-     struct v4l2_capability cap;
-@@ -311,80 +408,99 @@ static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx)
-     if (ret < 0)
-         return ret;
- 
--    if (!(cap.capabilities & V4L2_CAP_STREAMING))
++static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx)
++{
++    struct v4l2_capability cap;
++    int ret;
++
++    memset(&cap, 0, sizeof(cap));
++    ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap);
++    if (ret < 0)
++        return ret;
++
 +    if (ctx->filter_type == FILTER_V4L2_SCALE &&
 +        strcmp("bcm2835-codec-isp", cap.card) != 0)
 +    {
 +        av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n");
-         return AVERROR(EINVAL);
++        return AVERROR(EINVAL);
 +    }
- 
--    if (cap.capabilities & V4L2_CAP_VIDEO_M2M) {
--        ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
--        ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
--
--        return 0;
++
 +    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
 +        av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n");
 +        return AVERROR(EINVAL);
-     }
- 
-     if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) {
--        ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
--        ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
--
--        return 0;
++    }
++
++    if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) {
 +        init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
 +        init_format(&ctx->output,  V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE);
 +    }
@@ -26554,85 +15450,57 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    else {
 +        av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n");
 +        return AVERROR(EINVAL);
-     }
- 
--    return AVERROR(EINVAL);
++    }
++
 +    return 0;
- }
- 
--static int deint_v4l2m2m_try_format(V4L2Queue *queue)
++}
++
 +// Just use for probe - doesn't modify q format
 +static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt)
- {
--    struct v4l2_format *fmt        = &queue->format;
++{
 +    struct v4l2_format fmt         = {.type = queue->format.type};
-     DeintV4L2M2MContextShared *ctx = queue->ctx;
-     int ret, field;
++    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    int ret, field;
 +    // Pick YUV to test with if not otherwise specified
 +    uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt);
 +    enum AVPixelFormat r_avfmt;
 +
- 
--    ret = ioctl(ctx->fd, VIDIOC_G_FMT, fmt);
++
 +    ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt);
-     if (ret)
-         av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret);
- 
--    if (V4L2_TYPE_IS_OUTPUT(fmt->type))
++    if (ret)
++        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret);
++
 +    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type))
-         field = V4L2_FIELD_INTERLACED_TB;
-     else
-         field = V4L2_FIELD_NONE;
- 
--    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
--        fmt->fmt.pix_mp.pixelformat = V4L2_PIX_FMT_YUV420;
--        fmt->fmt.pix_mp.field = field;
--        fmt->fmt.pix_mp.width = ctx->width;
--        fmt->fmt.pix_mp.height = ctx->height;
++        field = V4L2_FIELD_INTERLACED_TB;
++    else
++        field = V4L2_FIELD_NONE;
++
 +    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
 +        fmt.fmt.pix_mp.pixelformat = pixelformat;
 +        fmt.fmt.pix_mp.field = field;
 +        fmt.fmt.pix_mp.width = width;
 +        fmt.fmt.pix_mp.height = height;
-     } else {
--        fmt->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420;
--        fmt->fmt.pix.field = field;
--        fmt->fmt.pix.width = ctx->width;
--        fmt->fmt.pix.height = ctx->height;
++    } else {
 +        fmt.fmt.pix.pixelformat = pixelformat;
 +        fmt.fmt.pix.field = field;
 +        fmt.fmt.pix.width = width;
 +        fmt.fmt.pix.height = height;
-     }
- 
--    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__,
--		 fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height,
--		 fmt->fmt.pix_mp.pixelformat,
--		 fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline);
++    }
++
 +    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__,
 +         fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
 +         fmt.fmt.pix_mp.pixelformat,
 +         fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
- 
--    ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, fmt);
++
 +    ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt);
-     if (ret)
-         return AVERROR(EINVAL);
- 
--    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__,
--		 fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height,
--		 fmt->fmt.pix_mp.pixelformat,
--		 fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline);
++    if (ret)
++        return AVERROR(EINVAL);
++
 +    av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__,
 +         fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height,
 +         fmt.fmt.pix_mp.pixelformat,
 +         fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline);
- 
--    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
--        if ((fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 &&
--             fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_NV12) ||
--            fmt->fmt.pix_mp.field != field) {
--            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
++
 +    r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt));
 +    if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) {
 +        av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src");
@@ -26646,95 +15514,51 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
 +        if (fmt.fmt.pix_mp.field != field) {
 +            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type);
- 
-             return AVERROR(EINVAL);
-         }
-     } else {
--        if ((fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 &&
--             fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_NV12) ||
--            fmt->fmt.pix.field != field) {
--            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type);
++
++            return AVERROR(EINVAL);
++        }
++    } else {
 +        if (fmt.fmt.pix.field != field) {
 +            av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type);
- 
-             return AVERROR(EINVAL);
-         }
-@@ -393,68 +509,410 @@ static int deint_v4l2m2m_try_format(V4L2Queue *queue)
-     return 0;
- }
- 
--static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height, int pitch, int ysize)
++
++            return AVERROR(EINVAL);
++        }
++    }
++
++    return 0;
++}
++
 +static int
 +do_s_fmt(V4L2Queue * const q)
- {
--    struct v4l2_format *fmt        = &queue->format;
--    DeintV4L2M2MContextShared *ctx = queue->ctx;
++{
 +    DeintV4L2M2MContextShared * const ctx = q->ctx;
 +    const uint32_t pixelformat = fmt_pixelformat(&q->format);
-     int ret;
- 
--    struct v4l2_selection sel = {
--        .type = fmt->type,
--        .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS,
--    };
--
--    // This works for most single object 4:2:0 types
--    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
--        fmt->fmt.pix_mp.pixelformat = pixelformat;
--        fmt->fmt.pix_mp.field = field;
--        fmt->fmt.pix_mp.width = width;
--        fmt->fmt.pix_mp.height = ysize / pitch;
--        fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch;
--        fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1);
--    } else {
--        fmt->fmt.pix.pixelformat = pixelformat;
--        fmt->fmt.pix.field = field;
--        fmt->fmt.pix.width = width;
--        fmt->fmt.pix.height = height;
--        fmt->fmt.pix.sizeimage = 0;
--        fmt->fmt.pix.bytesperline = 0;
--    }
--
--    ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt);
++    int ret;
++
 +    ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format);
-     if (ret) {
-         ret = AVERROR(errno);
--        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret);
++    if (ret) {
++        ret = AVERROR(errno);
 +        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret));
-         return ret;
-     }
- 
--    if (pixelformat != fmt->fmt.pix.pixelformat) {
--        av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt->fmt.pix.pixelformat));
++        return ret;
++    }
++
 +    if (pixelformat != fmt_pixelformat(&q->format)) {
 +        av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format)));
-         return AVERROR(EINVAL);
-     }
- 
--    ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel);
++        return AVERROR(EINVAL);
++    }
++
 +    q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE,
 +    q->sel.flags  = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE;
 +
 +    ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel);
-     if (ret) {
-         ret = AVERROR(errno);
--        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION failed: %d\n", ret);
++    if (ret) {
++        ret = AVERROR(errno);
 +        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret));
-     }
- 
--    sel.r.width = width;
--    sel.r.height = height;
--    sel.r.left = 0;
--    sel.r.top = 0;
--    sel.target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE,
--    sel.flags = V4L2_SEL_FLAG_LE;
++    }
++
 +    return 0;
 +}
- 
--    ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel);
--    if (ret) {
--        ret = AVERROR(errno);
--        av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %d\n", ret);
++
 +static void
 +set_fmt_color(struct v4l2_format *const fmt,
 +               const enum AVColorPrimaries avcp,
@@ -27017,6 +15841,7 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +                h = src->layers[0].planes[1].offset / bpl;
 +                w = bpl;
 +            }
++#if CONFIG_SAND
 +            else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
 +                if (src->layers[0].nb_planes != 2)
 +                    break;
@@ -27025,9 +15850,11 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +                h = src->layers[0].planes[1].offset / 128;
 +                bpl = fourcc_mod_broadcom_param(mod);
 +            }
++#endif
 +            break;
 +
 +        case DRM_FORMAT_P030:
++#if CONFIG_SAND
 +            if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
 +                if (src->layers[0].nb_planes != 2)
 +                    break;
@@ -27036,6 +15863,7 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +                h = src->layers[0].planes[1].offset / 128;
 +                bpl = fourcc_mod_broadcom_param(mod);
 +            }
++#endif
 +            break;
 +
 +        default:
@@ -27061,8 +15889,8 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +        pix->height = h;
 +        pix->pixelformat = pix_fmt;
 +        pix->bytesperline = bpl;
-     }
- 
++    }
++
 +    set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc);
 +    set_fmt_color_range(format, frame->color_range);
 +
@@ -27071,9 +15899,9 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    q->sel.r.left = frame->crop_left;
 +    q->sel.r.top = frame->crop_top;
 +
-     return 0;
- }
- 
++    return 0;
++}
++
 +
 +static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height)
 +{
@@ -27108,42 +15936,91 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    return do_s_fmt(queue);
 +}
 +
- static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node)
- {
-     int ret;
-@@ -464,16 +922,22 @@ static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node
-         return AVERROR(errno);
- 
-     ret = deint_v4l2m2m_prepare_context(ctx);
--    if (ret)
++static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node)
++{
++    int ret;
++
++    ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0);
++    if (ctx->fd < 0)
++        return AVERROR(errno);
++
++    ret = deint_v4l2m2m_prepare_context(ctx);
 +    if (ret) {
 +        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n");
-         goto fail;
++        goto fail;
 +    }
- 
--    ret = deint_v4l2m2m_try_format(&ctx->capture);
--    if (ret)
++
 +    ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format);
 +    if (ret) {
 +        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n");
-         goto fail;
++        goto fail;
 +    }
- 
--    ret = deint_v4l2m2m_try_format(&ctx->output);
--    if (ret)
++
 +    ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE);
 +    if (ret) {
 +        av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n");
-         goto fail;
++        goto fail;
 +    }
- 
-     return 0;
- 
-@@ -534,26 +998,118 @@ static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf)
-     return 0;
- }
- 
--static int v4l2_buffer_export_drm(V4L2Buffer* avbuf, const uint32_t pixelformat)
++
++    return 0;
++
++fail:
++    close(ctx->fd);
++    ctx->fd = -1;
++
++    return ret;
++}
++
++static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx)
++{
++    int ret = AVERROR(EINVAL);
++    struct dirent *entry;
++    char node[PATH_MAX];
++    DIR *dirp;
++
++    dirp = opendir("/dev");
++    if (!dirp)
++        return AVERROR(errno);
++
++    for (entry = readdir(dirp); entry; entry = readdir(dirp)) {
++
++        if (strncmp(entry->d_name, "video", 5))
++            continue;
++
++        snprintf(node, sizeof(node), "/dev/%s", entry->d_name);
++        av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node);
++        ret = deint_v4l2m2m_probe_device(ctx, node);
++        if (!ret)
++            break;
++    }
++
++    closedir(dirp);
++
++    if (ret) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n");
++        ctx->fd = -1;
++
++        return ret;
++    }
++
++    av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node);
++
++    return 0;
++}
++
++static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf)
++{
++    int ret;
++
++    ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer);
++    if (ret < 0)
++        return AVERROR(errno);
++
++    buf->enqueued = 1;
++
++    return 0;
++}
++
 +static void
 +drm_frame_init(AVDRMFrameDescriptor * const d)
 +{
@@ -27184,26 +16061,15 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +}
 +
 +static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf)
- {
-     struct v4l2_exportbuffer expbuf;
-     int i, ret;
-     uint64_t mod = DRM_FORMAT_MOD_LINEAR;
--    uint32_t fmt = 0;
- 
--    switch (pixelformat) {
--    case V4L2_PIX_FMT_NV12:
--        fmt = DRM_FORMAT_NV12;
--        break;
--    case V4L2_PIX_FMT_YUV420:
--        fmt = DRM_FORMAT_YUV420;
--        break;
--    default:
--        return AVERROR(EINVAL);
++{
++    struct v4l2_exportbuffer expbuf;
++    int i, ret;
++    uint64_t mod = DRM_FORMAT_MOD_LINEAR;
++
 +    AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame;
 +    AVDRMLayerDescriptor * const layer = &drm_desc->layers[0];
 +    const struct v4l2_format *const fmt = &q->format;
 +    const uint32_t height = fmt_height(fmt);
-+    const uint32_t width  = fmt_width(fmt);
 +    ptrdiff_t bpl0;
 +
 +    /* fill the DRM frame descriptor */
@@ -27214,11 +16080,11 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +        layer->planes[i].object_index = i;
 +        layer->planes[i].offset = 0;
 +        layer->planes[i].pitch = fmt_bpl(fmt, i);
-     }
++    }
 +    bpl0 = layer->planes[0].pitch;
 +
 +    switch (fmt_pixelformat(fmt)) {
-+
++#if CONFIG_SAND
 +        case V4L2_PIX_FMT_NV12_COL128:
 +            mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0);
 +            layer->format = V4L2_PIX_FMT_NV12;
@@ -27229,14 +16095,14 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +            layer->nb_planes = 2;
 +            layer->planes[1].object_index = 0;
 +            layer->planes[1].offset = height * 128;
-+            layer->planes[0].pitch = width;
-+            layer->planes[1].pitch = width;
++            layer->planes[0].pitch = fmt_width(fmt);
++            layer->planes[1].pitch = layer->planes[0].pitch;
 +            break;
- 
--    avbuf->drm_frame.layers[0].format = fmt;
++#endif
++
 +        case DRM_FORMAT_NV12:
 +            layer->format = V4L2_PIX_FMT_NV12;
- 
++
 +            if (avbuf->num_planes > 1)
 +                break;
 +
@@ -27267,241 +16133,379 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    }
 +
 +    drm_desc->nb_objects = 0;
-     for (i = 0; i < avbuf->num_planes; i++) {
-         memset(&expbuf, 0, sizeof(expbuf));
- 
-@@ -565,19 +1121,11 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf, const uint32_t pixelformat)
-         if (ret < 0)
-             return AVERROR(errno);
- 
--        avbuf->fd = expbuf.fd;
--
--        if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type)) {
--            /* drm frame */
--            avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length;
--            avbuf->drm_frame.objects[i].fd = expbuf.fd;
--            avbuf->drm_frame.objects[i].format_modifier = mod;
--        } else {
--            /* drm frame */
--            avbuf->drm_frame.objects[0].size = avbuf->buffer.length;
--            avbuf->drm_frame.objects[0].fd = expbuf.fd;
--            avbuf->drm_frame.objects[0].format_modifier = mod;
--        }
++    for (i = 0; i < avbuf->num_planes; i++) {
++        memset(&expbuf, 0, sizeof(expbuf));
++
++        expbuf.index = avbuf->buffer.index;
++        expbuf.type = avbuf->buffer.type;
++        expbuf.plane = i;
++
++        ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf);
++        if (ret < 0)
++            return AVERROR(errno);
++
 +        drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ?
 +            avbuf->buffer.m.planes[i].length : avbuf->buffer.length;
 +        drm_desc->objects[i].fd = expbuf.fd;
 +        drm_desc->objects[i].format_modifier = mod;
 +        drm_desc->nb_objects = i + 1;
-     }
- 
-     return 0;
-@@ -588,7 +1136,7 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
-     struct v4l2_format *fmt = &queue->format;
-     DeintV4L2M2MContextShared *ctx = queue->ctx;
-     struct v4l2_requestbuffers req;
--    int ret, i, j, multiplanar;
++    }
++
++    return 0;
++}
++
++static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
++{
++    struct v4l2_format *fmt = &queue->format;
++    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    struct v4l2_requestbuffers req;
 +    int ret, i, multiplanar;
-     uint32_t memory;
- 
-     memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ?
-@@ -617,10 +1165,9 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
-     }
- 
-     for (i = 0; i < queue->num_buffers; i++) {
--        V4L2Buffer *buf = &queue->buffers[i];
++    uint32_t memory;
++
++    memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ?
++        V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
++
++    multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type);
++
++    memset(&req, 0, sizeof(req));
++    req.count = queue->num_buffers;
++    req.memory = memory;
++    req.type = fmt->type;
++
++    ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req);
++    if (ret < 0) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno));
++
++        return AVERROR(errno);
++    }
++
++    queue->num_buffers = req.count;
++    queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer));
++    if (!queue->buffers) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n");
++
++        return AVERROR(ENOMEM);
++    }
++
++    for (i = 0; i < queue->num_buffers; i++) {
 +        V4L2Buffer * const buf = &queue->buffers[i];
- 
-         buf->enqueued = 0;
--        buf->fd = -1;
-         buf->q = queue;
- 
-         buf->buffer.type = fmt->type;
-@@ -632,6 +1179,12 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
-             buf->buffer.m.planes = buf->planes;
-         }
- 
++
++        buf->enqueued = 0;
++        buf->q = queue;
++
++        buf->buffer.type = fmt->type;
++        buf->buffer.memory = memory;
++        buf->buffer.index = i;
++
++        if (multiplanar) {
++            buf->buffer.length = VIDEO_MAX_PLANES;
++            buf->buffer.m.planes = buf->planes;
++        }
++
 +        drm_frame_init(&buf->drm_frame);
 +    }
 +
 +    for (i = 0; i < queue->num_buffers; i++) {
 +        V4L2Buffer * const buf = &queue->buffers[i];
 +
-         ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer);
-         if (ret < 0) {
-             ret = AVERROR(errno);
-@@ -639,29 +1192,14 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
-             goto fail;
-         }
- 
--        if (multiplanar)
--            buf->num_planes = buf->buffer.length;
--        else
--            buf->num_planes = 1;
--
--        for (j = 0; j < buf->num_planes; j++) {
--            V4L2PlaneInfo *info = &buf->plane_info[j];
--
--            if (multiplanar) {
--                info->bytesperline = fmt->fmt.pix_mp.plane_fmt[j].bytesperline;
--                info->length = buf->buffer.m.planes[j].length;
--            } else {
--                info->bytesperline = fmt->fmt.pix.bytesperline;
--                info->length = buf->buffer.length;
--            }
--        }
++        ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer);
++        if (ret < 0) {
++            ret = AVERROR(errno);
++
++            goto fail;
++        }
++
 +        buf->num_planes = multiplanar ? buf->buffer.length : 1;
- 
-         if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) {
-             ret = deint_v4l2m2m_enqueue_buffer(buf);
-             if (ret)
-                 goto fail;
- 
--            ret = v4l2_buffer_export_drm(buf, multiplanar ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat);
++
++        if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) {
++            ret = deint_v4l2m2m_enqueue_buffer(buf);
++            if (ret)
++                goto fail;
++
 +            ret = v4l2_buffer_export_drm(queue, buf);
-             if (ret)
-                 goto fail;
-         }
-@@ -670,12 +1208,8 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue)
-     return 0;
- 
- fail:
--    for (i = 0; i < queue->num_buffers; i++)
--        if (queue->buffers[i].fd >= 0)
--            close(queue->buffers[i].fd);
--    av_free(queue->buffers);
--    queue->buffers = NULL;
--
++            if (ret)
++                goto fail;
++        }
++    }
++
++    return 0;
++
++fail:
 +    avbufs_delete(&queue->buffers, queue->num_buffers);
 +    queue->num_buffers = 0;
-     return ret;
- }
- 
-@@ -862,7 +1396,6 @@ static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx)
-     if (atomic_fetch_sub(&ctx->refcount, 1) == 1) {
-         V4L2Queue *capture = &ctx->capture;
-         V4L2Queue *output  = &ctx->output;
--        int i;
- 
-         av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__);
- 
-@@ -871,12 +1404,7 @@ static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx)
-             deint_v4l2m2m_streamoff(output);
-         }
- 
--        if (capture->buffers)
--            for (i = 0; i < capture->num_buffers; i++) {
--                capture->buffers[i].q = NULL;
--                if (capture->buffers[i].fd >= 0)
--                    close(capture->buffers[i].fd);
--            }
++    return ret;
++}
++
++static int deint_v4l2m2m_streamon(V4L2Queue *queue)
++{
++    DeintV4L2M2MContextShared * const ctx = queue->ctx;
++    int type = queue->format.type;
++    int ret;
++
++    ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type);
++    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno));
++    if (ret < 0)
++        return AVERROR(errno);
++
++    return 0;
++}
++
++static int deint_v4l2m2m_streamoff(V4L2Queue *queue)
++{
++    DeintV4L2M2MContextShared * const ctx = queue->ctx;
++    int type = queue->format.type;
++    int ret;
++
++    ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type);
++    av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno));
++    if (ret < 0)
++        return AVERROR(errno);
++
++    return 0;
++}
++
++// timeout in ms
++static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout)
++{
++    struct v4l2_plane planes[VIDEO_MAX_PLANES];
++    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    struct v4l2_buffer buf = { 0 };
++    V4L2Buffer* avbuf = NULL;
++    struct pollfd pfd;
++    short events;
++    int ret;
++
++    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
++        events =  POLLOUT | POLLWRNORM;
++    else
++        events = POLLIN | POLLRDNORM;
++
++    pfd.events = events;
++    pfd.fd = ctx->fd;
++
++    for (;;) {
++        ret = poll(&pfd, 1, timeout);
++        if (ret > 0)
++            break;
++        if (errno == EINTR)
++            continue;
++        return NULL;
++    }
++
++    if (pfd.revents & POLLERR)
++        return NULL;
++
++    if (pfd.revents & events) {
++        memset(&buf, 0, sizeof(buf));
++        buf.memory = V4L2_MEMORY_MMAP;
++        buf.type = queue->format.type;
++        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
++            memset(planes, 0, sizeof(planes));
++            buf.length = VIDEO_MAX_PLANES;
++            buf.m.planes = planes;
++        }
++
++        ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf);
++        if (ret) {
++            if (errno != EAGAIN)
++                av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n",
++                       av_err2str(AVERROR(errno)));
++            return NULL;
++        }
++
++        avbuf = &queue->buffers[buf.index];
++        avbuf->enqueued = 0;
++        avbuf->buffer = buf;
++        if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) {
++            memcpy(avbuf->planes, planes, sizeof(planes));
++            avbuf->buffer.m.planes = avbuf->planes;
++        }
++        return avbuf;
++    }
++
++    return NULL;
++}
++
++static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue)
++{
++    int i;
++    V4L2Buffer *buf = NULL;
++
++    for (i = 0; i < queue->num_buffers; i++)
++        if (!queue->buffers[i].enqueued) {
++            buf = &queue->buffers[i];
++            break;
++        }
++    return buf;
++}
++
++static void deint_v4l2m2m_unref_queued(V4L2Queue *queue)
++{
++    int i;
++    V4L2Buffer *buf = NULL;
++
++    if (!queue || !queue->buffers)
++        return;
++    for (i = 0; i < queue->num_buffers; i++) {
++        buf = &queue->buffers[i];
++        if (queue->buffers[i].enqueued)
++            av_frame_unref(&buf->frame);
++    }
++}
++
++static void recycle_q(V4L2Queue * const queue)
++{
++    V4L2Buffer* avbuf;
++    while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) {
++        av_frame_unref(&avbuf->frame);
++    }
++}
++
++static int count_enqueued(V4L2Queue *queue)
++{
++    int i;
++    int n = 0;
++
++    if (queue->buffers == NULL)
++        return 0;
++
++    for (i = 0; i < queue->num_buffers; i++)
++        if (queue->buffers[i].enqueued)
++            ++n;
++    return n;
++}
++
++static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame)
++{
++    DeintV4L2M2MContextShared *const ctx = queue->ctx;
++    AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0];
++    V4L2Buffer *buf;
++    int i;
++
++    if (V4L2_TYPE_IS_OUTPUT(queue->format.type))
++        recycle_q(queue);
++
++    buf = deint_v4l2m2m_find_free_buf(queue);
++    if (!buf) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0);
++        return AVERROR(EAGAIN);
++    }
++    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type))
++        for (i = 0; i < drm_desc->nb_objects; i++)
++            buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd;
++    else
++        buf->buffer.m.fd = drm_desc->objects[0].fd;
++
++    buf->buffer.field = !frame_is_interlaced(frame) ? V4L2_FIELD_NONE :
++        frame_is_tff(frame) ? V4L2_FIELD_INTERLACED_TB :
++            V4L2_FIELD_INTERLACED_BT;
++
++    if (ctx->field_order != buf->buffer.field) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field);
++        ctx->field_order = buf->buffer.field;
++    }
++
++    buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame);
++
++    buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd;
++
++    av_frame_move_ref(&buf->frame, frame);
++
++    return deint_v4l2m2m_enqueue_buffer(buf);
++}
++
++static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx)
++{
++    if (atomic_fetch_sub(&ctx->refcount, 1) == 1) {
++        V4L2Queue *capture = &ctx->capture;
++        V4L2Queue *output  = &ctx->output;
++
++        av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__);
++
++        if (ctx->fd >= 0) {
++            deint_v4l2m2m_streamoff(capture);
++            deint_v4l2m2m_streamoff(output);
++        }
++
 +        avbufs_delete(&capture->buffers, capture->num_buffers);
- 
-         deint_v4l2m2m_unref_queued(output);
- 
-@@ -908,73 +1436,15 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused)
-     deint_v4l2m2m_destroy_context(ctx);
- }
- 
--static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height)
--{
--    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
--    AVDRMLayerDescriptor *layer;
--
--    /* fill the DRM frame descriptor */
--    drm_desc->nb_objects = avbuf->num_planes;
--    drm_desc->nb_layers = 1;
--
--    layer = &drm_desc->layers[0];
--    layer->nb_planes = avbuf->num_planes;
--
--    for (int i = 0; i < avbuf->num_planes; i++) {
--        layer->planes[i].object_index = i;
--        layer->planes[i].offset = 0;
--        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
--    }
--
--    switch (layer->format) {
--    case DRM_FORMAT_YUYV:
--        layer->nb_planes = 1;
--        break;
--
--    case DRM_FORMAT_NV12:
--    case DRM_FORMAT_NV21:
--        if (avbuf->num_planes > 1)
--            break;
--
--        layer->nb_planes = 2;
--
--        layer->planes[1].object_index = 0;
--        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
--            height;
--        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
--        break;
--
--    case DRM_FORMAT_YUV420:
--        if (avbuf->num_planes > 1)
--            break;
--
--        layer->nb_planes = 3;
--
--        layer->planes[1].object_index = 0;
--        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
--            height;
--        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
--
--        layer->planes[2].object_index = 0;
--        layer->planes[2].offset = layer->planes[1].offset +
--            ((avbuf->plane_info[0].bytesperline *
--              height) >> 2);
--        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
--        break;
--
--    default:
--        drm_desc->nb_layers = 0;
--        break;
--    }
--
--    return (uint8_t *) drm_desc;
--}
--
- // timeout in ms
- static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout)
- {
-     DeintV4L2M2MContextShared *ctx = queue->ctx;
-     V4L2Buffer* avbuf;
++
++        deint_v4l2m2m_unref_queued(output);
++
++        av_buffer_unref(&ctx->hw_frames_ctx);
++
++        if (capture->buffers)
++            av_free(capture->buffers);
++
++        if (output->buffers)
++            av_free(output->buffers);
++
++        if (ctx->fd >= 0) {
++            close(ctx->fd);
++            ctx->fd = -1;
++        }
++
++        av_free(ctx);
++    }
++}
++
++static void v4l2_free_buffer(void *opaque, uint8_t *unused)
++{
++    V4L2Buffer *buf                = opaque;
++    DeintV4L2M2MContextShared *ctx = buf->q->ctx;
++
++    if (!ctx->done)
++        deint_v4l2m2m_enqueue_buffer(buf);
++
++    deint_v4l2m2m_destroy_context(ctx);
++}
++
++// timeout in ms
++static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout)
++{
++    DeintV4L2M2MContextShared *ctx = queue->ctx;
++    V4L2Buffer* avbuf;
 +    enum AVColorPrimaries color_primaries;
 +    enum AVColorSpace colorspace;
 +    enum AVColorTransferCharacteristic color_trc;
 +    enum AVColorRange color_range;
- 
-     av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
- 
-@@ -985,8 +1455,6 @@ static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int tim
-     }
- 
-     // Fill in PTS and anciliary info from src frame
--    // we will want to overwrite some fields as only the pts/dts
--    // fields are updated with new timing in this fn
-     pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame);
- 
-     frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame,
-@@ -999,18 +1467,36 @@ static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int tim
- 
-     atomic_fetch_add(&ctx->refcount, 1);
- 
--    frame->data[0] = (uint8_t *)v4l2_get_drm_frame(avbuf, ctx->orig_height);
++
++    av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++    if (queue->eos) {
++        av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__);
++        return AVERROR_EOF;
++    }
++
++    avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout);
++    if (!avbuf) {
++        av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout);
++        return AVERROR(EAGAIN);
++    }
++
++    if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) {
++        if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0)
++            queue->eos = 1;
++        if (buf_bytesused0(&avbuf->buffer) == 0)
++            return queue->eos ? AVERROR_EOF : AVERROR(EINVAL);
++    }
++
++    // Fill in PTS and anciliary info from src frame
++    pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame);
++
++    frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame,
++                            sizeof(avbuf->drm_frame), v4l2_free_buffer,
++                            avbuf, AV_BUFFER_FLAG_READONLY);
++    if (!frame->buf[0]) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0);
++        return AVERROR(ENOMEM);
++    }
++
++    atomic_fetch_add(&ctx->refcount, 1);
++
 +    frame->data[0] = (uint8_t *)&avbuf->drm_frame;
-     frame->format = AV_PIX_FMT_DRM_PRIME;
-     if (ctx->hw_frames_ctx)
-         frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
--    frame->height = ctx->height;
--    frame->width = ctx->width;
--
--    // Not interlaced now
--    frame->interlaced_frame = 0;
--    frame->top_field_first = 0;
--    // Pkt duration halved
--    frame->pkt_duration /= 2;
++    frame->format = AV_PIX_FMT_DRM_PRIME;
++    if (ctx->hw_frames_ctx)
++        frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx);
 +    frame->height = ctx->output_height;
 +    frame->width = ctx->output_width;
 +
@@ -27523,19 +16527,31 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +
 +    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) {
 +        // Not interlaced now
-+        frame->interlaced_frame = 0;   // *** Fill in from dst buffer?
-+        frame->top_field_first = 0;
-+        // Pkt duration halved
-+        frame->pkt_duration /= 2;
++        frame_set_progressive(frame);
++        // Duration halved
++        frame->duration /= 2;
 +    }
- 
-     if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) {
-         av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n");
-@@ -1032,15 +1518,34 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
-     ctx->height = avctx->inputs[0]->h;
-     ctx->width = avctx->inputs[0]->w;
- 
--    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d\n", __func__, ctx->width, ctx->height);
++
++    if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) {
++        av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n");
++        frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM;
++    }
++
++    av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts);
++    return 0;
++}
++
++static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
++{
++    AVFilterLink *inlink           = outlink->src->inputs[0];
++    AVFilterContext *avctx         = outlink->src;
++    DeintV4L2M2MContext *priv      = avctx->priv;
++    DeintV4L2M2MContextShared *ctx = priv->shared;
++    int ret;
++
++    ctx->height = avctx->inputs[0]->h;
++    ctx->width = avctx->inputs[0]->w;
++
 +    if (ctx->filter_type == FILTER_V4L2_SCALE) {
 +        if ((ret = ff_scale_eval_dimensions(priv,
 +                                            priv->w_expr, priv->h_expr,
@@ -27551,68 +16567,75 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +        ctx->output_height = ctx->height;
 +    }
 +
-+    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d\n", __func__, ctx->width, ctx->height, ctx->output_width, ctx->output_height);
- 
-     outlink->time_base           = inlink->time_base;
--    outlink->w                   = inlink->w;
--    outlink->h                   = inlink->h;
--    outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
++    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d\n", __func__,
++           ctx->width, ctx->height, ctx->output_width, ctx->output_height);
++
++    outlink->time_base           = inlink->time_base;
 +    outlink->w                   = ctx->output_width;
 +    outlink->h                   = ctx->output_height;
-     outlink->format              = inlink->format;
-     outlink->frame_rate = (AVRational) {1, 0};  // Deny knowledge of frame rate
- 
++    outlink->format              = inlink->format;
++
 +    if (inlink->sample_aspect_ratio.num)
 +        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
 +    else
 +        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
 +
-     ret = deint_v4l2m2m_find_device(ctx);
-     if (ret)
-         return ret;
-@@ -1055,18 +1560,19 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
- 
- static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc)
- {
--    const int is_linear = (drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ||
--            drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID);
++    return deint_v4l2m2m_find_device(ctx);
++}
++
++static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc)
++{
 +    const uint64_t mod = drm_desc->objects[0].format_modifier;
 +    const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID);
 +
 +    // Only currently support single object things
 +    if (drm_desc->nb_objects != 1)
 +        return 0;
- 
-     switch (drm_desc->layers[0].format) {
-     case DRM_FORMAT_YUV420:
--        if (is_linear)
--            return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_YUV420 : 0;
--        break;
++
++    switch (drm_desc->layers[0].format) {
++    case DRM_FORMAT_YUV420:
 +        return is_linear ? V4L2_PIX_FMT_YUV420 : 0;
-     case DRM_FORMAT_NV12:
--        if (is_linear)
--            return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_NV12 : 0;
--        break;
++    case DRM_FORMAT_NV12:
 +        return is_linear ? V4L2_PIX_FMT_NV12 :
-+            fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : 0;
-     default:
-         break;
-     }
-@@ -1089,7 +1595,7 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
- 
-     if (ctx->field_order == V4L2_FIELD_ANY) {
-         const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0];
--        const uint32_t pixelformat = desc_pixelformat(drm_desc);
++#if CONFIG_SAND
++            fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 :
++#endif
++            0;
++    default:
++        break;
++    }
++    return 0;
++}
++
++static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
++{
++    AVFilterContext *avctx         = link->dst;
++    DeintV4L2M2MContext *priv      = avctx->priv;
++    DeintV4L2M2MContextShared *ctx = priv->shared;
++    V4L2Queue *capture             = &ctx->capture;
++    V4L2Queue *output              = &ctx->output;
++    int ret;
++
++    av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n",
++           __func__, in->pts, in->pkt_dts, frame_is_tff(in), frame_is_interlaced(in), in->sample_aspect_ratio.num, in->sample_aspect_ratio.den);
++
++    if (ctx->field_order == V4L2_FIELD_ANY) {
++        const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0];
 +        uint32_t pixelformat = desc_pixelformat(drm_desc);
- 
-         if (pixelformat == 0) {
-             av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n",
-@@ -1104,29 +1610,49 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
-         av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height,
-            drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset);
- 
--        ret = deint_v4l2m2m_set_format(output, pixelformat, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
--        if (ret)
++
++        if (pixelformat == 0) {
++            av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n",
++                   av_fourcc2str(drm_desc->layers[0].format),
++                   drm_desc->nb_objects, drm_desc->objects[0].format_modifier);
++            return AVERROR(EINVAL);
++        }
++
++        ctx->orig_width = drm_desc->layers[0].planes[0].pitch;
++        ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width;
++
++        av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height,
++           drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset);
++
 +        if ((ret = set_src_fmt(output, in)) != 0) {
 +            av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n",
 +                   av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier);
@@ -27622,70 +16645,245 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +        ret = do_s_fmt(output);
 +        if (ret) {
 +            av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n");
-             return ret;
++            return ret;
 +        }
- 
--        ret = deint_v4l2m2m_set_format(capture, pixelformat, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset);
--        if (ret)
++
 +        if (ctx->output_format != AV_PIX_FMT_NONE)
 +           pixelformat = fmt_av_to_v4l2(ctx->output_format);
 +        ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height);
 +        if (ret) {
 +            av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n");
-             return ret;
++            return ret;
 +        }
- 
-         ret = deint_v4l2m2m_allocate_buffers(capture);
--        if (ret)
++
++        ret = deint_v4l2m2m_allocate_buffers(capture);
 +        if (ret) {
 +            av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n");
-             return ret;
++            return ret;
 +        }
- 
-         ret = deint_v4l2m2m_streamon(capture);
--        if (ret)
++
++        ret = deint_v4l2m2m_streamon(capture);
 +        if (ret) {
 +            av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret));
-             return ret;
++            return ret;
 +        }
- 
-         ret = deint_v4l2m2m_allocate_buffers(output);
--        if (ret)
++
++        ret = deint_v4l2m2m_allocate_buffers(output);
 +        if (ret) {
 +            av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n");
-             return ret;
++            return ret;
 +        }
- 
-         ret = deint_v4l2m2m_streamon(output);
--        if (ret)
++
++        ret = deint_v4l2m2m_streamon(output);
 +        if (ret) {
 +            av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret));
-             return ret;
++            return ret;
 +        }
- 
-         if (in->top_field_first)
-             ctx->field_order = V4L2_FIELD_INTERLACED_TB;
-@@ -1251,7 +1777,7 @@ again:
-     return did_something ? 0 : FFERROR_NOT_READY;
- }
- 
--static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
++
++        if (frame_is_tff(in))
++            ctx->field_order = V4L2_FIELD_INTERLACED_TB;
++        else
++            ctx->field_order = V4L2_FIELD_INTERLACED_BT;
++
++        {
++            struct v4l2_encoder_cmd ecmd = {
++                .cmd = V4L2_ENC_CMD_STOP
++            };
++            ctx->has_enc_stop = 0;
++            if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) {
++                av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n");
++                ctx->has_enc_stop = 1;
++            }
++            else {
++                av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno)));
++            }
++
++        }
++    }
++
++    ret = deint_v4l2m2m_enqueue_frame(output, in);
++
++    av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret));
++    return ret;
++}
++
++static int
++ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s,
++           AVFilterLink * const inlink)
++{
++    int instatus;
++    int64_t inpts;
++
++    if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0)
++        return 0;
++
++    s->drain      = instatus;
++    s->drain_pts  = inpts;
++    s->drain_state = DRAIN_TIMEOUT;
++
++    if (s->field_order == V4L2_FIELD_ANY) {  // Not yet started
++        s->drain_state = DRAIN_DONE;
++    }
++    else if (s->one_to_one) {
++        s->drain_state = DRAIN_LAST;
++    }
++    else if (s->has_enc_stop) {
++        struct v4l2_encoder_cmd ecmd = {
++            .cmd = V4L2_ENC_CMD_STOP
++        };
++        if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) {
++            av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n");
++            s->drain_state = DRAIN_EOS;
++        }
++        else {
++            av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno)));
++        }
++    }
++    return 1;
++}
++
++static int deint_v4l2m2m_activate(AVFilterContext *avctx)
++{
++    DeintV4L2M2MContext * const priv = avctx->priv;
++    DeintV4L2M2MContextShared *const s = priv->shared;
++    AVFilterLink * const outlink = avctx->outputs[0];
++    AVFilterLink * const inlink = avctx->inputs[0];
++    int n = 0;
++    int cn = 99;
++    int did_something = 0;
++
++    av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++    FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx);
++
++    ack_inlink(avctx, s, inlink);
++
++    if (s->field_order != V4L2_FIELD_ANY)  // Can't DQ if no setup!
++    {
++        AVFrame * frame = av_frame_alloc();
++        int rv;
++
++        recycle_q(&s->output);
++        n = count_enqueued(&s->output);
++
++        if (frame == NULL) {
++            av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__);
++            return AVERROR(ENOMEM);
++        }
++
++        rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame,
++                                         drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0);
++        if (rv != 0) {
++            av_frame_free(&frame);
++            if (rv == AVERROR_EOF) {
++                av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__);
++                s->drain_state = DRAIN_DONE;
++            }
++            else if (rv == AVERROR(EAGAIN)) {
++                if (s->drain_state != DRAIN_NONE) {
++                    av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__);
++                    s->drain_state = DRAIN_DONE;
++                }
++            }
++            else {
++                av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv));
++                return rv;
++            }
++        }
++        else {
++            frame_set_progressive(frame);
++            // frame is always consumed by filter_frame - even on error despite
++            // a somewhat confusing comment in the header
++            rv = ff_filter_frame(outlink, frame);
++            ++s->frames_tx;
++
++            av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv));
++            did_something = 1;
++
++            if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) {
++                av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__);
++                s->drain_state = DRAIN_DONE;
++            }
++        }
++
++        cn = count_enqueued(&s->capture);
++    }
++
++    if (s->drain_state == DRAIN_DONE) {
++        ff_outlink_set_status(outlink, s->drain, s->drain_pts);
++        av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain));
++        return 0;
++    }
++
++    recycle_q(&s->output);
++    n = count_enqueued(&s->output);
++
++    while (n < 6 && !s->drain) {
++        AVFrame * frame;
++        int rv;
++
++        if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) {
++            av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv));
++            return rv;
++        }
++
++        if (frame == NULL) {
++            av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
++            if (!ack_inlink(avctx, s, inlink)) {
++                ff_inlink_request_frame(inlink);
++                av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__);
++            }
++            break;
++        }
++        ++s->frames_rx;
++
++        rv = deint_v4l2m2m_filter_frame(inlink, frame);
++        av_frame_free(&frame);
++
++        if (rv != 0)
++            return rv;
++
++        av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
++        did_something = 1;
++        ++n;
++    }
++
++    if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) {
++        ff_filter_set_ready(avctx, 1);
++        did_something = 1;
++        av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__);
++    }
++
++    av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn);
++    return did_something ? 0 : FFERROR_NOT_READY;
++}
++
 +static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type)
- {
-     DeintV4L2M2MContext * const priv = avctx->priv;
-     DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared));
-@@ -1262,6 +1788,7 @@ static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
-     }
-     priv->shared = ctx;
-     ctx->logctx = priv;
++{
++    DeintV4L2M2MContext * const priv = avctx->priv;
++    DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared));
++
++    if (!ctx) {
++        av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0);
++        return AVERROR(ENOMEM);
++    }
++    priv->shared = ctx;
++    ctx->logctx = priv;
 +    ctx->filter_type = filter_type;
-     ctx->fd = -1;
-     ctx->output.ctx = ctx;
-     ctx->output.num_buffers = 8;
-@@ -1274,9 +1801,52 @@ static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
- 
-     atomic_init(&ctx->refcount, 1);
- 
++    ctx->fd = -1;
++    ctx->output.ctx = ctx;
++    ctx->output.num_buffers = 8;
++    ctx->output.name = "OUTPUT";
++    ctx->capture.ctx = ctx;
++    ctx->capture.num_buffers = 12;
++    ctx->capture.name = "CAPTURE";
++    ctx->done = 0;
++    ctx->field_order = V4L2_FIELD_ANY;
++
++    pts_track_init(&ctx->track, priv);
++
++    atomic_init(&ctx->refcount, 1);
++
 +    if (priv->output_format_string) {
 +        ctx->output_format = av_get_pix_fmt(priv->output_format_string);
 +        if (ctx->output_format == AV_PIX_FMT_NONE) {
@@ -27719,9 +16917,9 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    STRING_OPTION(colour_matrix,    color_space,     AVCOL_SPC_UNSPECIFIED);
 +    STRING_OPTION(chroma_location,  chroma_location, AVCHROMA_LOC_UNSPECIFIED);
 +
-     return 0;
- }
- 
++    return 0;
++}
++
 +static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
 +{
 +    return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE);
@@ -27729,16 +16927,39 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +
 +static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx)
 +{
-+    return common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE);
++    int rv;
++    DeintV4L2M2MContext * priv;
++    DeintV4L2M2MContextShared * ctx;
++
++    if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0)
++        return rv;
++
++    priv = avctx->priv;
++    ctx = priv->shared;
++
++    ctx->one_to_one = 1;
++    return 0;
 +}
 +
- static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
- {
-     DeintV4L2M2MContext *priv = avctx->priv;
-@@ -1294,6 +1864,51 @@ static const AVOption deinterlace_v4l2m2m_options[] = {
- 
- AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m);
- 
++static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
++{
++    DeintV4L2M2MContext *priv = avctx->priv;
++    DeintV4L2M2MContextShared *ctx = priv->shared;
++
++    av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n",
++           ctx->frames_rx, ctx->frames_tx);
++    ctx->done = 1;
++    ctx->logctx = NULL;  // Log to NULL works, log to missing crashes
++    pts_track_uninit(&ctx->track);
++    deint_v4l2m2m_destroy_context(ctx);
++}
++
++static const AVOption deinterlace_v4l2m2m_options[] = {
++    { NULL },
++};
++
++AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m);
++
 +#define OFFSET(x) offsetof(DeintV4L2M2MContext, x)
 +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
 +
@@ -27784,13 +17005,33 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +
 +AVFILTER_DEFINE_CLASS(scale_v4l2m2m);
 +
- static const AVFilterPad deint_v4l2m2m_inputs[] = {
-     {
-         .name         = "default",
-@@ -1321,3 +1936,17 @@ AVFilter ff_vf_deinterlace_v4l2m2m = {
-     .priv_class     = &deinterlace_v4l2m2m_class,
-     .activate       = deint_v4l2m2m_activate,
- };
++static const AVFilterPad deint_v4l2m2m_inputs[] = {
++    {
++        .name         = "default",
++        .type         = AVMEDIA_TYPE_VIDEO,
++    },
++};
++
++static const AVFilterPad deint_v4l2m2m_outputs[] = {
++    {
++        .name          = "default",
++        .type          = AVMEDIA_TYPE_VIDEO,
++        .config_props  = deint_v4l2m2m_config_props,
++    },
++};
++
++AVFilter ff_vf_deinterlace_v4l2m2m = {
++    .name           = "deinterlace_v4l2m2m",
++    .description    = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"),
++    .priv_size      = sizeof(DeintV4L2M2MContext),
++    .init           = &deint_v4l2m2m_init,
++    .uninit         = &deint_v4l2m2m_uninit,
++    FILTER_INPUTS(deint_v4l2m2m_inputs),
++    FILTER_OUTPUTS(deint_v4l2m2m_outputs),
++    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME),
++    .priv_class     = &deinterlace_v4l2m2m_class,
++    .activate       = deint_v4l2m2m_activate,
++};
 +
 +AVFilter ff_vf_scale_v4l2m2m = {
 +    .name           = "scale_v4l2m2m",
@@ -27800,4020 +17041,6165 @@ index 1a3bef5bcba6..2df39ec0f19f 100644
 +    .uninit         = &deint_v4l2m2m_uninit,
 +    FILTER_INPUTS(deint_v4l2m2m_inputs),
 +    FILTER_OUTPUTS(deint_v4l2m2m_outputs),
-+    FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME),
++    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME),
 +    .priv_class     = &scale_v4l2m2m_class,
 +    .activate       = deint_v4l2m2m_activate,
 +};
 +
-
-From ce9d8c33e7d64ba11d1f1df4ee5340b63ab84c2f Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 22 Sep 2022 14:54:46 +0000
-Subject: [PATCH 071/186] v4l2_m2m: Adjust buffer allocation based on min/max
- controls
-
-Clip requested buffer count to min/max declared by driver.
-If 0 buffers requested then set to min+2.
-This allows encode to keep its src buffer count down to a plausible
-minimum which helps with flow control.
----
- libavcodec/v4l2_context.c | 19 +++++++++++++++++++
- 1 file changed, 19 insertions(+)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 6b97eab41ed7..ba36689ff3a6 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -1187,6 +1187,7 @@ fail_release:
- 
- int ff_v4l2_context_init(V4L2Context* ctx)
- {
-+    struct v4l2_queryctrl qctrl;
-     V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-     int ret;
- 
-@@ -1228,6 +1229,24 @@ int ff_v4l2_context_init(V4L2Context* ctx)
-         goto fail_unref_hwframes;
-     }
- 
-+    memset(&qctrl, 0, sizeof(qctrl));
-+    qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT;
-+    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) {
-+        ret = AVERROR(errno);
-+        if (ret != AVERROR(EINVAL)) {
-+            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret));
-+            goto fail_unref_hwframes;
-+        }
-+        // Control unsupported - set default if wanted
-+        if (ctx->num_buffers < 2)
-+            ctx->num_buffers = 4;
-+    }
-+    else {
-+        if (ctx->num_buffers < 2)
-+            ctx->num_buffers = qctrl.minimum + 2;
-+        ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum);
-+    }
+diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c
+new file mode 100644
+index 000000000000..67750e4f12b8
+--- /dev/null
++++ b/libavfilter/vf_unsand.c
+@@ -0,0 +1,227 @@
++/*
++ * Copyright (c) 2007 Bobby Bingham
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
 +
-     ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem);
-     if (ret < 0)
-         goto fail_unref_hwframes;
-
-From d67aed711e1ad85b3d4dfc3d363c0bdd3ac40001 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 22 Sep 2022 15:00:12 +0000
-Subject: [PATCH 072/186] v4l2_m2m_dec: If src Q is full then wait indefinitely
- for buffer
-
-If it is not possible to add another buffer to the src Q then alawys
-wait indefinitely for either an output frame or the Q to have space.
-
-This has issues if the reason that the Q is stalled is due to dst buffer
-exhaustion and buffers cannot be returned async by another thread but
-the current scheme confuses ffmpegs pipeline scheduling.
----
- libavcodec/v4l2_m2m_dec.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 485a96f4b487..bb183097f6f5 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -456,9 +456,9 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-         if (dst_rv != 0 && TRY_DQ(src_rv)) {
-             // Pick a timeout depending on state
-             const int t =
-+                src_rv == NQ_Q_FULL ? -1 :
-                 src_rv == NQ_DRAINING ? 300 :
--                prefer_dq ? 5 :
--                src_rv == NQ_Q_FULL ? -1 : 0;
-+                prefer_dq ? 5 : 0;
- 
-             // Dequeue frame will unref any previous contents of frame
-             // if it returns success so we don't need an explicit unref
-
-From 04ed865af7885364c4ae7d5e790a887c2c500275 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 22 Sep 2022 15:12:27 +0000
-Subject: [PATCH 073/186] vf_deinterlace_v4l2m2m: Add Q name to structure for
- debug
-
----
- libavfilter/vf_deinterlace_v4l2m2m.c | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index 2df39ec0f19f..4edecc02bff5 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -84,6 +84,7 @@ typedef struct V4L2Queue {
-     struct v4l2_selection sel;
-     int num_buffers;
-     V4L2Buffer *buffers;
-+    const char * name;
-     DeintV4L2M2MContextShared *ctx;
- } V4L2Queue;
- 
-@@ -1792,8 +1793,10 @@ static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filt
-     ctx->fd = -1;
-     ctx->output.ctx = ctx;
-     ctx->output.num_buffers = 8;
-+    ctx->output.name = "OUTPUT";
-     ctx->capture.ctx = ctx;
-     ctx->capture.num_buffers = 12;
-+    ctx->capture.name = "CAPTURE";
-     ctx->done = 0;
-     ctx->field_order = V4L2_FIELD_ANY;
- 
-
-From 8194a72e9599b4beacce6676e86b08028a4c979d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 22 Sep 2022 16:08:42 +0000
-Subject: [PATCH 074/186] v4l2_m2m_enc: Set src buffer count to min+2 by
- default
-
-Set output.num_buffers to 0 by default which will then be set to min+2
-by the allocation code. This fixes an issue where the deinterlacer had
-fewer dest buffer than the encoder has src buffers and so ran dry
-creating deadlock in the ffmpeg filter chain.
----
- libavcodec/v4l2_m2m_enc.c | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index 099ad23928d3..b8ba815c379d 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -672,9 +672,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx)
- #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
- 
- #define V4L_M2M_CAPTURE_OPTS \
--    V4L_M2M_DEFAULT_OPTS,\
-+    { "num_output_buffers", "Number of buffers in the output context",\
-+        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\
-     { "num_capture_buffers", "Number of buffers in the capture context", \
--        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS }
-+        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS }
- 
- static const AVOption mpeg4_options[] = {
-     V4L_M2M_CAPTURE_OPTS,
-
-From f949fe93ac776f6a2e9cec6c171a8c47c2b00c44 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 22 Sep 2022 16:13:57 +0000
-Subject: [PATCH 075/186] vf_deinterlace_m2m: For deinterlace set outlink FR to
- twice inlink
-
-We used to set the outlink framerate to unknown but it turns out that
-ffmpegs filter pipeline copes with that badly. Otherwise leave at 0,0
-which will copy FR from inlink to outlink.
----
- libavfilter/vf_deinterlace_v4l2m2m.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index 4edecc02bff5..c52dae1c44a8 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -1534,13 +1534,16 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink)
-         ctx->output_height = ctx->height;
-     }
- 
--    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d\n", __func__, ctx->width, ctx->height, ctx->output_width, ctx->output_height);
-+    av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__,
-+           ctx->width, ctx->height, ctx->output_width, ctx->output_height,
-+           inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den);
- 
-     outlink->time_base           = inlink->time_base;
-     outlink->w                   = ctx->output_width;
-     outlink->h                   = ctx->output_height;
-     outlink->format              = inlink->format;
--    outlink->frame_rate = (AVRational) {1, 0};  // Deny knowledge of frame rate
-+    if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0)
-+        outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den};
- 
-     if (inlink->sample_aspect_ratio.num)
-         outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
-
-From e207d1dab82d5c1684cb87ed8c957d93b3913a4e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 23 Sep 2022 11:30:56 +0000
-Subject: [PATCH 076/186] v4l2m2m: Add ff_v4l2_dq_all to drain all buffers from
- a Q
-
-Useful for where (encode) we might have drmprime buffers that we want to
-return to the source ASAP.
----
- libavcodec/v4l2_context.c | 17 +++++++++++------
- libavcodec/v4l2_context.h |  2 ++
- 2 files changed, 13 insertions(+), 6 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index ba36689ff3a6..4a359bf45e30 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -707,17 +707,22 @@ clean_v4l2_buffer(V4L2Buffer * const avbuf)
-     return avbuf;
- }
- 
-+void
-+ff_v4l2_dq_all(V4L2Context *const ctx)
++/**
++ * @file
++ * unsand video filter: converts sand pixel formats to planar YUV
++ */
++
++#include <string.h>
++
++#include "libavutil/internal.h"
++#include "libavutil/mem.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/opt.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#include "avfilter.h"
++#include "formats.h"
++#include "video.h"
++
++typedef struct UnsandContext {  // Private context of the unsand filter
++    const AVClass *class;  // Only member: the filter keeps no other per-instance state
++} UnsandContext;
++
++static av_cold void uninit(AVFilterContext *ctx)
 +{
-+    V4L2Buffer * avbuf;
-+    do {
-+        get_qbuf(ctx, &avbuf, 0);
-+    } while (avbuf);
++//    UnsandContext *s = ctx->priv;
 +}
 +
- static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
- {
-     int i;
- 
-     /* get back as many output buffers as possible */
--    if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
--        V4L2Buffer * avbuf;
--        do {
--            get_qbuf(ctx, &avbuf, 0);
--        } while (avbuf);
--    }
-+    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
-+        ff_v4l2_dq_all(ctx);
- 
-     for (i = 0; i < ctx->num_buffers; i++) {
-         V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 21265f1bd77b..523c53e97dc5 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -218,4 +218,6 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const
-  */
- int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f);
- 
-+void ff_v4l2_dq_all(V4L2Context *const ctx);
-+
- #endif // AVCODEC_V4L2_CONTEXT_H
-
-From fb8f90688761ae011e9b4cca65a51e13416a498c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 23 Sep 2022 11:38:36 +0000
-Subject: [PATCH 077/186] v4l2_m2m_enc: DQ output more frequently
-
-Ensure that we DQ any released src buffers on every op to avoid deadlock
-with source.
-
-There is a plausible argument that this patch is inelegant and the drain
-should be integrated into dq_buf, but that is a further reaching delta.
----
- libavcodec/v4l2_m2m_enc.c | 12 ++++++++++--
- 1 file changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index b8ba815c379d..a992a3cccc68 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -421,6 +421,8 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
-     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-     V4L2Context *const output = &s->output;
- 
-+    ff_v4l2_dq_all(output);
-+
-     // Signal EOF if needed
-     if (!frame) {
-         return ff_v4l2_context_enqueue_frame(output, frame);
-@@ -492,6 +494,8 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-     AVFrame *frame = s->frame;
-     int ret;
- 
-+    ff_v4l2_dq_all(output);
-+
-     if (s->draining)
-         goto dequeue;
- 
-@@ -528,7 +532,9 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-     }
- 
- dequeue:
--    if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0)
-+    ret = ff_v4l2_context_dequeue_packet(capture, avpkt);
-+    ff_v4l2_dq_all(output);
-+    if (ret)
-         return ret;
- 
-     if (capture->first_buf == 1) {
-@@ -560,7 +566,9 @@ dequeue:
-             s->extdata_size = len;
-         }
- 
--        if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0)
-+        ret = ff_v4l2_context_dequeue_packet(capture, avpkt);
-+        ff_v4l2_dq_all(output);
-+        if (ret)
-             return ret;
-     }
- 
-
-From c90d17e99a8d66762c890bca316b6f52da2e6278 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 26 Sep 2022 18:20:00 +0100
-Subject: [PATCH 078/186] conf_native: Remove --enable-rpi from all builds
-
----
- pi-util/conf_native.sh | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
-index 37cea71756ae..f22d531ca448 100755
---- a/pi-util/conf_native.sh
-+++ b/pi-util/conf_native.sh
-@@ -54,9 +54,9 @@ if [ $MMAL ]; then
-   RPI_LIBDIRS="-L$RPI_OPT_VC/lib"
-   RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000"
-   RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group"
--  RPIOPTS="--enable-mmal --enable-rpi"
-+  RPIOPTS="--enable-mmal"
- else
--  RPIOPTS="--disable-mmal --enable-sand"
-+  RPIOPTS="--disable-mmal"
- fi
- 
- C=`lsb_release -sc`
-@@ -89,6 +89,7 @@ $FFSRC/configure \
-  $MCOPTS\
-  --disable-stripping\
-  --disable-thumb\
-+ --enable-sand\
-  --enable-v4l2-request\
-  --enable-libdrm\
-  --enable-vout-egl\
-
-From 976ada8fe321b41bb2989b83b232b99d3c6720d1 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 29 Sep 2022 19:48:08 +0000
-Subject: [PATCH 079/186] v4l2_m2m_dec: Deal correctly with avcC H264 data in
- extradata
-
-Decoders expect AnnexB style headers, mkv and similar formats have
-somewhat oddly wrapped extradata. Convert to annex-b style before use.
----
- libavcodec/v4l2_m2m.h     |   2 +-
- libavcodec/v4l2_m2m_dec.c | 177 ++++++++++++++++++++++++++++++++++++--
- 2 files changed, 169 insertions(+), 10 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index ee72beb0522b..babf101d650a 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -118,7 +118,7 @@ typedef struct V4L2m2mContext {
-     /* Ext data sent */
-     int extdata_sent;
-     /* Ext data sent in packet - overrides ctx */
--    uint8_t * extdata_data;
-+    void * extdata_data;
-     size_t extdata_size;
- 
- #define FF_V4L2_QUIRK_REINIT_ALWAYS             1
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index bb183097f6f5..6bd9926b3f31 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -46,6 +46,71 @@
- #define STATS_LAST_COUNT_MAX 64
- #define STATS_INTERVAL_MAX (1 << 30)
- 
-+#ifndef FF_API_BUFFER_SIZE_T
-+#define FF_API_BUFFER_SIZE_T 1
-+#endif
-+
-+#define DUMP_FAILED_EXTRADATA 0
-+
-+#if DUMP_FAILED_EXTRADATA
-+static inline char hex1(unsigned int x)
++static av_cold int init(AVFilterContext *ctx)
 +{
-+    x &= 0xf;
-+    return x <= 9 ? '0' + x : 'a' + x - 10;
-+}
-+
-+static inline char * hex2(char * s, unsigned int x)
-+{
-+    *s++ = hex1(x >> 4);
-+    *s++ = hex1(x);
-+    return s;
-+}
-+
-+static inline char * hex4(char * s, unsigned int x)
-+{
-+    s = hex2(s, x >> 8);
-+    s = hex2(s, x);
-+    return s;
-+}
-+
-+static inline char * dash2(char * s)
-+{
-+    *s++ = '-';
-+    *s++ = '-';
-+    return s;
-+}
-+
-+static void
-+data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len)
-+{
-+    size_t i;
-+    s = hex4(s, offset);
-+    m += offset;
-+    for (i = 0; i != 8; ++i) {
-+        *s++ = ' ';
-+        s = len > i + offset ? hex2(s, *m++) : dash2(s);
-+    }
-+    *s++ = ' ';
-+    *s++ = ':';
-+    for (; i != 16; ++i) {
-+        *s++ = ' ';
-+        s = len > i + offset ? hex2(s, *m++) : dash2(s);
-+    }
-+    *s++ = 0;
-+}
-+
-+static void
-+log_dump(void * logctx, int lvl, const void * const data, const size_t len)
-+{
-+    size_t i;
-+    for (i = 0; i < len; i += 16) {
-+        char buf[80];
-+        data16(buf, i, data, len);
-+        av_log(logctx, lvl, "%s\n", buf);
-+    }
-+}
-+#endif
-+
- static int64_t pts_stats_guess(const pts_stats_t * const stats)
- {
-     if (stats->last_pts == AV_NOPTS_VALUE ||
-@@ -98,6 +163,98 @@ static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char
-     };
- }
- 
-+// If abdata == NULL then this just counts space required
-+// Unpacks avcC if detected
-+static int
-+h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata)
-+{
-+    const uint8_t * const xdend = extradata + extrasize;
-+    const uint8_t * p = extradata;
-+    uint8_t * d = abdata;
-+    unsigned int n;
-+    unsigned int len;
-+    const unsigned int hdrlen = 4;
-+    unsigned int need_pps = 1;
-+
-+    if (extrasize < 8)
-+        return AVERROR(EINVAL);
-+
-+    if (p[0] == 0 && p[1] == 0) {
-+        // Assume a couple of leading zeros are good enough to indicate NAL
-+        if (abdata)
-+            memcpy(d, p, extrasize);
-+        return extrasize;
-+    }
-+
-+    // avcC starts with a 1
-+    if (p[0] != 1)
-+        return AVERROR(EINVAL);
-+
-+    p += 5;
-+    n = *p++ & 0x1f;
-+
-+doxps:
-+    while (n--) {
-+        if (xdend - p < 2)
-+            return AVERROR(EINVAL);
-+        len = (p[0] << 8) | p[1];
-+        p += 2;
-+        if (xdend - p < (ptrdiff_t)len)
-+            return AVERROR(EINVAL);
-+        if (abdata) {
-+            d[0] = 0;
-+            d[1] = 0;
-+            d[2] = 0;
-+            d[3] = 1;
-+            memcpy(d + 4, p, len);
-+        }
-+        d += len + hdrlen;
-+        p += len;
-+    }
-+    if (need_pps) {
-+        need_pps = 0;
-+        if (p >= xdend)
-+            return AVERROR(EINVAL);
-+        n = *p++;
-+        goto doxps;
-+    }
-+
-+    return d - abdata;
-+}
-+
-+static int
-+copy_extradata(AVCodecContext * const avctx,
-+               const void * const src_data, const int src_len,
-+               void ** const pdst_data, size_t * const pdst_len)
-+{
-+    int len;
-+
-+    *pdst_len = 0;
-+    av_freep(pdst_data);
-+
-+    if (avctx->codec_id == AV_CODEC_ID_H264)
-+        len = h264_xd_copy(src_data, src_len, NULL);
-+    else
-+        len = src_len < 0 ? AVERROR(EINVAL) : src_len;
-+
-+    // Zero length is OK but we swant to stop - -ve is error val
-+    if (len <= 0)
-+        return len;
-+
-+    if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL)
-+        return AVERROR(ENOMEM);
-+
-+    if (avctx->codec_id == AV_CODEC_ID_H264)
-+        h264_xd_copy(src_data, src_len, *pdst_data);
-+    else
-+        memcpy(*pdst_data, src_data, len);
-+    *pdst_len = len;
++//    UnsandContext *s = ctx->priv;
 +
 +    return 0;
 +}
 +
 +
++static int filter_frame(AVFilterLink *link, AVFrame *in)
++{
++    AVFilterLink * const outlink = link->dst->outputs[0];
++    AVFrame *out = NULL;
++    int rv = 0;
 +
- static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)
- {
-     int ret;
-@@ -277,13 +434,8 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-             side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
-             if (side_data) {
-                 av_log(avctx, AV_LOG_DEBUG, "New extradata\n");
--                av_freep(&s->extdata_data);
--                if ((s->extdata_data = av_malloc(side_size ? side_size : 1)) == NULL) {
--                    av_log(avctx, AV_LOG_ERROR, "Failed to alloc %zd bytes of extra data\n", side_size);
--                    return AVERROR(ENOMEM);
--                }
--                memcpy(s->extdata_data, side_data, side_size);
--                s->extdata_size = side_size;
-+                if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0)
-+                    av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret));
-                 s->extdata_sent = 0;
-             }
- 
-@@ -359,8 +511,6 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
-         ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
-     else if (s->extdata_data)
-         ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size);
--    else
--        ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, avctx->extradata, avctx->extradata_size);
- 
-     if (ret == AVERROR(EAGAIN)) {
-         // Out of input buffers - keep packet
-@@ -770,6 +920,15 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-         return ret;
-     }
- 
-+    if (avctx->extradata &&
-+        (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret));
-+#if DUMP_FAILED_EXTRADATA
-+        log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size);
-+#endif
-+        return ret;
++    if (outlink->format == in->format) {
++        // If nothing to do then do nothing
++        out = in;
++    }
++    else
++    {
++        if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL)
++        {
++            rv = AVERROR(ENOMEM);
++            goto fail;
++        }
++        if (av_rpi_sand_to_planar_frame(out, in) != 0)
++        {
++            rv = -1;
++            goto fail;
++        }
++
++        av_frame_free(&in);
 +    }
 +
-     if ((ret = v4l2_prepare_decoder(s)) < 0)
-         return ret;
- 
-
-From 4c7e2544e1bb6a5517ef45b9520cf1a50a2f04c3 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 30 Sep 2022 14:20:23 +0000
-Subject: [PATCH 080/186] v4l2_request_hevc: Fix up
- V4L2_CID_CODEC_STATELESS_BASE if missing
-
----
- libavcodec/hevc-ctrls-v4.h | 7 +++++++
- 1 file changed, 7 insertions(+)
-
-diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h
-index 7829d8208435..c02fdbe5a8e9 100644
---- a/libavcodec/hevc-ctrls-v4.h
-+++ b/libavcodec/hevc-ctrls-v4.h
-@@ -53,6 +53,13 @@
- #include <linux/const.h>
- #include <linux/types.h>
- 
-+#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS
-+#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000	/* Stateless codecs controls */
-+#endif
-+#ifndef V4L2_CID_CODEC_STATELESS_BASE
-+#define V4L2_CID_CODEC_STATELESS_BASE		(V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900)
++    return ff_filter_frame(outlink, out);
++
++fail:
++    av_frame_free(&out);
++    av_frame_free(&in);
++    return rv;
++}
++
++#if 0
++static void dump_fmts(const AVFilterFormats * fmts)  // Debug helper (compiled out by #if 0): prints a format list to stdout
++{
++    int i;
++    if (fmts== NULL) {
++        printf("NULL\n");  // No list at all
++        return;
++    }
++    for (i = 0; i < fmts->nb_formats; ++i) {
++        printf(" %d", fmts->formats[i]);  // Raw numeric format value, space separated
++    }
++    printf("\n");
++}
 +#endif
 +
- #define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++// Negotiate formats: every sand/RPI4 input format is offered downstream as
++// planar YUV420 of the same bit depth, all other formats pass through as-is.
++// Returns 0 on success, AVERROR(EAGAIN) if the source has no formats yet, else <0.
++static int query_formats(AVFilterContext *ctx)
++{
++    int ret;
++
++    // If we aren't connected at both ends then just do nothing
++    if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL)
++        return 0;
++
++    // Our output formats depend on our input formats and we can't/don't
++    // want to convert between bit depths so we need to wait for the source
++    // to have an opinion before we do
++    if (ctx->inputs[0]->incfg.formats == NULL)
++        return AVERROR(EAGAIN);
++
++    // Accept anything
++    if (ctx->inputs[0]->outcfg.formats == NULL &&
++        (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0)
++        return ret;
++
++    // Generate a container if we don't already have one
++    if (ctx->outputs[0]->incfg.formats == NULL)
++    {
++        // Somewhat rubbish way of ensuring we have a good structure
++        static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE};
++        AVFilterFormats *formats = ff_make_format_list(out_fmts);
++
++        if (formats == NULL)
++            return AVERROR(ENOMEM);
++        if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0)
++            return ret;
++    }
++
++    // Replace old format list with new filtered list derived from what our
++    // input says it can do
++    {
++        const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats;
++        AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats;
++        enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats);
++        int i, n = 0;
++        int seen_420p = 0;
++        int seen_420p10 = 0;
++
++        // The loop below writes through dst_fmts so fail cleanly if alloc failed
++        if (dst_fmts == NULL)
++            return AVERROR(ENOMEM);
++
++        for (i = 0; i < src_ff->nb_formats; ++i) {
++            const enum AVPixelFormat f = src_ff->formats[i];
++
++            switch (f){
++                case AV_PIX_FMT_YUV420P:
++                case AV_PIX_FMT_SAND128:
++                case AV_PIX_FMT_RPI4_8:
++                    if (!seen_420p) {
++                        seen_420p = 1;
++                        dst_fmts[n++] = AV_PIX_FMT_YUV420P;
++                    }
++                    break;
++                case AV_PIX_FMT_SAND64_10:
++                case AV_PIX_FMT_YUV420P10:
++                case AV_PIX_FMT_RPI4_10:
++                    if (!seen_420p10) {
++                        seen_420p10 = 1;
++                        dst_fmts[n++] = AV_PIX_FMT_YUV420P10;
++                    }
++                    break;
++                default:
++                    dst_fmts[n++] = f;
++                    break;
++            }
++        }
++
++        av_freep(&dst_ff->formats);
++        dst_ff->formats = dst_fmts;
++        dst_ff->nb_formats = n;
++    }
++
++    return 0;
++}
++
++
++#define OFFSET(x) offsetof(UnsandContext, x)  // Byte offset of member x in UnsandContext
++static const AVOption unsand_options[] = {  // Option table for the unsand filter
++    { NULL }  // No unsand-specific options: the table holds only this NULL entry
++};
++
++
++AVFILTER_DEFINE_CLASS(unsand);  // Provides unsand_class used by ff_vf_unsand
++
++static const AVFilterPad avfilter_vf_unsand_inputs[] = {  // Single video input pad of unsand
++    {
++        .name             = "default",
++        .type             = AVMEDIA_TYPE_VIDEO,
++        .filter_frame = filter_frame,  // Converts (or passes through) each incoming frame
++    },
++    // No { NULL } sentinel: FILTER_INPUTS() takes the pad count from the array size
++};
++
++static const AVFilterPad avfilter_vf_unsand_outputs[] = {  // Single video output pad of unsand
++    {
++        .name = "default",
++        .type = AVMEDIA_TYPE_VIDEO  // No config_props callback is set for this pad
++    },
++};
++
++AVFilter ff_vf_unsand = {  // Filter definition for unsand
++    .name          = "unsand",
++    .description   = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"),
++
++    .init          = init,  // Currently does nothing and returns 0
++    .uninit        = uninit,  // Currently an empty function
++
++    FILTER_QUERY_FUNC(query_formats),  // Output format list is derived from the input's list
++
++    .priv_size     = sizeof(UnsandContext),
++    .priv_class    = &unsand_class,
++
++    FILTER_INPUTS(avfilter_vf_unsand_inputs),
++    FILTER_OUTPUTS(avfilter_vf_unsand_outputs),
++};
++
+diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
+index 60c896e964eb..3967be997e86 100644
+--- a/libavformat/matroskaenc.c
++++ b/libavformat/matroskaenc.c
+@@ -84,6 +84,10 @@
  
- #define V4L2_CID_STATELESS_HEVC_SPS		(V4L2_CID_CODEC_STATELESS_BASE + 400)
-
-From af32492d37a1d898c05422ba405423ced65adbd7 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sat, 1 Oct 2022 13:40:57 +0000
-Subject: [PATCH 081/186] vf_deinterlace_v4l2m2m: Fix compile on m/c without
- V4L2 SAND
-
----
- libavfilter/vf_deinterlace_v4l2m2m.c | 33 +++++++++++++++++++++++-----
- 1 file changed, 28 insertions(+), 5 deletions(-)
-
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index c52dae1c44a8..716789f9881c 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -35,6 +35,8 @@
- #include <sys/mman.h>
- #include <unistd.h>
+ #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \
+                       ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER)
++
++/* Reserved size for H264 headers if not extant at init time */
++#define MAX_H264_HEADER_SIZE 1024
++
+ #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \
+                               !(mkv)->is_live)
+ 
+@@ -1136,8 +1140,12 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn
+     case AV_CODEC_ID_WAVPACK:
+         return put_wv_codecpriv(dyn_cp, extradata, extradata_size);
+     case AV_CODEC_ID_H264:
+-        return ff_isom_write_avcc(dyn_cp, extradata,
+-                                  extradata_size);
++        if (extradata_size)
++            return ff_isom_write_avcc(dyn_cp, extradata,
++                                      extradata_size);
++        else
++            *size_to_reserve = MAX_H264_HEADER_SIZE;
++        break;
+     case AV_CODEC_ID_HEVC:
+         return ff_isom_write_hvcc(dyn_cp, extradata,
+                                   extradata_size, 0);
+@@ -2966,8 +2974,8 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt)
+         }
+         break;
+ #endif
+-    // FIXME: Remove the following once libaom starts propagating proper extradata during init()
+-    //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208
++    // FIXME: Remove the following once libaom starts propagating extradata during init()
++    //        See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012
+     case AV_CODEC_ID_AV1:
+         if (side_data_size && mkv->track.bc && !par->extradata_size) {
+             // If the reserved space doesn't suffice, only write
+@@ -2979,6 +2987,16 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt)
+         } else if (!par->extradata_size)
+             return AVERROR_INVALIDDATA;
+         break;
++    // H264 V4L2 has a similar issue
++    case AV_CODEC_ID_H264:
++        if (side_data_size && mkv->track.bc && !par->extradata_size) {
++            ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size,
++                                          par, mkv->track.bc, track, 0);
++            if (ret < 0)
++                return ret;
++        } else if (!par->extradata_size)
++            return AVERROR_INVALIDDATA;
++        break;
+     default:
+         if (side_data_size)
+             av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index);
+@@ -3440,9 +3458,15 @@ static int mkv_init(struct AVFormatContext *s)
+             track->reformat = mkv_reformat_wavpack;
+             break;
+         case AV_CODEC_ID_H264:
++            // Default to reformat if no extradata as the only current
++            // encoder which does this is v4l2m2m which needs reformat
++            if (par->extradata_size == 0 ||
++                (par->extradata_size > 3 &&
++                 (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)))
++                track->reformat = mkv_reformat_h2645;
++            break;
+         case AV_CODEC_ID_HEVC:
+-            if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 ||
+-                 par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) &&
++            if (par->extradata_size > 6 &&
+                 (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))
+                 track->reformat = mkv_reformat_h2645;
+             break;
+diff --git a/libavformat/movenc.c b/libavformat/movenc.c
+index d20e45cf8107..079015da9ac4 100644
+--- a/libavformat/movenc.c
++++ b/libavformat/movenc.c
+@@ -6902,6 +6902,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt)
+     if (trk->par->codec_id == AV_CODEC_ID_MP4ALS ||
+             trk->par->codec_id == AV_CODEC_ID_AAC ||
+             trk->par->codec_id == AV_CODEC_ID_AV1 ||
++            trk->par->codec_id == AV_CODEC_ID_H264 ||
+             trk->par->codec_id == AV_CODEC_ID_FLAC) {
+         size_t side_size;
+         uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size);
+diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
+index 7b4ae37d1376..b026ed5bed4f 100644
+--- a/libavformat/rtpenc.c
++++ b/libavformat/rtpenc.c
+@@ -19,6 +19,7 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+ 
++#include "avc.h"
+ #include "avformat.h"
+ #include "mpegts.h"
+ #include "internal.h"
+@@ -586,8 +587,25 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt)
+         ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0);
+         break;
+     case AV_CODEC_ID_H264:
++    {
++        uint8_t *side_data;
++        size_t side_data_size = 0;
++
++        side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
++                                            &side_data_size);
++
++        if (side_data_size != 0) {
++            int ps_size = side_data_size;
++            uint8_t * ps_buf = NULL;
++
++            ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size);
++            av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size);
++            ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size);
++            av_free(ps_buf);
++        }
+         ff_rtp_send_h264_hevc(s1, pkt->data, size);
+         break;
++    }
+     case AV_CODEC_ID_H261:
+         ff_rtp_send_h261(s1, pkt->data, size);
+         break;
+diff --git a/libavutil/Makefile b/libavutil/Makefile
+index 6e6fa8d800ca..8fa408850998 100644
+--- a/libavutil/Makefile
++++ b/libavutil/Makefile
+@@ -76,6 +76,7 @@ HEADERS = adler32.h                                                     \
+           rational.h                                                    \
+           replaygain.h                                                  \
+           ripemd.h                                                      \
++	  rpi_sand_fns.h                                                \
+           samplefmt.h                                                   \
+           sha.h                                                         \
+           sha512.h                                                      \
+@@ -201,6 +202,7 @@ OBJS-$(CONFIG_MACOS_KPERF)              += macos_kperf.o
+ OBJS-$(CONFIG_MEDIACODEC)               += hwcontext_mediacodec.o
+ OBJS-$(CONFIG_OPENCL)                   += hwcontext_opencl.o
+ OBJS-$(CONFIG_QSV)                      += hwcontext_qsv.o
++OBJS-$(CONFIG_SAND)                     += rpi_sand_fns.o
+ OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
+ OBJS-$(CONFIG_VIDEOTOOLBOX)             += hwcontext_videotoolbox.o
+ OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
+@@ -222,6 +224,7 @@ SKIPHEADERS-$(CONFIG_D3D12VA)          += hwcontext_d3d12va.h
+ SKIPHEADERS-$(CONFIG_DXVA2)            += hwcontext_dxva2.h
+ SKIPHEADERS-$(CONFIG_QSV)              += hwcontext_qsv.h
+ SKIPHEADERS-$(CONFIG_OPENCL)           += hwcontext_opencl.h
++SKIPHEADERS-$(CONFIG-RPI)              += rpi_sand_fn_pw.h
+ SKIPHEADERS-$(CONFIG_VAAPI)            += hwcontext_vaapi.h
+ SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += hwcontext_videotoolbox.h
+ SKIPHEADERS-$(CONFIG_VDPAU)            += hwcontext_vdpau.h
+diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile
+index eba01513379a..1b44beab3942 100644
+--- a/libavutil/aarch64/Makefile
++++ b/libavutil/aarch64/Makefile
+@@ -4,3 +4,5 @@ OBJS += aarch64/cpu.o                                                 \
+ 
+ NEON-OBJS += aarch64/float_dsp_neon.o                                 \
+              aarch64/tx_float_neon.o                                  \
++             aarch64/rpi_sand_neon.o                                  \
++
+diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
+new file mode 100644
+index 000000000000..3a6bc3de74e5
+--- /dev/null
++++ b/libavutil/aarch64/rpi_sand_neon.S
+@@ -0,0 +1,672 @@
++/*
++Copyright (c) 2021 Michael Eiler
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: Michael Eiler <eiler.mike@gmail.com>
++*/
++
++#include "asm.S"
++
++// void ff_rpi_sand8_lines_to_planar_y8(
++//   uint8_t * dest,            : x0
++//   unsigned int dst_stride,   : w1
++//   const uint8_t * src,       : x2
++//   unsigned int src_stride1,  : w3, always 128
++//   unsigned int src_stride2,  : w4
++//   unsigned int _x,           : w5
++//   unsigned int y,            : w6
++//   unsigned int _w,           : w7
++//   unsigned int h);           : [sp, #0]
++
++function ff_rpi_sand8_lines_to_planar_y8, export=1
++    // w15 = h (9th argument, read from the stack): number of rows to process
++    ldr w15, [sp, #0]
++
++    // w8 will contain the number of full 128-byte blocks copied per row
++    // w8 = w1 >> 7; NOTE(review): w1 is dst_stride, not _w (w7) - confirm intent
++    // stride1 is assumed to always be 128
++    mov w8, w1
++    lsr w8, w8, #7
++
++    // in case the width of the image is not a multiple of 128, there will
++    // be an incomplete block at the end of every row
++    // w9 contains the number of pixels stored within this block
++    // w9 = _w - w8 * 128 (compared as signed below: a value <= 0 skips the tail loop)
++    lsl w9, w8, #7
++    sub w9, w7, w9
++
++    // this is the value we have to add to the src pointer after reading a complete block
++    // it will move the address to the start of the next block
++    // w10 = stride2 * 128 - 128 (the two loads have already advanced x13 by 128)
++    mov w10, w4
++    lsl w10, w10, #7
++    sub w10, w10, #128
++
++    // w11 is the row offset, i.e. the byte offset of the current row within every block
++    // it is increased by 128 (stride1) on every iteration of the row_loop
++    eor w11, w11, w11
++
++    // w12 = 0, processed row count
++    eor w12, w12, w12
++row_loop:
++    // start of the first block within the current row
++    // x13 = row offset + src
++    mov x13, x2
++    add x13, x13, x11
++
++    // w14 = 0, processed block count
++    eor w14, w14, w14
++
++    cmp w8, #0
++    beq no_main_y8
++
++block_loop:
++    // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128
++    // these registers are not used as callee-saved here, so no backup/restore is done
++    ld1 { v0.16b,  v1.16b,  v2.16b,  v3.16b}, [x13], #64
++    ld1 { v4.16b,  v5.16b,  v6.16b,  v7.16b}, [x13], #64 
++
++    // write these registers back to the destination vector and increase the dst address by 128
++    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
++    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x0], #64
++
++    // move the source register to the beginning of the next block (x13 = src + block offset)
++    add x13, x13, x10
++    // increase the block counter
++    add w14, w14, #1
++
++    // continue with the block_loop if we haven't copied all full blocks yet
++    cmp w8, w14
++    bgt block_loop
++
++    // handle the last block at the end of each row
++    // at most 127 byte values copied from src to dst, one byte per iteration
++no_main_y8:
++    eor w5, w5, w5 // i = 0 (w5 held _x, which is never read)
++incomplete_block_loop_y8:
++    cmp w5, w9
++    bge incomplete_block_loop_end_y8
++
++    ldrb w6, [x13]
++    strb w6, [x0]
++    add x13, x13, #1
++    add x0, x0, #1
++
++    add w5, w5, #1
++    b incomplete_block_loop_y8
++incomplete_block_loop_end_y8:
++    // NOTE(review): x0 is only advanced by the bytes written above; no dst_stride
++    // padding is added per row, and y (w6) is overwritten unread - confirm with callers
++    // increase the row offset by 128 (stride1)
++    add w11, w11, #128
++    // increment the row counter
++    add w12, w12, #1
++
++    // process the next row if we haven't finished yet
++    cmp w15, w12
++    bgt row_loop
++
++    ret
++endfunc
++
++
++
++// void ff_rpi_sand8_lines_to_planar_c8(
++//   uint8_t * dst_u,           : x0
++//   unsigned int dst_stride_u, : w1 == width
++//   uint8_t * dst_v,           : x2
++//   unsigned int dst_stride_v, : w3 == width
++//   const uint8_t * src,       : x4
++//   unsigned int stride1,      : w5 == 128
++//   unsigned int stride2,      : w6
++//   unsigned int _x,           : w7
++//   unsigned int y,            : [sp, #0]
++//   unsigned int _w,           : [sp, #8]
++//   unsigned int h);           : [sp, #16]
++
++function ff_rpi_sand8_lines_to_planar_c8, export=1
++    // w7 = _w (replaces _x, which is never read)
++    ldr w7, [sp, #8]
++
++    // w15 contains the number of rows we need to process
++    // counts down to 0 (see the subs at the end of row_loop_c8)
++    ldr w15, [sp, #16]
++
++    // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6
++    mov w8, w7
++    lsr w8, w8, #6
++
++    // number of pixels in block at the end of every row
++    // w9 = _w - (w8 * 64)
++    lsl w9, w8, #6
++    sub w9, w7, w9
++
++    // w12 = dst_stride_u - _w: bytes skipped at the end of each row (used for both planes)
++    sub w12, w1, w7
++
++    // address delta to the beginning of the next block
++    // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128
++    lsl w10, w6, #7
++    sub w10, w10, #128
++
++    // w11 = row address start offset = 0
++    eor w11, w11, w11
++
++row_loop_c8:
++    // start of the first block within the current row
++    // x13 = row offset + src
++    mov x13, x4
++    add x13, x13, x11
++
++    // w14 = 0, processed block count
++    eor w14, w14, w14
++
++    cmp w8, #0
++    beq no_main_c8
++
++block_loop_c8:
++    // load the full block -> 128 bytes; ld2 splits even bytes (U) and odd bytes (V)
++    ld2 { v0.16b,  v1.16b }, [x13], #32
++    ld2 { v2.16b,  v3.16b }, [x13], #32
++    ld2 { v4.16b,  v5.16b }, [x13], #32
++    ld2 { v6.16b,  v7.16b }, [x13], #32
++
++    // reorder so that v0-v3 hold the U bytes and v4-v7 the V bytes (one st1 each)
++    mov v16.16b, v1.16b
++    mov v17.16b, v3.16b
++    mov v18.16b, v5.16b
++    mov v1.16b, v2.16b
++    mov v2.16b, v4.16b
++    mov v3.16b, v6.16b
++    mov v4.16b, v16.16b
++    mov v5.16b, v17.16b
++    mov v6.16b, v18.16b
++
++    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
++    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x2], #64
++
++    // increment block counter and move src to the beginning of the next block
++    add w14, w14, #1
++    add x13, x13, x10
++
++    // jump to block_loop_c8 iff the block count is smaller than the number of full blocks
++    cmp w8, w14
++    bgt block_loop_c8
++
++no_main_c8:
++    // handle incomplete block at the end of every row
++    eor w5, w5, w5 // w5 = 0, number of U/V pairs copied from the incomplete block
++incomplete_block_loop_c8:
++    cmp w5, w9
++    bge incomplete_block_loop_end_c8
++
++    ldrb w1, [x13]
++    strb w1, [x0]
++    add x13, x13, #1
++
++    ldrb w1, [x13]
++    strb w1, [x2]
++    add x13, x13, #1
++
++    add x0, x0, #1
++    add x2, x2, #1
++
++    add w5, w5, #1
++    b incomplete_block_loop_c8
++incomplete_block_loop_end_c8:
++
++    // increase row_offset by stride1 and skip the destination row padding
++    add w11, w11, #128
++    add x0, x0, w12, sxtw
++    add x2, x2, w12, sxtw
++
++    // jump to row_loop_c8 while the remaining row count (w15) is still above zero
++    subs w15, w15, #1
++    bgt row_loop_c8
++
++    ret
++endfunc
++
++// Unzip chroma
++//
++// On entry:
++// a0 = V0, U2,  ...
++// a1 = U0, V1,  ...
++// a2 = U1, V2,  ...
++// b0 = V8, U10, ...
++// b1 = U8, V9,  ...
++// b2 = U9, V10, ...
++//
++// On exit:
++// d0 = U0, U3, ...
++// ...
++// a0 = V0, V3, ..
++// ...
++//
++// Reg order for USAND is a1, a0, a2 (i.e. swap natural order of 1st 2 dest regs)
++
++.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2
++                uzp1            \d0\().8h, \a1\().8h, \b1\().8h    // d0 = even halfwords of a1:b1
++                uzp1            \d1\().8h, \a2\().8h, \b2\().8h    // d1 = even halfwords of a2:b2
++                uzp2            \d2\().8h, \a0\().8h, \b0\().8h    // d2 = odd halfwords of a0:b0
++
++                uzp1            \a0\().8h, \a0\().8h, \b0\().8h    // a0 = even halfwords of a0:b0 (after d2 has read them)
++                uzp2            \a1\().8h, \a1\().8h, \b1\().8h    // a1 = odd halfwords of a1:b1
++                uzp2            \a2\().8h, \a2\().8h, \b2\().8h    // a2 = odd halfwords of a2:b2
++.endm
++
++// SAND30 -> 10bit: split every 32-bit word of a0/a1 into d0 = bits 0-9, d1 = bits 10-19, d2 = bits 20-29
++.macro USAND10 d0, d1, d2, a0, a1
++                shrn            \d2\().4h, \a0\().4s, #14             // halfword = word bits 14-29
++                shrn            \d1\().4h, \a0\().4s, #10             // halfword = word bits 10-25
++
++                shrn2           \d2\().8h, \a1\().4s, #14
++                shrn2           \d1\().8h, \a1\().4s, #10
++                uzp1            \d0\().8h, \a0\().8h, \a1\().8h       // low halfword (bits 0-15) of every word
++
++                ushr            \d2\().8h, \d2\().8h, #6              // leaves word bits 20-29
++                bic             \d0\().8h, #0xfc,     lsl #8          // clear top 6 bits: leaves bits 0-9
++                bic             \d1\().8h, #0xfc,     lsl #8          // clear top 6 bits: leaves bits 10-19
++.endm
++
++// SAND30 -> 8bit: as USAND10 but keeps only the top 8 bits of each 10-bit field; a0-a3 in, t0-t2 scratch
++.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2
++                shrn            \d1\().4h,  \a0\().4s,  #12           // halfword = word bits 12-27 (a0/a1)
++                shrn2           \d1\().8h,  \a1\().4s,  #12
++                uzp1            \d0\().8h,  \a0\().8h,  \a1\().8h     // low halfwords (bits 0-15)
++                uzp2            \d2\().8h,  \a0\().8h,  \a1\().8h     // high halfwords (bits 16-31)
++
++                shrn            \t1\().4h,  \a2\().4s,  #12           // same three steps for a2/a3 into t1, t0, t2
++                shrn2           \t1\().8h,  \a3\().4s,  #12
++                uzp1            \t0\().8h,  \a2\().8h,  \a3\().8h
++                uzp2            \t2\().8h,  \a2\().8h,  \a3\().8h
++
++                shrn            \d0\().8b,  \d0\().8h,  #2            // d0 bytes = word bits 2-9
++                shrn2           \d0\().16b, \t0\().8h,  #2
++                shrn            \d2\().8b,  \d2\().8h,  #6            // d2 bytes = word bits 22-29
++                shrn2           \d2\().16b, \t2\().8h,  #6
++                uzp1            \d1\().16b, \d1\().16b, \t1\().16b    // d1 bytes = word bits 12-19
++.endm
++
++
++// void ff_rpi_sand30_lines_to_planar_c16(
++//   uint8_t * dst_u,            // [x0]
++//   unsigned int dst_stride_u,  // [w1]
++//   uint8_t * dst_v,            // [x2]
++//   unsigned int dst_stride_v,  // [w3]
++//   const uint8_t * src,        // [x4]
++//   unsigned int stride1,       // [w5]      128
++//   unsigned int stride2,       // [w6]
++//   unsigned int _x,            // [w7]      0
++//   unsigned int y,             // [sp, #0]
++//   unsigned int _w,            // [sp, #8]  w9
++//   unsigned int h);            // [sp, #16] w10
++
++function ff_rpi_sand30_lines_to_planar_c16, export=1
++                ldr             w7,  [sp, #0]                   // y
++                ldr             w8,  [sp, #8]                   // _w
++                ldr             w10, [sp, #16]                  // h
++                lsl             w6,  w6,  #7                    // Fixup stride2
++                sub             w6,  w6,  #64                   // x6 = stride2 * 128 - 64 (post-index of the 2nd load)
++                uxtw            x6,  w6
++                sub             w1,  w1,  w8,  LSL #1           // Fixup chroma strides (stride - 2 * _w = row-end skip)
++                sub             w3,  w3,  w8,  LSL #1
++                lsl             w7,  w7,  #7                    // Add y to src
++                add             x4,  x4,  w7,  UXTW
++10:
++                mov             w13, #0                         // NOTE(review): w13 is not read again in this function
++                mov             x5,  x4                         // x5 = source pointer for this row
++                mov             w9,  w8                         // w9 = samples per plane still to write
++1:
++                ld1             {v0.4s-v3.4s}, [x5], #64
++                ld1             {v4.4s-v7.4s}, [x5], x6         // net advance per iteration: stride2 * 128
++                subs            w9,  w9,  #48                   // 48 samples per plane per iteration; flags used by blt/bne
++
++                USAND10         v17, v16, v18, v0, v1
++                USAND10         v20, v19, v21, v2, v3
++                UZPH_C          v0, v1, v2, v16, v17, v18, v19, v20, v21
++                USAND10         v23, v22, v24, v4, v5
++                USAND10         v26, v25, v27, v6, v7
++                UZPH_C          v4, v5, v6, v22, v23, v24, v25, v26, v27
++
++                blt             2f                              // fewer than 48 left: partial write
++
++                st3             {v0.8h-v2.8h},   [x0], #48
++                st3             {v4.8h-v6.8h},   [x0], #48
++                st3             {v16.8h-v18.8h}, [x2], #48
++                st3             {v22.8h-v24.8h}, [x2], #48
++
++                bne             1b                              // flags still from subs: loop until w9 == 0
++11:
++                subs            w10, w10, #1                    // one row done
++                add             x4,  x4,  #128                  // next source row starts 128 bytes on
++                add             x0,  x0,  w1,  UXTW             // skip destination row padding
++                add             x2,  x2,  w3,  UXTW
++                bne             10b
++99:
++                ret
++
++// Partial final write: w9 + 48 samples per plane remain; store 24/12/6/3/2/1 as needed
++2:
++                cmp             w9,  #24-48
++                blt             1f
++                st3             {v0.8h  - v2.8h},  [x0], #48
++                st3             {v16.8h - v18.8h}, [x2], #48
++                beq             11b
++                mov             v0.16b,  v4.16b                 // continue with the second half of the block
++                mov             v1.16b,  v5.16b
++                sub             w9,  w9,  #24
++                mov             v2.16b,  v6.16b
++                mov             v16.16b, v22.16b
++                mov             v17.16b, v23.16b
++                mov             v18.16b, v24.16b
++1:
++                cmp             w9,  #12-48
++                blt             1f
++                st3             {v0.4h  - v2.4h},  [x0], #24
++                st3             {v16.4h - v18.4h}, [x2], #24
++                beq             11b
++                mov             v0.d[0],  v0.d[1]               // shift the unwritten upper half down
++                sub             w9,  w9,  #12
++                mov             v1.d[0],  v1.d[1]
++                mov             v2.d[0],  v2.d[1]
++                mov             v16.d[0], v16.d[1]
++                mov             v17.d[0], v17.d[1]
++                mov             v18.d[0], v18.d[1]
++1:
++                cmp             w9,  #6-48
++                blt             1f
++                st3             {v0.h  - v2.h}[0],  [x0], #6
++                st3             {v0.h  - v2.h}[1],  [x0], #6
++                st3             {v16.h - v18.h}[0], [x2], #6
++                st3             {v16.h - v18.h}[1], [x2], #6
++                beq             11b
++                mov             v0.s[0],  v0.s[1]
++                sub             w9,  w9,  #6
++                mov             v1.s[0],  v1.s[1]
++                mov             v2.s[0],  v2.s[1]
++                mov             v16.s[0], v16.s[1]
++                mov             v17.s[0], v17.s[1]
++                mov             v18.s[0], v18.s[1]
++1:
++                cmp             w9,  #3-48
++                blt             1f
++                st3             {v0.h  - v2.h}[0],  [x0], #6
++                st3             {v16.h - v18.h}[0], [x2], #6
++                beq             11b
++                mov             v0.h[0],  v0.h[1]               // at most 2 samples remain, so v2/v18 are not needed
++                sub             w9,  w9,  #3
++                mov             v1.h[0],  v1.h[1]
++                mov             v16.h[0], v16.h[1]
++                mov             v17.h[0], v17.h[1]
++1:
++                cmp             w9,  #2-48
++                blt             1f
++                st2             {v0.h  - v1.h}[0],  [x0], #4
++                st2             {v16.h - v17.h}[0], [x2], #4
++                b               11b
++1:
++                st1             {v0.h}[0],  [x0], #2            // single remaining sample per plane
++                st1             {v16.h}[0], [x2], #2
++                b               11b
++endfunc
++
++
++//void ff_rpi_sand30_lines_to_planar_p010(
++//  uint8_t * dest,
++//  unsigned int dst_stride,
++//  const uint8_t * src,
++//  unsigned int src_stride1,
++//  unsigned int src_stride2,
++//  unsigned int _x,
++//  unsigned int y,
++//  unsigned int _w,
++//  unsigned int h);
++
++// void ff_rpi_sand30_lines_to_planar_y16(
++//   uint8_t * dest,            : x0
++//   unsigned int dst_stride,   : w1
++//   const uint8_t * src,       : x2
++//   unsigned int src_stride1,  : w3, always 128
++//   unsigned int src_stride2,  : w4
++//   unsigned int _x,           : w5
++//   unsigned int y,            : w6
++//   unsigned int _w,           : w7
++//   unsigned int h);           : [sp, #0]
++//
++// Assumes that we are starting on a stripe boundary and that overreading
++// within the stripe is OK. However it does respect the dest size for writes.
++
++function ff_rpi_sand30_lines_to_planar_y16, export=1
++                lsl             w4,  w4,  #7
++                sub             w4,  w4,  #64                   // x4 = stride2 * 128 - 64 (post-index of the 2nd load)
++                uxtw            x4,  w4
++                sub             w1,  w1,  w7, lsl #1            // w1 = dst_stride - 2 * _w (row-end skip in bytes)
++                uxtw            x6,  w6
++                add             x8,  x2,  x6, lsl #7            // x8 = src + y * 128
++                ldr             w6,  [sp, #0]                   // w6 = h, used as the row counter
++
++10:
++                mov             x2,  x8                         // x2 = source pointer for this row
++                mov             w5,  w7                         // w5 = samples still to write (replaces _x)
++1:
++                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
++                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
++
++                subs            w5,  w5,  #96                   // 96 samples per iteration; flags used by blt/bne
++
++                USAND10         v16, v17, v18, v0, v1
++                USAND10         v19, v20, v21, v2, v3
++                USAND10         v22, v23, v24, v4, v5
++                USAND10         v25, v26, v27, v6, v7
++
++                blt             2f                              // fewer than 96 left: partial write
++
++                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
++                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
++                st3             {v22.8h, v23.8h, v24.8h}, [x0], #48
++                st3             {v25.8h, v26.8h, v27.8h}, [x0], #48
++
++                bne             1b                              // flags still from subs: loop until w5 == 0
++
++11:
++                subs            w6,  w6,  #1                    // one row done
++                add             x0,  x0,  w1,  uxtw             // skip destination row padding
++                add             x8,  x8,  #128                  // next source row starts 128 bytes on
++                bne             10b
++
++                ret
++
++// Partial final write: w5 + 96 samples remain; store 48/24/12/6/3/2/1 as needed
++2:
++                cmp             w5,  #48-96
++                blt             1f
++                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
++                st3             {v19.8h, v20.8h, v21.8h}, [x0], #48
++                beq             11b
++                mov             v16.16b, v22.16b                // continue with the second half (v22-v27)
++                mov             v17.16b, v23.16b
++                sub             w5,  w5,  #48
++                mov             v18.16b, v24.16b
++                mov             v19.16b, v25.16b
++                mov             v20.16b, v26.16b
++                mov             v21.16b, v27.16b
++1:
++                cmp             w5,  #24-96
++                blt             1f
++                st3             {v16.8h, v17.8h, v18.8h}, [x0], #48
++                beq             11b
++                mov             v16.16b, v19.16b
++                mov             v17.16b, v20.16b
++                sub             w5,  w5,  #24
++                mov             v18.16b, v21.16b
++1:
++                cmp             w5,  #12-96
++                blt             1f
++                st3             {v16.4h, v17.4h, v18.4h}, [x0], #24
++                beq             11b
++                mov             v16.d[0], v16.d[1]              // shift the unwritten upper half down
++                sub             w5,  w5,  #12
++                mov             v17.d[0], v17.d[1]
++                mov             v18.d[0], v18.d[1]
++1:
++                cmp             w5,  #6-96
++                blt             1f
++                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
++                st3             {v16.h, v17.h, v18.h}[1], [x0], #6
++                beq             11b
++                mov             v16.s[0], v16.s[1]
++                sub             w5,  w5,  #6
++                mov             v17.s[0], v17.s[1]
++                mov             v18.s[0], v18.s[1]
++1:
++                cmp             w5,  #3-96
++                blt             1f
++                st3             {v16.h, v17.h, v18.h}[0], [x0], #6
++                beq             11b
++                mov             v16.h[0], v16.h[1]              // at most 2 samples remain, so v18 is not needed
++                sub             w5,  w5,  #3
++                mov             v17.h[0], v17.h[1]
++1:
++                cmp             w5,  #2-96
++                blt             1f
++                st2             {v16.h, v17.h}[0], [x0], #4
++                b               11b
++1:
++                st1             {v16.h}[0], [x0], #2            // single remaining sample
++                b               11b
++
++endfunc
++
++// void ff_rpi_sand30_lines_to_planar_y8(
++//   uint8_t * dest,            : x0
++//   unsigned int dst_stride,   : w1
++//   const uint8_t * src,       : x2
++//   unsigned int src_stride1,  : w3, always 128
++//   unsigned int src_stride2,  : w4
++//   unsigned int _x,           : w5
++//   unsigned int y,            : w6
++//   unsigned int _w,           : w7
++//   unsigned int h);           : [sp, #0]
++//
++// Assumes that we are starting on a stripe boundary and that overreading
++// within the stripe is OK. However it does respect the dest size for writes.
++
++function ff_rpi_sand30_lines_to_planar_y8, export=1
++                lsl             w4,  w4,  #7
++                sub             w4,  w4,  #64                   // x4 = stride2 * 128 - 64 (post-index of the 2nd load)
++                uxtw            x4,  w4
++                sub             w1,  w1,  w7                    // w1 = dst_stride - _w (row-end skip in bytes)
++                uxtw            x6,  w6
++                add             x8,  x2,  x6, lsl #7            // x8 = src + y * 128
++                ldr             w6,  [sp, #0]                   // w6 = h, used as the row counter
++
++10:
++                mov             x2,  x8                         // x2 = source pointer for this row
++                mov             w5,  w7                         // w5 = samples still to write (replaces _x)
++1:
++                ld1             {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64
++                ld1             {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4
++
++                subs            w5,  w5,  #96                   // 96 samples per iteration; flags used by blt/bne
++
++                // v16-v18 = first 48 samples, v19-v21 = second 48; v22-v24 are scratch only
++                USAND8          v16, v17, v18, v0, v1, v2, v3, v22, v23, v24
++                USAND8          v19, v20, v21, v4, v5, v6, v7, v22, v23, v24
++
++                blt             2f                              // fewer than 96 left: partial write
++
++                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
++                st3             {v19.16b, v20.16b, v21.16b}, [x0], #48
++
++                bne             1b                              // flags still from subs: loop until w5 == 0
++
++11:
++                subs            w6,  w6,  #1                    // one row done
++                add             x0,  x0,  w1,  uxtw             // skip destination row padding
++                add             x8,  x8,  #128                  // next source row starts 128 bytes on
++                bne             10b
++
++                ret
++
++// Partial final write: w5 + 96 samples remain; store 48/24/12/6/3/2/1 as needed
++2:
++                cmp             w5,  #48-96
++                blt             1f
++                st3             {v16.16b, v17.16b, v18.16b}, [x0], #48
++                beq             11b
++                mov             v16.16b, v19.16b                // second 48 samples live in v19-v21, not in the
++                mov             v17.16b, v20.16b                // USAND8 scratch registers v22-v24
++                sub             w5,  w5,  #48
++                mov             v18.16b, v21.16b
++1:
++                cmp             w5,  #24-96
++                blt             1f
++                st3             {v16.8b, v17.8b, v18.8b}, [x0], #24
++                beq             11b
++                mov             v16.d[0], v16.d[1]              // shift the unwritten upper half down
++                sub             w5,  w5,  #24
++                mov             v17.d[0], v17.d[1]
++                mov             v18.d[0], v18.d[1]
++1:
++                cmp             w5,  #12-96
++                blt             1f
++                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[2], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[3], [x0], #3
++                beq             11b
++                mov             v16.s[0], v16.s[1]
++                sub             w5,  w5,  #12
++                mov             v17.s[0], v17.s[1]
++                mov             v18.s[0], v18.s[1]
++1:
++                cmp             w5,  #6-96
++                blt             1f
++                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
++                st3             {v16.b, v17.b, v18.b}[1], [x0], #3
++                beq             11b
++                mov             v16.h[0], v16.h[1]
++                sub             w5,  w5,  #6
++                mov             v17.h[0], v17.h[1]
++                mov             v18.h[0], v18.h[1]
++1:
++                cmp             w5,  #3-96
++                blt             1f
++                st3             {v16.b, v17.b, v18.b}[0], [x0], #3
++                beq             11b
++                mov             v16.b[0], v16.b[1]              // at most 2 samples remain, so v18 is not needed
++                sub             w5,  w5,  #3
++                mov             v17.b[0], v17.b[1]
++1:
++                cmp             w5,  #2-96
++                blt             1f
++                st2             {v16.b, v17.b}[0], [x0], #2
++                b               11b
++1:
++                st1             {v16.b}[0], [x0], #1            // single remaining sample
++                b               11b
++
++endfunc
++
+diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h
+new file mode 100644
+index 000000000000..e0e94f6aa1a2
+--- /dev/null
++++ b/libavutil/aarch64/rpi_sand_neon.h
+@@ -0,0 +1,61 @@
++/*
++Copyright (c) 2021 Michael Eiler
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: Michael Eiler <eiler.mike@gmail.com>
++*/
++
++#ifndef AVUTIL_AARCH64_RPI_SAND_NEON_H
++#define AVUTIL_AARCH64_RPI_SAND_NEON_H
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
++  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u,
++  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src,
++  unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y,
++  unsigned int _w, unsigned int h);
++
++void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride,
++  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u,
++  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1,
++  unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
++  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* AVUTIL_AARCH64_RPI_SAND_NEON_H */
+diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile
+index 5da44b05427a..b74b7c4e2f25 100644
+--- a/libavutil/arm/Makefile
++++ b/libavutil/arm/Makefile
+@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o                                    \
+ 
+ NEON-OBJS += arm/float_dsp_init_neon.o                                  \
+              arm/float_dsp_neon.o                                       \
++             arm/rpi_sand_neon.o                                        \
+diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S
+new file mode 100644
+index 000000000000..60e697f6819b
+--- /dev/null
++++ b/libavutil/arm/rpi_sand_neon.S
+@@ -0,0 +1,925 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#include "libavutil/arm/asm.S"
++
++
++@ General notes:
++@ Having done some timing on this in sand8->y8 (Pi4)
++@  vst1 (680fps) is a bit faster than vstm (660fps)
++@  vldm (680fps) is noticeably faster than vld1 (480fps)
++@  (or it might be that a mix is what is required)
++@
++@ At least on a Pi4 it is no more expensive to have a single auto-inc register
++@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted
++@ the latter was better)
++@
++@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless
++@ the memory is uncached.
++@ As these are Sand -> planar we can assume that src is going to be aligned but
++@ it is possible that dest isn't (converting to .yuv or other packed format).
++@ Luckily vst1 is faster than vstm :-) so all is well
++@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4
++@ .8 stores would let us do non-word aligned stores into uncached but it
++@ probably isn't worth it.
++
++
++
++
++@ void ff_rpi_sand128b_stripe_to_8_10(
++@   uint8_t * dest,             // [r0]
++@   const uint8_t * src1,       // [r1]
++@   const uint8_t * src2,       // [r2]
++@   unsigned int lines);        // [r3]
++
++.macro  stripe2_to_8, bit_depth                  @ body for (dest=r0, src1=r1, src2=r2, lines=r3)
++        vpush    {q4-q7}                         @ q4-q7 (d8-d15) are callee-saved under AAPCS
++1:
++        vldm     r1!, {q0-q7}                    @ 128 bytes from src1, pointer post-incremented
++        subs     r3, #1                          @ one line consumed; flags are used by bne below
++        vldm     r2!, {q8-q15}                   @ 128 bytes from src2, pointer post-incremented
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8    @ u16 -> u8: rounding, saturating right shift
++        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
++        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
++        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
++        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
++        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
++        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
++        vqrshrn.u16 d7,  q7,  #\bit_depth - 8
++        vqrshrn.u16 d8,  q8,  #\bit_depth - 8    @ src2 results land in d8-d15 (= q4-q7)
++        vqrshrn.u16 d9,  q9,  #\bit_depth - 8
++        vqrshrn.u16 d10, q10, #\bit_depth - 8
++        vqrshrn.u16 d11, q11, #\bit_depth - 8
++        vqrshrn.u16 d12, q12, #\bit_depth - 8
++        vqrshrn.u16 d13, q13, #\bit_depth - 8
++        vqrshrn.u16 d14, q14, #\bit_depth - 8
++        vqrshrn.u16 d15, q15, #\bit_depth - 8
++        vstm     r0!, {q0-q7}                    @ 128 output bytes: src1 half then src2 half
++        bne      1b                              @ NEON ops leave flags alone: still from subs
++        vpop     {q4-q7}
++        bx       lr
++.endm
++
++function ff_rpi_sand128b_stripe_to_8_10, export=1
++        stripe2_to_8     10                      @ 10-bit input -> shift right by 2
++endfunc
++
++@ void ff_rpi_sand8_lines_to_planar_y8(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand8_lines_to_planar_y8, export=1
++                push            {r4-r8, lr}     @ +24: stack args now start at [sp, #24]
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ _w
++                ldr             r7,  [sp, #32]  @ y
++                lsl             r3,  #7         @ r3 = src_stride2 * 128: step between 128-byte loads
++                sub             r1,  r6         @ r1 = dst_stride - _w (dest padding per row)
++                add             r8,  r2,  r7,  lsl #7  @ r8 = src + y * 128 (first source line)
++                ldr             r7,  [sp, #40]  @ h (row counter)
++
++10:
++                mov             r2,  r8         @ r2 = source cursor for this row
++                add             r4,  r0,  #24   @ NOTE(review): r4 is never read in this function
++                mov             r5,  r6         @ r5 = bytes left to write in this row
++                mov             lr,  #0         @ NOTE(review): lr is never read in this function
++1:
++                vldm            r2,  {q8-q15}   @ always load 128 bytes (may over-read past _w)
++                add             r2,  r3         @ advance by src_stride2 * 128
++                subs            r5,  #128
++                blt             2f              @ fewer than 128 bytes left -> partial write
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                vst1.8          {d20, d21, d22, d23}, [r0]!
++                vst1.8          {d24, d25, d26, d27}, [r0]!
++                vst1.8          {d28, d29, d30, d31}, [r0]!
++                bne             1b              @ flags still from subs: loop unless row is done
++11:
++                subs            r7,  #1         @ one row finished
++                add             r0,  r1         @ skip dest padding to the next row
++                add             r8,  #128       @ next source line
++                bne             10b
++
++                pop             {r4-r8, pc}
++
++@ Partial final write (here r5 = bytes remaining - 128, so r5 is negative)
++2:
++                cmp             r5,  #64-128    @ at least 64 bytes left?
++                blt             1f
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                vst1.8          {d20, d21, d22, d23}, [r0]!
++                beq             11b             @ exactly 64 -> row done
++                vmov            q8,  q12        @ move the unwritten upper 64 bytes down to q8-q11
++                vmov            q9,  q13
++                sub             r5,  #64
++                vmov            q10, q14
++                vmov            q11, q15
++1:
++                cmp             r5,  #32-128    @ at least 32 bytes left?
++                blt             1f
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                beq             11b
++                vmov            q8,  q10        @ move the next 32 bytes down to q8-q9
++                sub             r5,  #32
++                vmov            q9,  q11
++1:
++                cmp             r5,  #16-128    @ at least 16 bytes left?
++                blt             1f
++                vst1.8          {d16, d17}, [r0]!
++                beq             11b
++                sub             r5,  #16
++                vmov            q8,  q9
++1:
++                cmp             r5,  #8-128     @ at least 8 bytes left?
++                blt             1f
++                vst1.8          {d16}, [r0]!
++                beq             11b
++                sub             r5,  #8
++                vmov            d16, d17
++1:
++                cmp             r5,  #4-128     @ at least 4 bytes left?
++                blt             1f
++                vst1.32         {d16[0]}, [r0]!
++                beq             11b
++                sub             r5,  #4
++                vshr.u64        d16, #32        @ bring the upper 4 bytes of d16 down
++1:
++                cmp             r5,  #2-128     @ 2 or 3 bytes left?
++                blt             1f
++                vst1.16         {d16[0]}, [r0]!
++                beq             11b             @ exactly 2 -> row done
++                vst1.8          {d16[2]}, [r0]! @ third byte
++                b               11b
++1:
++                vst1.8          {d16[0]}, [r0]! @ last single byte
++                b               11b
++endfunc
++
++@ void ff_rpi_sand8_lines_to_planar_c8(
++@   uint8_t * dst_u,            // [r0]
++@   unsigned int dst_stride_u,  // [r1]
++@   uint8_t * dst_v,            // [r2]
++@   unsigned int dst_stride_v,  // [r3]
++@   const uint8_t * src,        // [sp, #0]  -> r4, r5
++@   unsigned int stride1,       // [sp, #4]  128
++@   unsigned int stride2,       // [sp, #8]  -> r8
++@   unsigned int _x,            // [sp, #12] 0
++@   unsigned int y,             // [sp, #16] (r7 in prefix)
++@   unsigned int _w,            // [sp, #20] -> r12, r6
++@   unsigned int h);            // [sp, #24] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand8_lines_to_planar_c8, export=1
++                push            {r4-r8, lr}     @ +24: stack args now start at [sp, #24]
++
++                ldr             r5,  [sp, #24]  @ src
++                ldr             r8,  [sp, #32]  @ stride2
++                ldr             r7,  [sp, #40]  @ y
++                ldr             r6,  [sp, #44]  @ _w
++                lsl             r8,  #7         @ r8 = stride2 * 128: step between 128-byte loads
++                add             r5,  r5,  r7,  lsl #7  @ r5 = src + y * 128 (first source line)
++                sub             r1,  r1,  r6    @ r1 = dst_stride_u - _w
++                sub             r3,  r3,  r6    @ r3 = dst_stride_v - _w
++                ldr             r7,  [sp, #48]  @ h (row counter)
++                vpush           {q4-q7}         @ callee-saved; pushed after the ldrs so offsets above hold
++
++10:
++                mov             r4,  r5         @ r4 = source cursor for this row
++                mov             r12, r6         @ r12 = bytes left per output plane in this row
++1:
++                subs            r12, #64        @ first 128-byte load yields 64 bytes per plane
++                vldm            r4,  {q0-q7}
++                add             r4,  r8
++                it              gt
++                vldmgt          r4,  {q8-q15}   @ second load only if more than 64 were left
++                add             r4,  r8
++
++                vuzp.8          q0,  q1         @ de-interleave: even bytes -> 1st reg, odd -> 2nd
++                vuzp.8          q2,  q3
++                vuzp.8          q4,  q5
++                vuzp.8          q6,  q7
++
++                vuzp.8          q8,  q9
++                vuzp.8          q10, q11
++                vuzp.8          q12, q13
++                vuzp.8          q14, q15
++                subs            r12, #64        @ sets the flags for blt/bne below
++
++                @ Rearrange regs so we can use vst1 with 4 regs
++                vswp            q1,  q2
++                vswp            q5,  q6
++                vswp            q9,  q10
++                vswp            q13, q14
++                blt             2f              @ fewer than 128 per plane left -> partial write
++
++                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!  @ even-byte halves -> dst_u
++                vst1.8          {d8,  d9,  d10, d11}, [r0]!
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                vst1.8          {d24, d25, d26, d27}, [r0]!
++
++                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!  @ odd-byte halves -> dst_v
++                vst1.8          {d12, d13, d14, d15}, [r2]!
++                vst1.8          {d20, d21, d22, d23}, [r2]!
++                vst1.8          {d28, d29, d30, d31}, [r2]!
++                bne             1b              @ flags still from subs: loop unless row is done
++11:
++                subs            r7,  #1         @ one row finished
++                add             r5,  #128       @ next source line
++                add             r0,  r1         @ skip dst_u padding
++                add             r2,  r3         @ skip dst_v padding
++                bne             10b
++                vpop            {q4-q7}
++                pop             {r4-r8,pc}
++
++2:                                              @ Partial final write: r12 = remaining - 128
++                cmp             r12, #64-128    @ at least 64 bytes per plane left?
++                blt             1f
++                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
++                vst1.8          {d8,  d9,  d10, d11}, [r0]!
++                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
++                vst1.8          {d12, d13, d14, d15}, [r2]!
++                beq             11b             @ exactly 64 -> row done
++                sub             r12, #64
++                vmov            q0,  q8         @ bring the second load's data down to q0-q7
++                vmov            q1,  q9
++                vmov            q2,  q10
++                vmov            q3,  q11
++                vmov            q4,  q12
++                vmov            q5,  q13
++                vmov            q6,  q14
++                vmov            q7,  q15
++1:
++                cmp             r12, #32-128    @ at least 32 left?
++                blt             1f
++                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
++                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
++                beq             11b
++                sub             r12, #32
++                vmov            q0,  q4
++                vmov            q1,  q5
++                vmov            q2,  q6
++                vmov            q3,  q7
++1:
++                cmp             r12, #16-128    @ at least 16 left?
++                blt             1f
++                vst1.8          {d0,  d1 }, [r0]!
++                vst1.8          {d4,  d5 }, [r2]!
++                beq             11b
++                sub             r12, #16
++                vmov            q0,  q1
++                vmov            q2,  q3
++1:
++                cmp             r12, #8-128     @ at least 8 left?
++                blt             1f
++                vst1.8          {d0}, [r0]!
++                vst1.8          {d4}, [r2]!
++                beq             11b
++                sub             r12, #8
++                vmov            d0,  d1
++                vmov            d4,  d5
++1:
++                cmp             r12, #4-128     @ at least 4 left?
++                blt             1f
++                vst1.32         {d0[0]}, [r0]!
++                vst1.32         {d4[0]}, [r2]!
++                beq             11b
++                sub             r12, #4
++                vmov            s0,  s1         @ upper word of d0 -> lower word
++                vmov            s8,  s9         @ upper word of d4 -> lower word
++1:
++                cmp             r12, #2-128     @ 2 or 3 left?
++                blt             1f
++                vst1.16         {d0[0]}, [r0]!
++                vst1.16         {d4[0]}, [r2]!
++                beq             11b             @ exactly 2 -> row done
++                vst1.8          {d0[2]}, [r0]!  @ third byte
++                vst1.8          {d4[2]}, [r2]!
++                b               11b
++1:
++                vst1.8          {d0[0]}, [r0]!  @ last single byte per plane
++                vst1.8          {d4[0]}, [r2]!
++                b               11b
++endfunc
++
++
++
++@ void ff_rpi_sand30_lines_to_planar_y16(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_y16, export=1
++                push            {r4-r8, lr}     @ +24: stack args now start at [sp, #24]
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ _w
++                ldr             r7,  [sp, #32]  @ y
++                mov             r12, #48        @ post-increment for the paired vst3 stores below
++                sub             r3,  #1
++                lsl             r3,  #7         @ r3 = (src_stride2 - 1) * 128
++                sub             r1,  r1,  r6,  lsl #1  @ r1 = dst_stride - _w * 2 (16-bit output)
++                add             r8,  r2,  r7,  lsl #7  @ r8 = src + y * 128 (first source line)
++                ldr             r7,  [sp, #40]  @ h (row counter)
++
++10:
++                mov             r2,  r8         @ r2 = source cursor for this row
++                add             r4,  r0,  #24   @ r4 = second store pointer, 24 bytes ahead of r0
++                mov             r5,  r6         @ r5 = samples left to write in this row
++                mov             lr,  #0         @ lr = byte offset within the current 128-byte line
++1:
++                vldm            r2!, {q10-q13}  @ 64 bytes = 16 words, three 10-bit samples per word
++                add             lr,  #64
++
++                vshrn.u32       d4 , q10, #14    @ Cannot vshrn.u32 #20!
++                ands            lr,  #127       @ Z set once a whole 128-byte line has been read
++                vshrn.u32       d2,  q10, #10   @ bits 10-25 of each word
++                vmovn.u32       d0,  q10        @ bits 0-15 of each word
++
++                vshrn.u32       d5,  q11, #14
++                it              eq
++                addeq           r2,  r3         @ end of line: total advance is src_stride2 * 128
++                vshrn.u32       d3,  q11, #10
++                vmovn.u32       d1,  q11
++
++                subs            r5,  #48        @ 48 samples per iteration; flags for blt/bne below
++                vshr.u16        q2,  #6         @ (word >> 14) >> 6 leaves bits 20-29
++                vbic.u16        q0,  #0xfc00    @ keep only the low 10 bits
++                vbic.u16        q1,  #0xfc00
++
++                vshrn.u32       d20, q12, #14   @ same unpack for the second 32 bytes
++                vshrn.u32       d18, q12, #10
++                vmovn.u32       d16, q12
++
++                vshrn.u32       d21, q13, #14
++                vshrn.u32       d19, q13, #10
++                vmovn.u32       d17, q13
++
++                vshr.u16        q10, #6
++                vbic.u16        q8,  #0xfc00
++                vbic.u16        q9 , #0xfc00
++                blt             2f              @ fewer than 48 samples left -> partial write
++
++                vst3.16         {d0,  d2,  d4},  [r0], r12  @ 12 samples (24 bytes), r0 += 48
++                vst3.16         {d1,  d3,  d5},  [r4], r12  @ next 12 samples via r4 = r0 + 24
++                vst3.16         {d16, d18, d20}, [r0], r12
++                vst3.16         {d17, d19, d21}, [r4], r12
++
++                bne             1b              @ flags still from subs: loop unless row is done
++
++11:
++                subs            r7,  #1         @ one row finished
++                add             r0,  r1         @ skip dest padding to the next row
++                add             r8,  #128       @ next source line
++                bne             10b
++
++                pop             {r4-r8, pc}
++
++@ Partial final write (here r5 = samples remaining - 48, so r5 is negative)
++2:
++                cmp             r5,  #24-48     @ at least 24 samples left?
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0], r12
++                vst3.16         {d1,  d3,  d5},  [r4]
++                beq             11b             @ exactly 24 -> row done
++                vmov            q0,  q8         @ bring the second half's samples down
++                sub             r5,  #24
++                vmov            q1,  q9
++                vmov            q2,  q10
++1:
++                cmp             r5,  #12-48     @ at least 12 samples left?
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0]!
++                beq             11b
++                vmov            d0, d1
++                sub             r5, #12
++                vmov            d2, d3
++                vmov            d4, d5
++1:
++                cmp             r5,  #6-48      @ at least 6 samples left?
++                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]  @ samples 0-2
++                vst3.16         {d0[1], d2[1], d4[1]}, [r4]  @ samples 3-5
++                add             r0,  #12
++                beq             11b
++                vmov            s0,  s1         @ upper word of each d reg -> lower word
++                sub             r5,  #6
++                vmov            s4,  s5
++                vmov            s8,  s9
++1:
++                cmp             r5, #3-48       @ at least 3 samples left?
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
++                beq             11b
++                sub             r5, #3
++                vshr.u32        d0, #16         @ lane 1 -> lane 0 for the last 1-2 samples
++                vshr.u32        d2, #16
++1:
++                cmp             r5, #2-48       @ 2 samples left?
++                blt             1f
++                vst2.16         {d0[0], d2[0]}, [r0]!
++                b               11b
++1:
++                vst1.16         {d0[0]}, [r0]!  @ last single sample
++                b               11b
++
++endfunc
++
++
++@ void ff_rpi_sand30_lines_to_planar_c16(
++@   uint8_t * dst_u,            // [r0]
++@   unsigned int dst_stride_u,  // [r1]
++@   uint8_t * dst_v,            // [r2]
++@   unsigned int dst_stride_v,  // [r3]
++@   const uint8_t * src,        // [sp, #0]  -> r4, r5
++@   unsigned int stride1,       // [sp, #4]  128
++@   unsigned int stride2,       // [sp, #8]  -> r8
++@   unsigned int _x,            // [sp, #12] 0
++@   unsigned int y,             // [sp, #16] (r7 in prefix)
++@   unsigned int _w,            // [sp, #20] -> r6, r9
++@   unsigned int h);            // [sp, #24] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_c16, export=1
++                push            {r4-r10, lr}    @ +32: stack args now start at [sp, #32]
++                ldr             r5,  [sp, #32]  @ src
++                ldr             r8,  [sp, #40]  @ stride2
++                ldr             r7,  [sp, #48]  @ y
++                ldr             r9,  [sp, #52]  @ _w
++                mov             r12, #48        @ post-increment for the paired vst3 stores below
++                sub             r8,  #1
++                lsl             r8,  #7         @ r8 = (stride2 - 1) * 128
++                add             r5,  r5,  r7,  lsl #7  @ r5 = src + y * 128 (first source line)
++                sub             r1,  r1,  r9,  lsl #1  @ r1 = dst_stride_u - _w * 2
++                sub             r3,  r3,  r9,  lsl #1  @ r3 = dst_stride_v - _w * 2
++                ldr             r7,  [sp, #56]  @ h (row counter)
++10:
++                mov             lr,  #0         @ lr = byte offset within the current 128-byte line
++                mov             r4,  r5         @ r4 = source cursor for this row
++                mov             r6,  r9         @ r6 = samples left per plane in this row
++1:
++                vldm            r4!, {q0-q3}    @ 64 bytes = 16 words, three 10-bit samples per word
++                add             lr,  #64
++
++                @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2
++                vshrn.u32       d20, q0,  #14   @ bits 14-29 of each word
++                vmovn.u32       d18, q0         @ bits 0-15 of each word
++                vshrn.u32       d0,  q0,  #10   @ bits 10-25 of each word
++                ands            lr,  #127       @ Z set once a whole 128-byte line has been read
++
++                vshrn.u32       d21, q1,  #14
++                vmovn.u32       d19, q1
++                vshrn.u32       d1,  q1,  #10
++
++                vshrn.u32       d22, q2,  #10
++                vmovn.u32       d2,  q2
++                vshrn.u32       d4,  q2,  #14
++
++                add             r10, r0,  #24   @ r10 = second dst_u store pointer
++                vshrn.u32       d23, q3,  #10
++                vmovn.u32       d3,  q3
++                vshrn.u32       d5,  q3,  #14
++
++                it              eq
++                addeq           r4,  r8         @ end of line: total advance is stride2 * 128
++                vuzp.16         q0,  q11
++                vuzp.16         q9,  q1
++                vuzp.16         q10, q2
++
++                @ q0   V0, V3,..
++                @ q9   U0, U3...
++                @ q10  U1, U4...
++                @ q11  U2, U5,..
++                @ q1   V1, V4,
++                @ q2   V2, V5,..
++
++                subs            r6,  #24        @ 24 samples per plane per iteration; flags for blt/bne
++                vbic.u16        q11, #0xfc00    @ keep only the low 10 bits
++                vbic.u16        q9,  #0xfc00
++                vshr.u16        q10, #6         @ (word >> 14) >> 6 leaves bits 20-29
++                vshr.u16        q2,  #6
++                vbic.u16        q0,  #0xfc00
++                vbic.u16        q1,  #0xfc00
++
++                blt             2f              @ fewer than 24 left per plane -> partial write
++
++                vst3.16         {d18, d20, d22}, [r0],  r12  @ 12 samples to dst_u, r0 += 48
++                vst3.16         {d19, d21, d23}, [r10]       @ next 12 samples at old r0 + 24
++                add             r10, r2,  #24   @ r10 = second dst_v store pointer
++                vst3.16         {d0,  d2,  d4},  [r2],  r12  @ 12 samples to dst_v, r2 += 48
++                vst3.16         {d1,  d3,  d5},  [r10]
++
++                bne             1b              @ flags still from subs: loop unless row is done
++
++11:
++                subs            r7,  #1         @ one row finished
++                add             r5,  #128       @ next source line
++                add             r0,  r1         @ skip dst_u padding
++                add             r2,  r3         @ skip dst_v padding
++                bne             10b
++
++                pop             {r4-r10, pc}
++
++@ Partial final write (here r6 = samples remaining - 24, so r6 is negative)
++2:
++                cmp             r6,  #-12       @ at least 12 left per plane?
++                blt             1f
++                vst3.16         {d18, d20, d22}, [r0]!
++                vst3.16         {d0,  d2,  d4},  [r2]!
++                beq             11b             @ exactly 12 -> row done
++                vmov            d18, d19        @ bring the upper halves down
++                vmov            d20, d21
++                vmov            d22, d23
++                sub             r6,  #12
++                vmov            d0,  d1
++                vmov            d2,  d3
++                vmov            d4,  d5
++1:
++                cmp             r6,  #-18       @ at least 6 left? (flags consumed by blt below)
++                @ Rezip here as it makes the remaining tail handling easier
++                vzip.16         d0,  d18
++                vzip.16         d2,  d20
++                vzip.16         d4,  d22
++                blt             1f
++                vst3.16         {d0[1],  d2[1],  d4[1]},  [r0]!  @ odd lanes -> dst_u
++                vst3.16         {d0[0],  d2[0],  d4[0]},  [r2]!  @ even lanes -> dst_v
++                vst3.16         {d0[3],  d2[3],  d4[3]},  [r0]!
++                vst3.16         {d0[2],  d2[2],  d4[2]},  [r2]!
++                beq             11b
++                vmov            d0,  d18
++                vmov            d2,  d20
++                sub             r6,  #6
++                vmov            d4,  d22
++1:
++                cmp             r6,  #-21       @ at least 3 left?
++                blt             1f
++                vst3.16         {d0[1], d2[1], d4[1]}, [r0]!
++                vst3.16         {d0[0], d2[0], d4[0]}, [r2]!
++                beq             11b
++                vmov            s4,  s5         @ upper word of d2 -> lower word
++                sub             r6,  #3
++                vmov            s0,  s1         @ upper word of d0 -> lower word
++1:
++                cmp             r6,  #-22       @ 2 left?
++                blt             1f
++                vst2.16         {d0[1], d2[1]}, [r0]!
++                vst2.16         {d0[0], d2[0]}, [r2]!
++                b               11b
++1:
++                vst1.16         {d0[1]}, [r0]!  @ last single sample per plane
++                vst1.16         {d0[0]}, [r2]!
++                b               11b
++
++endfunc
++
++@ void ff_rpi_sand30_lines_to_planar_p010(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_p010, export=1
++                push            {r4-r8, lr}     @ +24: stack args now start at [sp, #24]
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ _w
++                ldr             r7,  [sp, #32]  @ y
++                mov             r12, #48        @ post-increment for the paired vst3 stores below
++                vmov.u16        q15, #0xffc0    @ mask keeping the top 10 bits of each 16-bit lane
++                sub             r3,  #1
++                lsl             r3,  #7         @ r3 = (src_stride2 - 1) * 128
++                sub             r1,  r1,  r6,  lsl #1  @ r1 = dst_stride - _w * 2 (16-bit output)
++                add             r8,  r2,  r7,  lsl #7  @ r8 = src + y * 128 (first source line)
++                ldr             r7,  [sp, #40]  @ h (row counter)
++
++10:
++                mov             r2,  r8         @ r2 = source cursor for this row
++                add             r4,  r0,  #24   @ r4 = second store pointer, 24 bytes ahead of r0
++                mov             r5,  r6         @ r5 = samples left to write in this row
++                mov             lr,  #0         @ lr = byte offset within the current 128-byte line
++1:
++                vldm            r2!, {q10-q13}  @ 64 bytes = 16 words, three 10-bit samples per word
++                add             lr,  #64
++
++                vshl.u32        q14, q10, #6    @ move bits 0-9 up to bits 6-15
++                ands            lr,  #127       @ Z set once a whole 128-byte line has been read
++                vshrn.u32       d4,  q10, #14   @ bits 20-29 land in bits 6-15
++                vshrn.u32       d2,  q10, #4    @ bits 10-19 land in bits 6-15
++                vmovn.u32       d0,  q14
++
++                vshl.u32        q14, q11, #6
++                it              eq
++                addeq           r2,  r3         @ end of line: total advance is src_stride2 * 128
++                vshrn.u32       d5,  q11, #14
++                vshrn.u32       d3,  q11, #4
++                vmovn.u32       d1,  q14
++
++                subs            r5,  #48        @ 48 samples per iteration; flags for blt/bne below
++                vand            q2,  q15        @ clear the low 6 bits (leftovers of other samples)
++                vand            q1,  q15
++                vand            q0,  q15
++
++                vshl.u32        q14, q12, #6    @ same unpack for the second 32 bytes
++                vshrn.u32       d20, q12, #14
++                vshrn.u32       d18, q12, #4
++                vmovn.u32       d16, q14
++
++                vshl.u32        q14, q13, #6
++                vshrn.u32       d21, q13, #14
++                vshrn.u32       d19, q13, #4
++                vmovn.u32       d17, q14
++
++                vand            q10, q15
++                vand            q9,  q15
++                vand            q8,  q15
++                blt             2f              @ fewer than 48 samples left -> partial write
++
++                vst3.16         {d0,  d2,  d4},  [r0], r12  @ 12 samples (24 bytes), r0 += 48
++                vst3.16         {d1,  d3,  d5},  [r4], r12  @ next 12 samples via r4 = r0 + 24
++                vst3.16         {d16, d18, d20}, [r0], r12
++                vst3.16         {d17, d19, d21}, [r4], r12
++
++                bne             1b              @ flags still from subs: loop unless row is done
++
++11:
++                subs            r7,  #1         @ one row finished
++                add             r0,  r1         @ skip dest padding to the next row
++                add             r8,  #128       @ next source line
++                bne             10b
++
++                pop             {r4-r8, pc}
++
++@ Partial final write (here r5 = samples remaining - 48, so r5 is negative)
++2:
++                cmp             r5,  #24-48     @ at least 24 samples left?
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0], r12
++                vst3.16         {d1,  d3,  d5},  [r4]
++                beq             11b             @ exactly 24 -> row done
++                vmov            q0,  q8         @ bring the second half's samples down
++                sub             r5,  #24
++                vmov            q1,  q9
++                vmov            q2,  q10
++1:
++                cmp             r5,  #12-48     @ at least 12 samples left?
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0]!
++                beq             11b
++                vmov            d0, d1
++                sub             r5, #12
++                vmov            d2, d3
++                vmov            d4, d5
++1:
++                cmp             r5,  #6-48      @ at least 6 samples left?
++                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]  @ samples 0-2
++                vst3.16         {d0[1], d2[1], d4[1]}, [r4]  @ samples 3-5
++                add             r0,  #12
++                beq             11b
++                vmov            s0,  s1         @ upper word of each d reg -> lower word
++                sub             r5,  #6
++                vmov            s4,  s5
++                vmov            s8,  s9
++1:
++                cmp             r5, #3-48       @ at least 3 samples left?
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
++                beq             11b
++                sub             r5, #3
++                vshr.u32        d0, #16         @ lane 1 -> lane 0 for the last 1-2 samples
++                vshr.u32        d2, #16
++1:
++                cmp             r5, #2-48       @ 2 samples left?
++                blt             1f
++                vst2.16         {d0[0], d2[0]}, [r0]!
++                b               11b
++1:
++                vst1.16         {d0[0]}, [r0]!  @ last single sample
++                b               11b
++
++endfunc
++
++
++@ void ff_rpi_sand30_lines_to_planar_y8(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_y8, export=1
++                push            {r4-r8, lr}     @ +24
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ w
++                ldr             r7,  [sp, #32]  @ y
++                mov             r12, #48
++                lsl             r3,  #7
++                sub             r1,  r1,  r6
++                add             r8,  r2,  r7,  lsl #7
++                ldr             r7,  [sp, #40]  @ h
++
++10:
++                mov             r2,  r8
++                add             r4,  r0,  #24
++                mov             r5,  r6
++1:
++                vldm            r2,  {q8-q15}
++
++                subs            r5,  #96
++
++                vmovn.u32       d0,  q8
++                vshrn.u32       d2,  q8,  #12
++                vshrn.u32       d4,  q8,  #16    @ Cannot vshrn.u32 #20!
++
++                add             r2,  r3
++
++                vmovn.u32       d1,  q9
++                vshrn.u32       d3,  q9,  #12
++                vshrn.u32       d5,  q9,  #16
++
++                pld             [r2, #0]
++
++                vshrn.u16       d0,  q0,  #2
++                vmovn.u16       d1,  q1
++                vshrn.u16       d2,  q2,  #6
++
++                vmovn.u32       d16, q10
++                vshrn.u32       d18, q10, #12
++                vshrn.u32       d20, q10, #16
++
++                vmovn.u32       d17, q11
++                vshrn.u32       d19, q11, #12
++                vshrn.u32       d21, q11, #16
++
++                pld             [r2, #64]
++
++                vshrn.u16       d4,  q8,  #2
++                vmovn.u16       d5,  q9
++                vshrn.u16       d6,  q10, #6
++
++                vmovn.u32       d16, q12
++                vshrn.u32       d18, q12, #12
++                vshrn.u32       d20, q12, #16
++
++                vmovn.u32       d17, q13
++                vshrn.u32       d19, q13, #12
++                vshrn.u32       d21, q13, #16
++
++                vshrn.u16       d16, q8,  #2
++                vmovn.u16       d17, q9
++                vshrn.u16       d18, q10, #6
++
++                vmovn.u32       d20, q14
++                vshrn.u32       d22, q14, #12
++                vshrn.u32       d24, q14, #16
++
++                vmovn.u32       d21, q15
++                vshrn.u32       d23, q15, #12
++                vshrn.u32       d25, q15, #16
++
++                vshrn.u16       d20, q10, #2
++                vmovn.u16       d21, q11
++                vshrn.u16       d22, q12, #6
++
++                blt             2f
++
++                vst3.8          {d0,  d1,  d2},  [r0], r12
++                vst3.8          {d4,  d5,  d6},  [r4], r12
++                vst3.8          {d16, d17, d18}, [r0], r12
++                vst3.8          {d20, d21, d22}, [r4], r12
++
++                bne             1b
++
++11:
++                subs            r7,  #1
++                add             r0,  r1
++                add             r8,  #128
++                bne             10b
++
++                pop             {r4-r8, pc}
++
++@ Partial final write
++2:
++                cmp             r5,  #48-96
++                blt             1f
++                vst3.8          {d0,  d1,  d2},  [r0], r12
++                vst3.8          {d4,  d5,  d6},  [r4], r12
++                beq             11b
++                vmov            q0,  q8
++                vmov            q2,  q10
++                sub             r5,  #48
++                vmov            d2,  d18
++                vmov            d6,  d22
++1:
++                cmp             r5,  #24-96
++                blt             1f
++                vst3.8          {d0,  d1,  d2},  [r0]!
++                beq             11b
++                vmov            q0,  q2
++                sub             r5,  #24
++                vmov            d2,  d6
++1:
++                cmp             r5,  #12-96
++                blt             1f
++                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
++                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
++                vst3.8          {d0[2], d1[2], d2[2]}, [r0]!
++                vst3.8          {d0[3], d1[3], d2[3]}, [r0]!
++                beq             11b
++                vmov            s0,  s1
++                sub             r5,  #12
++                vmov            s2,  s3
++                vmov            s4,  s5
++1:
++                cmp             r5,  #6-96
++                blt             1f
++                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
++                vst3.8          {d0[1], d1[1], d2[1]}, [r0]!
++                @ No extra add: the two post-indexed stores above already moved r0 on by 6
++                beq             11b
++                vshr.u32        d0,  #16
++                sub             r5,  #6
++                vshr.u32        d1,  #16
++                vshr.u32        d2,  #16
++1:
++                cmp             r5, #3-96
++                blt             1f
++                vst3.8          {d0[0], d1[0], d2[0]}, [r0]!
++                beq             11b
++                sub             r5, #3
++                vshr.u32        d0, #8
++                vshr.u32        d1, #8
++1:
++                cmp             r5, #2-96
++                blt             1f
++                vst2.8          {d0[0], d1[0]}, [r0]!
++                b               11b
++1:
++                vst1.8          {d0[0]}, [r0]!
++                b               11b
++
++endfunc
++
++
+diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h
+new file mode 100644
+index 000000000000..d8126676ee0c
+--- /dev/null
++++ b/libavutil/arm/rpi_sand_neon.h
+@@ -0,0 +1,110 @@
++/*
++Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#ifndef AVUTIL_ARM_RPI_SAND_NEON_H
++#define AVUTIL_ARM_RPI_SAND_NEON_H
++
++void ff_rpi_sand128b_stripe_to_8_10(
++  uint8_t * dest,             // [r0]
++  const uint8_t * src1,       // [r1]
++  const uint8_t * src2,       // [r2]
++  unsigned int lines);        // [r3]
++
++void ff_rpi_sand8_lines_to_planar_y8(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - 0
++  unsigned int y,             // [sp, #8]  (r7 in prefix)
++  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++  unsigned int h);            // [sp, #16] -> r7
++
++void ff_rpi_sand8_lines_to_planar_c8(
++  uint8_t * dst_u,            // [r0]
++  unsigned int dst_stride_u,  // [r1]
++  uint8_t * dst_v,            // [r2]
++  unsigned int dst_stride_v,  // [r3]
++  const uint8_t * src,        // [sp, #0]  -> r4, r5
++  unsigned int stride1,       // [sp, #4]  128
++  unsigned int stride2,       // [sp, #8]  -> r8
++  unsigned int _x,            // [sp, #12] 0
++  unsigned int y,             // [sp, #16] (r7 in prefix)
++  unsigned int _w,            // [sp, #20] -> r12, r6
++  unsigned int h);            // [sp, #24] -> r7
++
++void ff_rpi_sand30_lines_to_planar_y16(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - 0
++  unsigned int y,             // [sp, #8]  (r7 in prefix)
++  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++  unsigned int h);            // [sp, #16] -> r7
++
++void ff_rpi_sand30_lines_to_planar_c16(
++  uint8_t * dst_u,            // [r0]
++  unsigned int dst_stride_u,  // [r1]
++  uint8_t * dst_v,            // [r2]
++  unsigned int dst_stride_v,  // [r3]
++  const uint8_t * src,        // [sp, #0]  -> r4, r5
++  unsigned int stride1,       // [sp, #4]  128
++  unsigned int stride2,       // [sp, #8]  -> r8
++  unsigned int _x,            // [sp, #12] 0
++  unsigned int y,             // [sp, #16] (r7 in prefix)
++  unsigned int _w,            // [sp, #20] -> r6, r9
++  unsigned int h);            // [sp, #24] -> r7
++
++void ff_rpi_sand30_lines_to_planar_p010(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - 0
++  unsigned int y,             // [sp, #8]  (r7 in prefix)
++  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++  unsigned int h);            // [sp, #16] -> r7
++
++void ff_rpi_sand30_lines_to_planar_y8(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - 0
++  unsigned int y,             // [sp, #8]  (r7 in prefix)
++  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++  unsigned int h);            // [sp, #16] -> r7
++
++#endif // AVUTIL_ARM_RPI_SAND_NEON_H
++
+diff --git a/libavutil/frame.c b/libavutil/frame.c
+index f0a0dba018a6..38458696c6ae 100644
+--- a/libavutil/frame.c
++++ b/libavutil/frame.c
+@@ -16,6 +16,8 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
  
 +#include "config.h"
 +
- #include "libavutil/avassert.h"
- #include "libavutil/avstring.h"
- #include "libavutil/common.h"
-@@ -59,6 +61,16 @@
- #define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */
- #endif
- 
-+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
-+// in drm_fourcc.h hopefully will be sometime in the future but until then...
-+#ifndef V4L2_PIX_FMT_NV12_10_COL128
-+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
-+#endif
-+
-+#ifndef V4L2_PIX_FMT_NV12_COL128
-+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
-+#endif
-+
- typedef struct V4L2Queue V4L2Queue;
- typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared;
- 
-@@ -176,9 +188,11 @@ fmt_av_to_v4l2(const enum AVPixelFormat avfmt)
-         return V4L2_PIX_FMT_YUV420;
-     case AV_PIX_FMT_NV12:
-         return V4L2_PIX_FMT_NV12;
+ #include "channel_layout.h"
+ #include "avassert.h"
+ #include "buffer.h"
+@@ -27,6 +29,9 @@
+ #include "mem.h"
+ #include "samplefmt.h"
+ #include "hwcontext.h"
 +#if CONFIG_SAND
-     case AV_PIX_FMT_RPI4_8:
-     case AV_PIX_FMT_SAND128:
-         return V4L2_PIX_FMT_NV12_COL128;
++#include "rpi_sand_fns.h"
 +#endif
-     default:
-         break;
-     }
-@@ -193,8 +207,10 @@ fmt_v4l2_to_av(const uint32_t pixfmt)
-         return AV_PIX_FMT_YUV420P;
-     case V4L2_PIX_FMT_NV12:
-         return AV_PIX_FMT_NV12;
+ 
+ static const AVSideDataDescriptor sd_props[] = {
+     [AV_FRAME_DATA_PANSCAN]                     = { "AVPanScan" },
+@@ -1077,6 +1082,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags)
+         (frame->crop_top + frame->crop_bottom) >= frame->height)
+         return AVERROR(ERANGE);
+ 
 +#if CONFIG_SAND
-     case V4L2_PIX_FMT_NV12_COL128:
-         return AV_PIX_FMT_RPI4_8;
-+#endif
-     default:
-         break;
-     }
-@@ -823,6 +839,7 @@ static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame)
-                 h = src->layers[0].planes[1].offset / bpl;
-                 w = bpl;
-             }
-+#if CONFIG_SAND
-             else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
-                 if (src->layers[0].nb_planes != 2)
-                     break;
-@@ -831,9 +848,11 @@ static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame)
-                 h = src->layers[0].planes[1].offset / 128;
-                 bpl = fourcc_mod_broadcom_param(mod);
-             }
-+#endif
-             break;
- 
-         case DRM_FORMAT_P030:
-+#if CONFIG_SAND
-             if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) {
-                 if (src->layers[0].nb_planes != 2)
-                     break;
-@@ -842,6 +861,7 @@ static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame)
-                 h = src->layers[0].planes[1].offset / 128;
-                 bpl = fourcc_mod_broadcom_param(mod);
-             }
-+#endif
-             break;
- 
-         default:
-@@ -1048,7 +1068,6 @@ static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf)
-     AVDRMLayerDescriptor * const layer = &drm_desc->layers[0];
-     const struct v4l2_format *const fmt = &q->format;
-     const uint32_t height = fmt_height(fmt);
--    const uint32_t width  = fmt_width(fmt);
-     ptrdiff_t bpl0;
- 
-     /* fill the DRM frame descriptor */
-@@ -1063,7 +1082,7 @@ static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf)
-     bpl0 = layer->planes[0].pitch;
- 
-     switch (fmt_pixelformat(fmt)) {
--
-+#if CONFIG_SAND
-         case V4L2_PIX_FMT_NV12_COL128:
-             mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0);
-             layer->format = V4L2_PIX_FMT_NV12;
-@@ -1074,9 +1093,10 @@ static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf)
-             layer->nb_planes = 2;
-             layer->planes[1].object_index = 0;
-             layer->planes[1].offset = height * 128;
--            layer->planes[0].pitch = width;
--            layer->planes[1].pitch = width;
-+            layer->planes[0].pitch = fmt_width(fmt);
-+            layer->planes[1].pitch = layer->planes[0].pitch;
-             break;
-+#endif
- 
-         case DRM_FORMAT_NV12:
-             layer->format = V4L2_PIX_FMT_NV12;
-@@ -1576,7 +1596,10 @@ static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc)
-         return is_linear ? V4L2_PIX_FMT_YUV420 : 0;
-     case DRM_FORMAT_NV12:
-         return is_linear ? V4L2_PIX_FMT_NV12 :
--            fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : 0;
-+#if CONFIG_SAND
-+            fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 :
-+#endif
-+            0;
-     default:
-         break;
-     }
-
-From d13ef67d14056374de1d66962e067633a5ca1e95 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sun, 2 Oct 2022 12:36:43 +0000
-Subject: [PATCH 082/186] configure: Fix v4l2_req_hevc_vx setup; set after deps
- fixups
-
----
- configure | 9 +++------
- 1 file changed, 3 insertions(+), 6 deletions(-)
-
-diff --git a/configure b/configure
-index 055944934476..c57b24bf8946 100755
---- a/configure
-+++ b/configure
-@@ -6914,12 +6914,6 @@ fi
- check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
- check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;"
- disable v4l2_req_hevc_vx
--if enabled hevc_v4l2request_hwaccel; then
--    enable v4l2_req_hevc_vx
--fi
--if enabled hevc_v4l2_request; then
--    disable v4l2_req_hevc_vx
--fi
- 
- check_headers sys/videoio.h
- test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
-@@ -7415,6 +7409,9 @@ check_deps $CONFIG_LIST       \
- 
- enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86"
- 
-+# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done
-+enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx
-+
- case $target_os in
- haiku)
-     disable memalign
-
-From ea9b52297f42fd55758be7944de7c5b24c77d900 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sat, 1 Oct 2022 12:39:45 +0000
-Subject: [PATCH 083/186] vf_deinterlace_v4l2m2m: Ensure we get consistent
- final frames
-
-On getting EOS at the input of the filster do not simply drop everything
-in transit on the floor but attempt to retrieve everything possible from
-the capture Q before on-signalling EOS.
-If we know that we expect 1 frame in to always produce 1 frame out then
-match CAPTURE frame to the last OUTPUT frame Qed (scale)
-If frames out have an unknown relation to source frames (deinterlace) try
-an encode stop and wait for the last frame marker to emerge from CAPTURE
----
- libavfilter/vf_deinterlace_v4l2m2m.c | 172 +++++++++++++++++++++++----
- 1 file changed, 148 insertions(+), 24 deletions(-)
-
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index 716789f9881c..ce875c2c619c 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -94,6 +94,7 @@ typedef struct V4L2Buffer {
- typedef struct V4L2Queue {
-     struct v4l2_format format;
-     struct v4l2_selection sel;
-+    int eos;
-     int num_buffers;
-     V4L2Buffer *buffers;
-     const char * name;
-@@ -127,20 +128,41 @@ typedef struct pts_track_s
-     pts_track_el_t a[PTS_TRACK_SIZE];
- } pts_track_t;
- 
-+typedef enum drain_state_e
-+{
-+    DRAIN_NONE = 0,     // Not draining
-+    DRAIN_TIMEOUT,      // Drain until normal timeout setup yields no frame
-+    DRAIN_LAST,         // Drain with long timeout last_frame in received on output expected
-+    DRAIN_EOS,          // Drain with long timeout EOS expected
-+    DRAIN_DONE          // Drained
-+} drain_state_t;
-+
- typedef struct DeintV4L2M2MContextShared {
-     void * logctx;  // For logging - will be NULL when done
-     filter_type_v4l2_t filter_type;
- 
-     int fd;
--    int done;
-+    int done;   // fd closed - awating all refs dropped
-     int width;
-     int height;
- 
-+    int drain;          // EOS received (inlink status)
-+    drain_state_t drain_state;
-+    int64_t drain_pts;  // PTS associated with inline status
-+
-+    unsigned int frames_rx;
-+    unsigned int frames_tx;
-+
-     // from options
-     int output_width;
-     int output_height;
-     enum AVPixelFormat output_format;
- 
-+    int has_enc_stop;
-+    // We expect to get exactly the same number of frames out as we put in
-+    // We can drain by matching input to output
-+    int one_to_one;
-+
-     int orig_width;
-     int orig_height;
-     atomic_uint refcount;
-@@ -179,6 +201,12 @@ typedef struct DeintV4L2M2MContext {
-     enum AVChromaLocation chroma_location;
- } DeintV4L2M2MContext;
- 
-+
-+static inline int drain_frame_expected(const drain_state_t d)
-+{
-+    return d == DRAIN_EOS || d == DRAIN_LAST;
-+}
-+
- // These just list the ones we know we can cope with
- static uint32_t
- fmt_av_to_v4l2(const enum AVPixelFormat avfmt)
-@@ -334,6 +362,13 @@ fail:
-     return 0;
- }
- 
-+// We are only ever expecting in-order frames so nothing more clever is required
-+static unsigned int
-+pts_track_count(const pts_track_t * const trk)
-+{
-+    return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1);
-+}
-+
- static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src)
- {
-     const uint32_t n = pts_track_next_n(trk);
-@@ -406,6 +441,12 @@ fmt_pixelformat(const struct v4l2_format * const fmt)
-     return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
- }
- 
-+static inline uint32_t
-+buf_bytesused0(const struct v4l2_buffer * const buf)
-+{
-+    return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? buf->m.planes[0].bytesused : buf->bytesused;
-+}
-+
- static void
- init_format(V4L2Queue * const q, const uint32_t format_type)
- {
-@@ -1469,12 +1510,24 @@ static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int tim
- 
-     av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__);
- 
-+    if (queue->eos) {
-+        av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__);
-+        return AVERROR_EOF;
-+    }
-+
-     avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout);
-     if (!avbuf) {
-         av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout);
-         return AVERROR(EAGAIN);
-     }
- 
-+    if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) {
-+        if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0)
-+            queue->eos = 1;
-+        if (buf_bytesused0(&avbuf->buffer) == 0)
-+            return queue->eos ? AVERROR_EOF : AVERROR(EINVAL);
-+    }
-+
-     // Fill in PTS and anciliary info from src frame
-     pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame);
- 
-@@ -1686,6 +1739,20 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
-         else
-             ctx->field_order = V4L2_FIELD_INTERLACED_BT;
- 
-+        {
-+            struct v4l2_encoder_cmd ecmd = {
-+                .cmd = V4L2_ENC_CMD_STOP
-+            };
-+            ctx->has_enc_stop = 0;
-+            if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) {
-+                av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n");
-+                ctx->has_enc_stop = 1;
-+            }
-+            else {
-+                av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno)));
-+            }
-+
-+        }
-     }
- 
-     ret = deint_v4l2m2m_enqueue_frame(output, in);
-@@ -1694,6 +1761,41 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
-     return ret;
- }
- 
-+static int
-+ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s,
-+           AVFilterLink * const inlink)
-+{
-+    int instatus;
-+    int64_t inpts;
-+
-+    if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0)
++    // Sand cannot be cropped - do not try
++    if (av_rpi_is_sand_format(frame->format))
 +        return 0;
++#endif
 +
-+    s->drain      = instatus;
-+    s->drain_pts  = inpts;
-+    s->drain_state = DRAIN_TIMEOUT;
-+
-+    if (s->field_order == V4L2_FIELD_ANY) {  // Not yet started
-+        s->drain_state = DRAIN_DONE;
-+    }
-+    else if (s->one_to_one) {
-+        s->drain_state = DRAIN_LAST;
-+    }
-+    else if (s->has_enc_stop) {
-+        struct v4l2_encoder_cmd ecmd = {
-+            .cmd = V4L2_ENC_CMD_STOP
-+        };
-+        if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) {
-+            av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n");
-+            s->drain_state = DRAIN_EOS;
-+        }
-+        else {
-+            av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno)));
-+        }
-+    }
-+    return 1;
-+}
-+
- static int deint_v4l2m2m_activate(AVFilterContext *avctx)
- {
-     DeintV4L2M2MContext * const priv = avctx->priv;
-@@ -1702,15 +1804,13 @@ static int deint_v4l2m2m_activate(AVFilterContext *avctx)
-     AVFilterLink * const inlink = avctx->inputs[0];
-     int n = 0;
-     int cn = 99;
--    int instatus = 0;
--    int64_t inpts = 0;
-     int did_something = 0;
- 
-     av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__);
- 
-     FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx);
- 
--    ff_inlink_acknowledge_status(inlink, &instatus, &inpts);
-+    ack_inlink(avctx, s, inlink);
- 
-     if (!ff_outlink_frame_wanted(outlink)) {
-         av_log(priv, AV_LOG_TRACE, "%s: Not wanted out\n", __func__);
-@@ -1720,7 +1820,6 @@ static int deint_v4l2m2m_activate(AVFilterContext *avctx)
-         AVFrame * frame = av_frame_alloc();
-         int rv;
- 
--again:
-         recycle_q(&s->output);
-         n = count_enqueued(&s->output);
- 
-@@ -1729,10 +1828,21 @@ again:
-             return AVERROR(ENOMEM);
-         }
- 
--        rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, n > 4 ? 300 : 0);
-+        rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame,
-+                                         drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0);
-         if (rv != 0) {
-             av_frame_free(&frame);
--            if (rv != AVERROR(EAGAIN)) {
-+            if (rv == AVERROR_EOF) {
-+                av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__);
-+                s->drain_state = DRAIN_DONE;
-+            }
-+            else if (rv == AVERROR(EAGAIN)) {
-+                if (s->drain_state != DRAIN_NONE) {
-+                    av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__);
-+                    s->drain_state = DRAIN_DONE;
-+                }
-+            }
-+            else {
-                 av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv));
-                 return rv;
-             }
-@@ -1742,29 +1852,30 @@ again:
-             // frame is always consumed by filter_frame - even on error despite
-             // a somewhat confusing comment in the header
-             rv = ff_filter_frame(outlink, frame);
--
--            if (instatus != 0) {
--                av_log(priv, AV_LOG_TRACE, "%s: eof loop\n", __func__);
--                goto again;
--            }
-+            ++s->frames_tx;
- 
-             av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv));
-             did_something = 1;
-+
-+            if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) {
-+                av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__);
-+                s->drain_state = DRAIN_DONE;
-+            }
-         }
- 
-         cn = count_enqueued(&s->capture);
-     }
- 
--    if (instatus != 0) {
--        ff_outlink_set_status(outlink, instatus, inpts);
--        av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(instatus));
-+    if (s->drain_state == DRAIN_DONE) {
-+        ff_outlink_set_status(outlink, s->drain, s->drain_pts);
-+        av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain));
-         return 0;
-     }
- 
-     recycle_q(&s->output);
-     n = count_enqueued(&s->output);
- 
--    while (n < 6) {
-+    while (n < 6 && !s->drain) {
-         AVFrame * frame;
-         int rv;
- 
-@@ -1775,8 +1886,13 @@ again:
- 
-         if (frame == NULL) {
-             av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__);
-+            if (!ack_inlink(avctx, s, inlink)) {
-+                ff_inlink_request_frame(inlink);
-+                av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__);
-+            }
-             break;
-         }
-+        ++s->frames_rx;
- 
-         rv = deint_v4l2m2m_filter_frame(inlink, frame);
-         av_frame_free(&frame);
-@@ -1785,16 +1901,11 @@ again:
-             return rv;
- 
-         av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__);
--        ++n;
--    }
--
--    if (n < 6) {
--        ff_inlink_request_frame(inlink);
-         did_something = 1;
--        av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__);
-+        ++n;
-     }
- 
--    if (n > 4 && ff_outlink_frame_wanted(outlink)) {
-+    if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) {
-         ff_filter_set_ready(avctx, 1);
-         did_something = 1;
-         av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__);
-@@ -1873,7 +1984,18 @@ static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx)
- 
- static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx)
- {
--    return common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE);
-+    int rv;
-+    DeintV4L2M2MContext * priv;
-+    DeintV4L2M2MContextShared * ctx;
-+
-+    if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0)
-+        return rv;
-+
-+    priv = avctx->priv;
-+    ctx = priv->shared;
-+
-+    ctx->one_to_one = 1;
-+    return 0;
- }
- 
- static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
-@@ -1881,6 +2003,8 @@ static void deint_v4l2m2m_uninit(AVFilterContext *avctx)
-     DeintV4L2M2MContext *priv = avctx->priv;
-     DeintV4L2M2MContextShared *ctx = priv->shared;
- 
-+    av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n",
-+           ctx->frames_rx, ctx->frames_tx);
-     ctx->done = 1;
-     ctx->logctx = NULL;  // Log to NULL works, log to missing crashes
-     pts_track_uninit(&ctx->track);
-
-From 87f4f5cf8cadb5db52d474138e20dbcf53d865ed Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 5 Oct 2022 16:12:02 +0000
-Subject: [PATCH 084/186] v4l2_m2m_dec: Rework decode pending heuristic
-
-The old code measured the length of the entire Q in the decoder and
-attempted to dynamically guess an appropriate length. This was prone to
-failure when the guesswork became confused.
-The new code attempts to measure the Q length before insertion into decode
-which, after all, is what we actually care about. It does this by
-asserting that the decoder must have consumed all packets that came
-before the one associated with the most recent CAPTURE frame.  This
-avoids all need for reorder buffer size guesswork.
----
- libavcodec/v4l2_m2m.h     |  2 -
- libavcodec/v4l2_m2m_dec.c | 77 +++++++++++++++++----------------------
- 2 files changed, 34 insertions(+), 45 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index babf101d650a..26a7161042b5 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -107,8 +107,6 @@ typedef struct V4L2m2mContext {
- 
-     /* Frame tracking */
-     xlat_track_t xlat;
--    int pending_hw;
--    int pending_n;
- 
-     pts_stats_t pts_stat;
- 
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 6bd9926b3f31..bec9b22fcf3f 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -349,41 +349,54 @@ static void
- xlat_flush(xlat_track_t * const x)
- {
-     unsigned int i;
-+    // Do not reset track_no - this ensures that any frames left in the decoder
-+    // that turn up later get discarded.
-+
-+    x->last_pts = AV_NOPTS_VALUE;
-+    x->last_opaque = 0;
-     for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) {
-         x->track_els[i].pending = 0;
-         x->track_els[i].discard = 1;
-     }
--    x->last_pts = AV_NOPTS_VALUE;
-+}
-+
-+static void
-+xlat_init(xlat_track_t * const x)
-+{
-+    memset(x, 0, sizeof(*x));
-+    xlat_flush(x);
- }
- 
- static int
- xlat_pending(const xlat_track_t * const x)
- {
-     unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE;
--    unsigned int i;
--    int r = 0;
--    int64_t now = AV_NOPTS_VALUE;
-+    int i;
-+    const int64_t now = x->last_pts;
- 
--    for (i = 0; i < 32; ++i, n = (n - 1) % FF_V4L2_M2M_TRACK_SIZE) {
-+    for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) {
-         const V4L2m2mTrackEl * const t = x->track_els + n;
- 
-+        // Discard only set on never-set or flushed entries
-+        // So if we get here we've never successfully decoded a frame so allow
-+        // more frames into the buffer before stalling
-+        if (t->discard)
-+            return i - 16;
-+
-+        // If we've got this frame out then everything before this point
-+        // must have entered the decoder
-         if (!t->pending)
--            continue;
-+            break;
- 
-+        // If we've never seen a pts all we can do is count frames
-         if (now == AV_NOPTS_VALUE)
--            now = t->dts;
-+            continue;
- 
--        if (t->pts == AV_NOPTS_VALUE ||
--            ((now == AV_NOPTS_VALUE || t->pts <= now) &&
--             (x->last_pts == AV_NOPTS_VALUE || t->pts > x->last_pts)))
--            ++r;
-+        if (t->dts != AV_NOPTS_VALUE && now >= t->dts)
-+            break;
-     }
- 
--    // If we never get any ideas about PTS vs DTS allow a lot more buffer
--    if (now == AV_NOPTS_VALUE)
--        r -= 16;
--
--    return r;
-+    return i;
- }
- 
- static inline int stream_started(const V4L2m2mContext * const s) {
-@@ -557,18 +570,6 @@ static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx)
-     return rv;
- }
- 
--// Number of frames over what xlat_pending returns that we keep *16
--// This is a min value - if it appears to be too small the threshold should
--// adjust dynamically.
--#define PENDING_HW_MIN      (3 * 16)
--// Offset to use when setting dynamically
--// Set to %16 == 15 to avoid the threshold changing immediately as we relax
--#define PENDING_HW_OFFSET   (PENDING_HW_MIN - 1)
--// Number of consecutive times we've failed to get a frame when we prefer it
--// before we increase the prefer threshold (5ms * N = max expected decode
--// time)
--#define PENDING_N_THRESHOLD 6
--
- static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- {
-     V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-@@ -578,9 +579,11 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- 
-     do {
-         const int pending = xlat_pending(&s->xlat);
--        const int prefer_dq = (pending > s->pending_hw / 16);
-+        const int prefer_dq = (pending > 3);
-         const int last_src_rv = src_rv;
- 
-+        av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt);
-+
-         // Enqueue another pkt for decode if
-         // (a) We don't have a lot of stuff in the buffer already OR
-         // (b) ... we (think we) do but we've failed to get a frame already OR
-@@ -625,20 +628,8 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                 }
-             }
- 
--            // Adjust dynamic pending threshold
--            if (dst_rv == 0) {
--                if (--s->pending_hw < PENDING_HW_MIN)
--                    s->pending_hw = PENDING_HW_MIN;
--                s->pending_n = 0;
--
-+            if (dst_rv == 0)
-                 set_best_effort_pts(avctx, &s->pts_stat, frame);
--            }
--            else if (dst_rv == AVERROR(EAGAIN)) {
--                if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) {
--                    s->pending_hw = pending * 16 + PENDING_HW_OFFSET;
--                    s->pending_n = 0;
--                }
--            }
- 
-             if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
-                 av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF");
-@@ -857,8 +848,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     if (ret < 0)
-         return ret;
- 
-+    xlat_init(&s->xlat);
-     pts_stats_init(&s->pts_stat, avctx, "decoder");
--    s->pending_hw = PENDING_HW_MIN;
- 
-     capture = &s->capture;
-     output = &s->output;
-
-From a7bdc67135c3d0e69b0f8b58bb317194bbabe105 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 21 Oct 2022 13:48:07 +0000
-Subject: [PATCH 085/186] pthread_frame: Fix MT hwaccel. Recent change broke
- it.
-
-Revert the effects of 35aa7e70e7ec350319e7634a30d8d8aa1e6ecdda if the
-hwaccel is marked MT_SAFE.
----
- libavcodec/pthread_frame.c | 48 ++++++++++++++++++++++++++++----------
- 1 file changed, 36 insertions(+), 12 deletions(-)
-
-diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
-index 2cc89a41f55f..b14f8e9360b5 100644
---- a/libavcodec/pthread_frame.c
-+++ b/libavcodec/pthread_frame.c
-@@ -231,7 +231,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg)
-             p->hwaccel_serializing = 0;
-             pthread_mutex_unlock(&p->parent->hwaccel_mutex);
-         }
--        av_assert0(!avctx->hwaccel);
-+        av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
- 
-         if (p->async_serializing) {
-             p->async_serializing = 0;
-@@ -319,6 +319,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
-         }
- 
-         dst->hwaccel_flags = src->hwaccel_flags;
-+        if (src->hwaccel &&
-+            (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
-+            dst->hwaccel = src->hwaccel;
-+            dst->hwaccel_context = src->hwaccel_context;
-+            dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data;
-+        }
- 
-         err = av_buffer_replace(&dst->internal->pool, src->internal->pool);
-         if (err < 0)
-@@ -434,10 +440,13 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
-     }
- 
-     /* transfer the stashed hwaccel state, if any */
--    av_assert0(!p->avctx->hwaccel);
--    FFSWAP(const AVHWAccel*, p->avctx->hwaccel,                     fctx->stash_hwaccel);
--    FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
--    FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
-+    av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
-+    if (p->avctx->hwaccel &&
-+        !(p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
-+        FFSWAP(const AVHWAccel*, p->avctx->hwaccel,                     fctx->stash_hwaccel);
-+        FFSWAP(void*,            p->avctx->hwaccel_context,             fctx->stash_hwaccel_context);
-+        FFSWAP(void*,            p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
-+    }
- 
-     av_packet_unref(p->avpkt);
-     ret = av_packet_ref(p->avpkt, avpkt);
-@@ -610,9 +619,12 @@ void ff_thread_finish_setup(AVCodecContext *avctx) {
-      * this is done here so that this worker thread can wipe its own hwaccel
-      * state after decoding, without requiring synchronization */
-     av_assert0(!p->parent->stash_hwaccel);
--    p->parent->stash_hwaccel         = avctx->hwaccel;
--    p->parent->stash_hwaccel_context = avctx->hwaccel_context;
--    p->parent->stash_hwaccel_priv    = avctx->internal->hwaccel_priv_data;
-+    if (avctx->hwaccel &&
-+        !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
-+        p->parent->stash_hwaccel         = avctx->hwaccel;
-+        p->parent->stash_hwaccel_context = avctx->hwaccel_context;
-+        p->parent->stash_hwaccel_priv    = avctx->internal->hwaccel_priv_data;
-+    }
- 
-     pthread_mutex_lock(&p->progress_mutex);
-     if(atomic_load(&p->state) == STATE_SETUP_FINISHED){
-@@ -667,6 +679,15 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
- 
-     park_frame_worker_threads(fctx, thread_count);
- 
-+     if (fctx->prev_thread &&
-+         avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) &&
-+         avctx->internal->hwaccel_priv_data !=
-+                             fctx->prev_thread->avctx->internal->hwaccel_priv_data) {
-+        if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) {
-+            av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n");
-+        }
-+    }
-+
-     for (i = 0; i < thread_count; i++) {
-         PerThreadContext *p = &fctx->threads[i];
-         AVCodecContext *ctx = p->avctx;
-@@ -710,10 +731,13 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count)
- 
-     /* if we have stashed hwaccel state, move it to the user-facing context,
-      * so it will be freed in avcodec_close() */
--    av_assert0(!avctx->hwaccel);
--    FFSWAP(const AVHWAccel*, avctx->hwaccel,                     fctx->stash_hwaccel);
--    FFSWAP(void*,            avctx->hwaccel_context,             fctx->stash_hwaccel_context);
--    FFSWAP(void*,            avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
-+    av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE));
-+    if (avctx->hwaccel &&
-+        !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
-+        FFSWAP(const AVHWAccel*, avctx->hwaccel,                     fctx->stash_hwaccel);
-+        FFSWAP(void*,            avctx->hwaccel_context,             fctx->stash_hwaccel_context);
-+        FFSWAP(void*,            avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv);
-+    }
- 
-     av_freep(&avctx->internal->thread_ctx);
- }
-
-From 9d2cf061d17a54ac854fb53574fa1006968cedeb Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 18 Oct 2022 13:18:27 +0000
-Subject: [PATCH 086/186] v4l2_req: Add swfmt to init logging
-
-(cherry picked from commit dfa03b702baaf2952bcd2bbf8badcc2f9c961ddf)
----
- libavcodec/v4l2_request_hevc.c | 6 ++++--
- 1 file changed, 4 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index 614a1b4d99e4..767ecb036ad2 100644
---- a/libavcodec/v4l2_request_hevc.c
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -26,6 +26,7 @@
- #include "v4l2_request_hevc.h"
- 
- #include "libavutil/hwcontext_drm.h"
-+#include "libavutil/pixdesc.h"
- 
- #include "v4l2_req_devscan.h"
- #include "v4l2_req_dmabufs.h"
-@@ -306,10 +307,11 @@ retry_src_memtype:
-     // Set our s/w format
-     avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;
- 
--    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s\n",
-+    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n",
-            ctx->fns->name,
-            decdev_media_path(decdev), decdev_video_path(decdev),
--           mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype));
-+           mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype),
-+           av_get_pix_fmt_name(avctx->sw_pix_fmt));
- 
-     return 0;
- 
-
-From c848f442ed183086c17ca83a9401f33e6b36ce80 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 18 Oct 2022 13:39:54 +0000
-Subject: [PATCH 087/186] v4l2_m2m: Avoid polling on a queue that is streamoff
-
-(cherry picked from commit b2658bc56d3034a17db7f39597fc7d71bfe9a43b)
----
- libavcodec/v4l2_context.c | 13 +++++++++----
- 1 file changed, 9 insertions(+), 4 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 4a359bf45e30..b296dc111c1c 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -578,6 +578,11 @@ get_event(V4L2m2mContext * const m)
-     return 0;
- }
- 
-+static inline int
-+dq_ok(const V4L2Context * const c)
-+{
-+    return c->streamon && atomic_load(&c->q_count) != 0;
-+}
- 
- // Get a buffer
- // If output then just gets the buffer in the expected way
-@@ -613,13 +618,13 @@ get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout
-         }
- 
-         // If capture && timeout == -1 then also wait for rx buffer free
--        if (is_cap && timeout == -1 && m->output.streamon && !m->draining)
-+        if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining)
-             pfd.events |= poll_out;
- 
-         // If nothing Qed all we will get is POLLERR - avoid that
--        if ((pfd.events == poll_out && atomic_load(&m->output.q_count) == 0) ||
--            (pfd.events == poll_cap && atomic_load(&m->capture.q_count) == 0) ||
--            (pfd.events == (poll_cap | poll_out) && atomic_load(&m->capture.q_count) == 0 && atomic_load(&m->output.q_count) == 0)) {
-+        if ((pfd.events == poll_out && !dq_ok(&m->output)) ||
-+            (pfd.events == poll_cap && !dq_ok(&m->capture)) ||
-+            (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) {
-             av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name);
-             return AVERROR(ENOSPC);
-         }
-
-From 3e501d4ca4266ef8ffbf19808c42ccb3e40d6392 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 18 Oct 2022 14:07:04 +0000
-Subject: [PATCH 088/186] v4l2_m2m: Add function to get number of queued
- buffers
-
-(cherry picked from commit f9ac6485c00b4531dcff354222aef450b29728f4)
----
- libavcodec/v4l2_context.h | 11 +++++++++++
- 1 file changed, 11 insertions(+)
-
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 523c53e97dc5..8e4f68164351 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -220,4 +220,15 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f);
- 
- void ff_v4l2_dq_all(V4L2Context *const ctx);
- 
-+/**
-+ * Returns the number of buffers currently queued
-+ *
-+ * @param[in] ctx The V4L2Context to evaluate
-+ */
-+static inline int
-+ff_v4l2_context_q_count(const V4L2Context* const ctx)
-+{
-+    return atomic_load(&ctx->q_count);
-+}
-+
- #endif // AVCODEC_V4L2_CONTEXT_H
-
-From c0eac42165afdf7a7efad03bd140506263948940 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 18 Oct 2022 14:48:20 +0000
-Subject: [PATCH 089/186] v4l2_m2m: Add timeouts to dq_all and dequeue_packet
-
-Add timeouts and use them to have better flow control in encode
-
-(cherry picked from commit c6173cad7f21697e12887982bda796de9719bb32)
----
- libavcodec/v4l2_context.c | 16 +++++++++++-----
- libavcodec/v4l2_context.h | 15 +++++++++++++--
- libavcodec/v4l2_m2m_enc.c | 28 +++++++++++++++++++---------
- 3 files changed, 43 insertions(+), 16 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index b296dc111c1c..7031f3d3409d 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -712,13 +712,19 @@ clean_v4l2_buffer(V4L2Buffer * const avbuf)
-     return avbuf;
- }
- 
--void
--ff_v4l2_dq_all(V4L2Context *const ctx)
-+int
-+ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1)
- {
-     V4L2Buffer * avbuf;
-+    if (timeout1 != 0) {
-+        int rv = get_qbuf(ctx, &avbuf, timeout1);
-+        if (rv != 0)
-+            return rv;
-+    }
-     do {
-         get_qbuf(ctx, &avbuf, 0);
-     } while (avbuf);
-+    return 0;
- }
- 
- static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
-@@ -727,7 +733,7 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx)
- 
-     /* get back as many output buffers as possible */
-     if (V4L2_TYPE_IS_OUTPUT(ctx->type))
--        ff_v4l2_dq_all(ctx);
-+        ff_v4l2_dq_all(ctx, 0);
- 
-     for (i = 0; i < ctx->num_buffers; i++) {
-         V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
-@@ -1047,7 +1053,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
-    return 0;
- }
- 
--int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
-+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout)
- {
-     V4L2m2mContext *s = ctx_to_m2mctx(ctx);
-     AVCodecContext *const avctx = s->avctx;
-@@ -1055,7 +1061,7 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
-     int rv;
- 
-     do {
--        if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0)
-+        if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0)
-             return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv;  // Caller not currently expecting ENOSPC
-         if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0)
-             return rv;
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 8e4f68164351..5afed3e6ecb4 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -179,7 +179,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd);
-  * @param[inout] pkt The AVPacket to dequeue to.
-  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
+     desc = av_pix_fmt_desc_get(frame->format);
+     if (!desc)
+         return AVERROR_BUG;
+diff --git a/libavutil/frame.h b/libavutil/frame.h
+index f7806566d54c..00c5c925e31d 100644
+--- a/libavutil/frame.h
++++ b/libavutil/frame.h
+@@ -1037,6 +1037,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags);
   */
--int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt);
-+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout);
+ const char *av_frame_side_data_name(enum AVFrameSideDataType type);
  
++
++static inline int av_frame_cropped_width(const AVFrame * const frame)
++{
++    return frame->width - (frame->crop_left + frame->crop_right);
++}
++static inline int av_frame_cropped_height(const AVFrame * const frame)
++{
++    return frame->height - (frame->crop_top + frame->crop_bottom);
++}
++
  /**
-  * Dequeues a buffer from a V4L2Context to an AVFrame.
-@@ -218,7 +218,18 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const
-  */
- int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f);
- 
--void ff_v4l2_dq_all(V4L2Context *const ctx);
-+/**
-+ * Dequeue all buffers on this queue
-+ *
-+ * Used to recycle output buffers
-+ *
-+ * @param[in] ctx The V4L2Context to dequeue from.
-+ * @param[in] timeout1 A timeout on dequeuing the 1st buffer, 
-+ *       all others have a timeout of zero
-+ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return
-+ *         of the first dequeue operation, 0 otherwise.
-+ */
-+int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1);
- 
- /**
-  * Returns the number of buffers currently queued
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index a992a3cccc68..d0d27e5bc2f4 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -420,16 +420,24 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
- {
-     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-     V4L2Context *const output = &s->output;
-+    int rv;
-+    int needs_slot = ff_v4l2_context_q_count(output) == output->num_buffers;
- 
--    ff_v4l2_dq_all(output);
-+    av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot);
- 
--    // Signal EOF if needed
-+    // Signal EOF if needed (doesn't need q slot)
-     if (!frame) {
-         return ff_v4l2_context_enqueue_frame(output, frame);
-     }
- 
-+    if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) {
-+        // We should be able to return AVERROR(EAGAIN) to indicate buffer
-+        // exhaustion, but ffmpeg currently treats that as fatal.
-+        av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv));
-+        return rv;
-+    }
-+
-     if (s->input_drm && !output->streamon) {
--        int rv;
-         struct v4l2_format req_format = {.type = output->format.type};
- 
-         // Set format when we first get a buffer
-@@ -494,7 +502,9 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-     AVFrame *frame = s->frame;
-     int ret;
- 
--    ff_v4l2_dq_all(output);
-+    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
-+
-+    ff_v4l2_dq_all(output, 0);
- 
-     if (s->draining)
-         goto dequeue;
-@@ -532,10 +542,10 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-     }
- 
- dequeue:
--    ret = ff_v4l2_context_dequeue_packet(capture, avpkt);
--    ff_v4l2_dq_all(output);
-+    ret = ff_v4l2_context_dequeue_packet(capture, avpkt, s->draining ? 300 : 0);
-+    ff_v4l2_dq_all(output, 0);
-     if (ret)
--        return ret;
-+        return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret;
- 
-     if (capture->first_buf == 1) {
-         uint8_t * data;
-@@ -566,8 +576,8 @@ dequeue:
-             s->extdata_size = len;
-         }
- 
--        ret = ff_v4l2_context_dequeue_packet(capture, avpkt);
--        ff_v4l2_dq_all(output);
-+        ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0);
-+        ff_v4l2_dq_all(output, 0);
-         if (ret)
-             return ret;
-     }
-
-From f09618a055068582ebe7f6e704212b04f8d00bc7 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 18 Oct 2022 14:23:32 +0000
-Subject: [PATCH 090/186] v4l2_m2m_enc: Improve debug trace
-
-(cherry picked from commit 113e89daffb329a0cd3d920abd483a4025664bf5)
----
- libavcodec/v4l2_m2m_enc.c | 13 ++++++++++---
- 1 file changed, 10 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index d0d27e5bc2f4..c8c2de3d4706 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -427,6 +427,7 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
- 
-     // Signal EOF if needed (doesn't need q slot)
-     if (!frame) {
-+        av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__);
-         return ff_v4l2_context_enqueue_frame(output, frame);
-     }
- 
-@@ -491,7 +492,12 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
-         v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1);
- #endif
- 
--    return ff_v4l2_context_enqueue_frame(output, frame);
-+    rv = ff_v4l2_context_enqueue_frame(output, frame);
-+    if (rv) {
-+        av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv));
-+    }
-+
-+    return rv;
- }
- 
- static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-@@ -502,7 +508,8 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-     AVFrame *frame = s->frame;
-     int ret;
- 
--    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
-+    av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__,
-+           ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture));
- 
-     ff_v4l2_dq_all(output, 0);
- 
-@@ -615,11 +622,11 @@ dequeue:
-         avpkt->size = newlen;
-     }
- 
--//    av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret);
-     capture->first_buf = 0;
-     return 0;
- 
- fail_no_mem:
-+    av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n");
-     ret = AVERROR(ENOMEM);
-     av_packet_unref(avpkt);
-     return ret;
-
-From 739483231401288794ff9e4acc253ef1129436cf Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 18 Oct 2022 13:22:36 +0000
-Subject: [PATCH 091/186] v4l2_m2m_enc: Copy dest packets to memory if short of
- v4l2 buffers
-
-(cherry picked from commit aa4ebbda400b42db952fc713b26927fc8636b0e5)
----
- libavcodec/v4l2_m2m_enc.c | 16 ++++++++++++++++
- 1 file changed, 16 insertions(+)
-
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index c8c2de3d4706..c23187e6e67a 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -621,6 +621,22 @@ dequeue:
-         avpkt->data = buf->data;
-         avpkt->size = newlen;
-     }
-+    else if (ff_v4l2_context_q_count(capture) < 2) {
-+        // Avoid running out of capture buffers
-+        // In most cases the buffers will be returned quickly in which case
-+        // we don't copy and can use the v4l2 buffers directly but sometimes
-+        // ffmpeg seems to hold onto all of them for a long time (.mkv
-+        // creation?) so avoid deadlock in those cases.
-+        AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE);
-+        if (buf == NULL)
-+            goto fail_no_mem;
-+
-+        memcpy(buf->data, avpkt->data, avpkt->size);
-+        av_buffer_unref(&avpkt->buf);  // Will recycle the V4L2 buffer
-+
-+        avpkt->buf = buf;
-+        avpkt->data = buf->data;
-+    }
- 
-     capture->first_buf = 0;
-     return 0;
-
-From 680669b95cae532061fef61d972fb3c9c2b92d67 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 19 Oct 2022 11:00:16 +0000
-Subject: [PATCH 092/186] v4l2_m2m_dec: Fix pts_best_effort guessing for
- initial pts
-
-(cherry picked from commit 1af32e5c87586a0f7e76cdf19a012ddbcf3eac67)
----
- libavcodec/v4l2_m2m_dec.c | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index bec9b22fcf3f..47b2735f8252 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -113,6 +113,8 @@ log_dump(void * logctx, int lvl, const void * const data, const size_t len)
- 
- static int64_t pts_stats_guess(const pts_stats_t * const stats)
- {
-+    if (stats->last_count <= 1)
-+        return stats->last_pts;
-     if (stats->last_pts == AV_NOPTS_VALUE ||
-             stats->last_interval == 0 ||
-             stats->last_count >= STATS_LAST_COUNT_MAX)
-
-From 3da063bb46e1d4ed5804d97230d74762076e1b13 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 19 Oct 2022 14:47:04 +0000
-Subject: [PATCH 093/186] v4l2_m2m_enc: Wait for frame or space in src Q in
- rx_pkt
-
-If receive_packet we should ensure that there is space in the source Q
-if we return EAGAIN so wait for either an output packet or space if
-the source Q is currently full.
-
-(cherry picked from commit 82f0c55782a67a8cc665d937647706c2a75f5548)
----
- libavcodec/v4l2_m2m_enc.c | 22 +++++++++++++++++++---
- 1 file changed, 19 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c
-index c23187e6e67a..524e9424a5e8 100644
---- a/libavcodec/v4l2_m2m_enc.c
-+++ b/libavcodec/v4l2_m2m_enc.c
-@@ -415,13 +415,17 @@ static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format *
-     return 1;
- }
- 
-+static inline int q_full(const V4L2Context *const output)
-+{
-+    return ff_v4l2_context_q_count(output) == output->num_buffers;
-+}
- 
- static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame)
- {
-     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
-     V4L2Context *const output = &s->output;
-     int rv;
--    int needs_slot = ff_v4l2_context_q_count(output) == output->num_buffers;
-+    const int needs_slot = q_full(output);
- 
-     av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot);
- 
-@@ -549,8 +553,20 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt)
-     }
- 
- dequeue:
--    ret = ff_v4l2_context_dequeue_packet(capture, avpkt, s->draining ? 300 : 0);
--    ff_v4l2_dq_all(output, 0);
-+    // Dequeue a frame
-+    for (;;) {
-+        int t = q_full(output) ? -1 : s->draining ? 300 : 0;
-+        int rv2;
-+
-+        // If output is full wait for either a packet or output to become not full
-+        ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t);
-+
-+        // If output was full retry packet dequeue
-+        t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300;
-+        rv2 = ff_v4l2_dq_all(output, t);
-+        if (t == 0 || rv2 != 0)
-+            break;
-+    }
-     if (ret)
-         return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret;
- 
-
-From 90a2740b29653aaddb67b309899f50475b76c330 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 19 Oct 2022 14:54:29 +0000
-Subject: [PATCH 094/186] vf_deinterlace_v4l2m2m: Print dts rather that NOPTS
- in trace
-
-(cherry picked from commit e9b468f35f0c6ad9bfe96f5a05e449afa8ae074a)
----
- libavfilter/vf_deinterlace_v4l2m2m.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index ce875c2c619c..7c6751b69c65 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -1668,8 +1668,8 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in)
-     V4L2Queue *output              = &ctx->output;
-     int ret;
- 
--    av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" (%"PRId64") field :%d interlaced: %d aspect:%d/%d\n",
--          __func__, in->pts, AV_NOPTS_VALUE, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den);
-+    av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n",
-+           __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den);
-     av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__,
-            avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out);
- 
-
-From 56e2c1564e5288ad8642bb4e1954505586566765 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 19 Oct 2022 14:55:21 +0000
-Subject: [PATCH 095/186] vf_deinterlace_v4l2m2m: Ignore "wanted" when
- processing input
-
-If we gate send a frame to the outlink on its frame_wanted flag then we
-will sometimes stall as the flag may not get set by ffmpeg's filter
-processing. So stuff the output whether or not it wants it which works
-much better.
-
-(cherry picked from commit 808254cc04e5e6574cbab9af254b6c2f3d4142e3)
----
- libavfilter/vf_deinterlace_v4l2m2m.c | 5 +----
- 1 file changed, 1 insertion(+), 4 deletions(-)
-
-diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c
-index 7c6751b69c65..a173a291f837 100644
---- a/libavfilter/vf_deinterlace_v4l2m2m.c
-+++ b/libavfilter/vf_deinterlace_v4l2m2m.c
-@@ -1812,10 +1812,7 @@ static int deint_v4l2m2m_activate(AVFilterContext *avctx)
- 
-     ack_inlink(avctx, s, inlink);
- 
--    if (!ff_outlink_frame_wanted(outlink)) {
--        av_log(priv, AV_LOG_TRACE, "%s: Not wanted out\n", __func__);
--    }
--    else if (s->field_order != V4L2_FIELD_ANY)  // Can't DQ if no setup!
-+    if (s->field_order != V4L2_FIELD_ANY)  // Can't DQ if no setup!
-     {
-         AVFrame * frame = av_frame_alloc();
-         int rv;
-
-From ec50574d7b8ae1188e3d1f4f526e09a827485052 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 19 Oct 2022 15:00:43 +0000
-Subject: [PATCH 096/186] conf_native: Add --enable-gpl
-
-(cherry picked from commit bab9bf4a2e39391940d88af2ce5d70236ac21f15)
----
- pi-util/conf_native.sh | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
-index f22d531ca448..082d9b58320e 100755
---- a/pi-util/conf_native.sh
-+++ b/pi-util/conf_native.sh
-@@ -94,6 +94,7 @@ $FFSRC/configure \
-  --enable-libdrm\
-  --enable-vout-egl\
-  --enable-vout-drm\
-+ --enable-gpl\
-  $SHARED_LIBS\
-  $RPIOPTS\
-  --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\
-
-From 491ca2cc58d75d0fe16ba6be000e92c47219e7f2 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 15 Nov 2022 13:33:00 +0000
-Subject: [PATCH 097/186] egl_vout: Make formatting consistent - no code
- changes
-
----
- libavdevice/egl_vout.c | 741 ++++++++++++++++++++---------------------
- 1 file changed, 369 insertions(+), 372 deletions(-)
-
-diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c
-index 7b9c610ace28..a52cabb082e9 100644
---- a/libavdevice/egl_vout.c
-+++ b/libavdevice/egl_vout.c
-@@ -48,20 +48,20 @@
- #define TRACE_ALL 0
- 
- struct egl_setup {
--   int conId;
--
--   Display *dpy;
--   EGLDisplay egl_dpy;
--   EGLContext ctx;
--   EGLSurface surf;
--   Window win;
--
--   uint32_t crtcId;
--   int crtcIdx;
--   uint32_t planeId;
--   struct {
--       int x, y, width, height;
--   } compose;
-+    int conId;
-+
-+    Display *dpy;
-+    EGLDisplay egl_dpy;
-+    EGLContext ctx;
-+    EGLSurface surf;
-+    Window win;
-+
-+    uint32_t crtcId;
-+    int crtcIdx;
-+    uint32_t planeId;
-+    struct {
-+        int x, y, width, height;
-+    } compose;
- };
- 
- typedef struct egl_aux_s {
-@@ -70,8 +70,7 @@ typedef struct egl_aux_s {
- 
- } egl_aux_t;
- 
--typedef struct egl_display_env_s
--{
-+typedef struct egl_display_env_s {
-     AVClass *class;
- 
-     struct egl_setup setup;
-@@ -89,8 +88,8 @@ typedef struct egl_display_env_s
-     sem_t display_start_sem;
-     sem_t q_sem;
-     int q_terminate;
--    AVFrame * q_this;
--    AVFrame * q_next;
-+    AVFrame *q_this;
-+    AVFrame *q_next;
- 
- } egl_display_env_t;
- 
-@@ -99,45 +98,44 @@ typedef struct egl_display_env_s
-  * Remove window border/decorations.
-  */
- static void
--no_border( Display *dpy, Window w)
-+no_border(Display *dpy, Window w)
- {
--   static const unsigned MWM_HINTS_DECORATIONS = (1 << 1);
--   static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5;
--
--   typedef struct
--   {
--      unsigned long       flags;
--      unsigned long       functions;
--      unsigned long       decorations;
--      long                inputMode;
--      unsigned long       status;
--   } PropMotifWmHints;
--
--   PropMotifWmHints motif_hints;
--   Atom prop, proptype;
--   unsigned long flags = 0;
--
--   /* setup the property */
--   motif_hints.flags = MWM_HINTS_DECORATIONS;
--   motif_hints.decorations = flags;
--
--   /* get the atom for the property */
--   prop = XInternAtom( dpy, "_MOTIF_WM_HINTS", True );
--   if (!prop) {
--      /* something went wrong! */
--      return;
--   }
--
--   /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */
--   proptype = prop;
--
--   XChangeProperty( dpy, w,                         /* display, window */
-+    static const unsigned MWM_HINTS_DECORATIONS = (1 << 1);
-+    static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5;
-+
-+    typedef struct {
-+        unsigned long       flags;
-+        unsigned long       functions;
-+        unsigned long       decorations;
-+        long                inputMode;
-+        unsigned long       status;
-+    } PropMotifWmHints;
-+
-+    PropMotifWmHints motif_hints;
-+    Atom prop, proptype;
-+    unsigned long flags = 0;
-+
-+    /* setup the property */
-+    motif_hints.flags = MWM_HINTS_DECORATIONS;
-+    motif_hints.decorations = flags;
-+
-+    /* get the atom for the property */
-+    prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True);
-+    if (!prop) {
-+        /* something went wrong! */
-+        return;
-+    }
-+
-+    /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */
-+    proptype = prop;
-+
-+    XChangeProperty(dpy, w,                         /* display, window */
-                     prop, proptype,                 /* property, type */
-                     32,                             /* format: 32-bit datums */
-                     PropModeReplace,                /* mode */
--                    (unsigned char *) &motif_hints, /* data */
-+                    (unsigned char *)&motif_hints, /* data */
-                     PROP_MOTIF_WM_HINTS_ELEMENTS    /* nelements */
--                  );
-+                   );
- }
- 
- 
-@@ -146,247 +144,247 @@ no_border( Display *dpy, Window w)
-  * Return the window and context handles.
-  */
- static int
--make_window(struct AVFormatContext * const s,
--            egl_display_env_t * const de,
-+make_window(struct AVFormatContext *const s,
-+            egl_display_env_t *const de,
-             Display *dpy, EGLDisplay egl_dpy, const char *name,
-             Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet)
- {
--   int scrnum = DefaultScreen( dpy );
--   XSetWindowAttributes attr;
--   unsigned long mask;
--   Window root = RootWindow( dpy, scrnum );
--   Window win;
--   EGLContext ctx;
--   const int fullscreen = de->fullscreen;
--   EGLConfig config;
--   int x = de->window_x;
--   int y = de->window_y;
--   int width = de->window_width ? de->window_width : 1280;
--   int height = de->window_height ? de->window_height : 720;
--
--
--   if (fullscreen) {
--      int scrnum = DefaultScreen(dpy);
--
--      x = 0; y = 0;
--      width = DisplayWidth(dpy, scrnum);
--      height = DisplayHeight(dpy, scrnum);
--   }
--
--   {
--      EGLint num_configs;
--      static const EGLint attribs[] = {
--         EGL_RED_SIZE, 1,
--         EGL_GREEN_SIZE, 1,
--         EGL_BLUE_SIZE, 1,
--         EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
--         EGL_NONE
--      };
--
--      if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) {
--         av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n");
--         return -1;
--      }
--   }
--
--   {
--      EGLint vid;
--      if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) {
--         av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n");
--         return -1;
--      }
--
--      {
--         XVisualInfo visTemplate = {
--            .visualid = vid,
--         };
--         int num_visuals;
--         XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask,
--                                               &visTemplate, &num_visuals);
--
--         /* window attributes */
--         attr.background_pixel = 0;
--         attr.border_pixel = 0;
--         attr.colormap = XCreateColormap( dpy, root, visinfo->visual, AllocNone);
--         attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask;
--         /* XXX this is a bad way to get a borderless window! */
--         mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask;
--
--         win = XCreateWindow( dpy, root, x, y, width, height,
--                              0, visinfo->depth, InputOutput,
--                              visinfo->visual, mask, &attr );
--         XFree(visinfo);
--      }
--   }
--
--   if (fullscreen)
--      no_border(dpy, win);
--
--   /* set hints and properties */
--   {
--      XSizeHints sizehints;
--      sizehints.x = x;
--      sizehints.y = y;
--      sizehints.width  = width;
--      sizehints.height = height;
--      sizehints.flags = USSize | USPosition;
--      XSetNormalHints(dpy, win, &sizehints);
--      XSetStandardProperties(dpy, win, name, name,
--                              None, (char **)NULL, 0, &sizehints);
--   }
--
--   eglBindAPI(EGL_OPENGL_ES_API);
--
--   {
--      static const EGLint ctx_attribs[] = {
--         EGL_CONTEXT_CLIENT_VERSION, 2,
--         EGL_NONE
--      };
--      ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs );
--      if (!ctx) {
--         av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
--         return -1;
--      }
--   }
--
--
--   XMapWindow(dpy, win);
--
--   {
--      EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL);
--      if (!surf) {
--         av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n");
--         return -1;
--      }
--
--      if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) {
--         av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
--         return -1;
--      }
--
--      *winRet = win;
--      *ctxRet = ctx;
--      *surfRet = surf;
--   }
--
--   return 0;
-+    int scrnum = DefaultScreen(dpy);
-+    XSetWindowAttributes attr;
-+    unsigned long mask;
-+    Window root = RootWindow(dpy, scrnum);
-+    Window win;
-+    EGLContext ctx;
-+    const int fullscreen = de->fullscreen;
-+    EGLConfig config;
-+    int x = de->window_x;
-+    int y = de->window_y;
-+    int width = de->window_width ? de->window_width : 1280;
-+    int height = de->window_height ? de->window_height : 720;
-+
-+
-+    if (fullscreen) {
-+        int scrnum = DefaultScreen(dpy);
-+
-+        x = 0; y = 0;
-+        width = DisplayWidth(dpy, scrnum);
-+        height = DisplayHeight(dpy, scrnum);
-+    }
-+
-+    {
-+        EGLint num_configs;
-+        static const EGLint attribs[] = {
-+            EGL_RED_SIZE, 1,
-+            EGL_GREEN_SIZE, 1,
-+            EGL_BLUE_SIZE, 1,
-+            EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
-+            EGL_NONE
-+        };
-+
-+        if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) {
-+            av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n");
-+            return -1;
-+        }
-+    }
-+
-+    {
-+        EGLint vid;
-+        if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) {
-+            av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n");
-+            return -1;
-+        }
-+
-+        {
-+            XVisualInfo visTemplate = {
-+                .visualid = vid,
-+            };
-+            int num_visuals;
-+            XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask,
-+                                                  &visTemplate, &num_visuals);
-+
-+            /* window attributes */
-+            attr.background_pixel = 0;
-+            attr.border_pixel = 0;
-+            attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone);
-+            attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask;
-+            /* XXX this is a bad way to get a borderless window! */
-+            mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask;
-+
-+            win = XCreateWindow(dpy, root, x, y, width, height,
-+                                0, visinfo->depth, InputOutput,
-+                                visinfo->visual, mask, &attr);
-+            XFree(visinfo);
-+        }
-+    }
-+
-+    if (fullscreen)
-+        no_border(dpy, win);
-+
-+    /* set hints and properties */
-+    {
-+        XSizeHints sizehints;
-+        sizehints.x = x;
-+        sizehints.y = y;
-+        sizehints.width  = width;
-+        sizehints.height = height;
-+        sizehints.flags = USSize | USPosition;
-+        XSetNormalHints(dpy, win, &sizehints);
-+        XSetStandardProperties(dpy, win, name, name,
-+                               None, (char **)NULL, 0, &sizehints);
-+    }
-+
-+    eglBindAPI(EGL_OPENGL_ES_API);
-+
-+    {
-+        static const EGLint ctx_attribs[] = {
-+            EGL_CONTEXT_CLIENT_VERSION, 2,
-+            EGL_NONE
-+        };
-+        ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs);
-+        if (!ctx) {
-+            av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
-+            return -1;
-+        }
-+    }
-+
-+
-+    XMapWindow(dpy, win);
-+
-+    {
-+        EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL);
-+        if (!surf) {
-+            av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n");
-+            return -1;
-+        }
-+
-+        if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) {
-+            av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
-+            return -1;
-+        }
-+
-+        *winRet = win;
-+        *ctxRet = ctx;
-+        *surfRet = surf;
-+    }
-+
-+    return 0;
- }
- 
- static GLint
--compile_shader(struct AVFormatContext * const avctx, GLenum target, const char *source)
-+compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source)
- {
--   GLuint s = glCreateShader(target);
-+    GLuint s = glCreateShader(target);
- 
--   if (s == 0) {
--      av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n");
--      return 0;
--   }
-+    if (s == 0) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n");
-+        return 0;
-+    }
- 
--   glShaderSource(s, 1, (const GLchar **) &source, NULL);
--   glCompileShader(s);
-+    glShaderSource(s, 1, (const GLchar **)&source, NULL);
-+    glCompileShader(s);
- 
--   {
--      GLint ok;
--      glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
-+    {
-+        GLint ok;
-+        glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
- 
--      if (!ok) {
--         GLchar *info;
--         GLint size;
-+        if (!ok) {
-+            GLchar *info;
-+            GLint size;
- 
--         glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size);
--         info = malloc(size);
-+            glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size);
-+            info = malloc(size);
- 
--         glGetShaderInfoLog(s, size, NULL, info);
--         av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source);
-+            glGetShaderInfoLog(s, size, NULL, info);
-+            av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source);
- 
--         return 0;
--      }
--   }
-+            return 0;
-+        }
-+    }
- 
--   return s;
-+    return s;
- }
- 
--static GLuint link_program(struct AVFormatContext * const s, GLint vs, GLint fs)
-+static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs)
- {
--   GLuint prog = glCreateProgram();
--
--   if (prog == 0) {
--      av_log(s, AV_LOG_ERROR, "Failed to create program\n");
--      return 0;
--   }
--
--   glAttachShader(prog, vs);
--   glAttachShader(prog, fs);
--   glLinkProgram(prog);
--
--   {
--      GLint ok;
--      glGetProgramiv(prog, GL_LINK_STATUS, &ok);
--      if (!ok) {
--         /* Some drivers return a size of 1 for an empty log.  This is the size
--          * of a log that contains only a terminating NUL character.
--          */
--         GLint size;
--         GLchar *info = NULL;
--         glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size);
--         if (size > 1) {
--            info = malloc(size);
--            glGetProgramInfoLog(prog, size, NULL, info);
--         }
-+    GLuint prog = glCreateProgram();
- 
--         av_log(s, AV_LOG_ERROR, "Failed to link: %s\n",
--                 (info != NULL) ? info : "<empty log>");
--         return 0;
--      }
--   }
-+    if (prog == 0) {
-+        av_log(s, AV_LOG_ERROR, "Failed to create program\n");
-+        return 0;
-+    }
-+
-+    glAttachShader(prog, vs);
-+    glAttachShader(prog, fs);
-+    glLinkProgram(prog);
-+
-+    {
-+        GLint ok;
-+        glGetProgramiv(prog, GL_LINK_STATUS, &ok);
-+        if (!ok) {
-+            /* Some drivers return a size of 1 for an empty log.  This is the size
-+             * of a log that contains only a terminating NUL character.
-+             */
-+            GLint size;
-+            GLchar *info = NULL;
-+            glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size);
-+            if (size > 1) {
-+                info = malloc(size);
-+                glGetProgramInfoLog(prog, size, NULL, info);
-+            }
- 
--   return prog;
-+            av_log(s, AV_LOG_ERROR, "Failed to link: %s\n",
-+                   (info != NULL) ? info : "<empty log>");
-+            return 0;
-+        }
-+    }
-+
-+    return prog;
- }
- 
- static int
--gl_setup(struct AVFormatContext * const s)
-+gl_setup(struct AVFormatContext *const s)
- {
--   const char *vs =
--      "attribute vec4 pos;\n"
--      "varying vec2 texcoord;\n"
--      "\n"
--      "void main() {\n"
--      "  gl_Position = pos;\n"
--      "  texcoord.x = (pos.x + 1.0) / 2.0;\n"
--      "  texcoord.y = (-pos.y + 1.0) / 2.0;\n"
--      "}\n";
--   const char *fs =
--      "#extension GL_OES_EGL_image_external : enable\n"
--      "precision mediump float;\n"
--      "uniform samplerExternalOES s;\n"
--      "varying vec2 texcoord;\n"
--      "void main() {\n"
--      "  gl_FragColor = texture2D(s, texcoord);\n"
--      "}\n";
--
--   GLuint vs_s;
--   GLuint fs_s;
--   GLuint prog;
--
--   if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) ||
--       !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) ||
--       !(prog = link_program(s, vs_s, fs_s)))
--      return -1;
--
--   glUseProgram(prog);
--
--   {
--      static const float verts[] = {
--         -1, -1,
--         1, -1,
--         1, 1,
--         -1, 1,
--      };
--      glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts);
--   }
--
--   glEnableVertexAttribArray(0);
--   return 0;
-+    const char *vs =
-+        "attribute vec4 pos;\n"
-+        "varying vec2 texcoord;\n"
-+        "\n"
-+        "void main() {\n"
-+        "  gl_Position = pos;\n"
-+        "  texcoord.x = (pos.x + 1.0) / 2.0;\n"
-+        "  texcoord.y = (-pos.y + 1.0) / 2.0;\n"
-+        "}\n";
-+    const char *fs =
-+        "#extension GL_OES_EGL_image_external : enable\n"
-+        "precision mediump float;\n"
-+        "uniform samplerExternalOES s;\n"
-+        "varying vec2 texcoord;\n"
-+        "void main() {\n"
-+        "  gl_FragColor = texture2D(s, texcoord);\n"
-+        "}\n";
-+
-+    GLuint vs_s;
-+    GLuint fs_s;
-+    GLuint prog;
-+
-+    if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) ||
-+        !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) ||
-+        !(prog = link_program(s, vs_s, fs_s)))
-+        return -1;
-+
-+    glUseProgram(prog);
-+
-+    {
-+        static const float verts[] = {
-+            -1, -1,
-+            1, -1,
-+            1,  1,
-+            -1,  1,
-+        };
-+        glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts);
-+    }
-+
-+    glEnableVertexAttribArray(0);
-+    return 0;
- }
- 
- static int egl_vout_write_trailer(AVFormatContext *s)
-@@ -400,12 +398,12 @@ static int egl_vout_write_trailer(AVFormatContext *s)
- 
- static int egl_vout_write_header(AVFormatContext *s)
- {
--    const AVCodecParameters * const par = s->streams[0]->codecpar;
-+    const AVCodecParameters *const par = s->streams[0]->codecpar;
- 
- #if TRACE_ALL
-     av_log(s, AV_LOG_INFO, "%s\n", __func__);
- #endif
--    if (   s->nb_streams > 1
-+    if (s->nb_streams > 1
-         || par->codec_type != AVMEDIA_TYPE_VIDEO
-         || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
-         av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
-@@ -416,10 +414,10 @@ static int egl_vout_write_header(AVFormatContext *s)
- }
- 
- 
--static int do_display(AVFormatContext * const s, egl_display_env_t * const de, AVFrame * const frame)
-+static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame)
- {
--    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
--    egl_aux_t * da = NULL;
-+    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0];
-+    egl_aux_t *da = NULL;
-     unsigned int i;
- 
- #if TRACE_ALL
-@@ -440,26 +438,26 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A
- 
-     if (da->texture == 0) {
-         EGLint attribs[50];
--        EGLint * a = attribs;
-+        EGLint *a = attribs;
-         int i, j;
-         static const EGLint anames[] = {
--           EGL_DMA_BUF_PLANE0_FD_EXT,
--           EGL_DMA_BUF_PLANE0_OFFSET_EXT,
--           EGL_DMA_BUF_PLANE0_PITCH_EXT,
--           EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
--           EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
--           EGL_DMA_BUF_PLANE1_FD_EXT,
--           EGL_DMA_BUF_PLANE1_OFFSET_EXT,
--           EGL_DMA_BUF_PLANE1_PITCH_EXT,
--           EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT,
--           EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
--           EGL_DMA_BUF_PLANE2_FD_EXT,
--           EGL_DMA_BUF_PLANE2_OFFSET_EXT,
--           EGL_DMA_BUF_PLANE2_PITCH_EXT,
--           EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT,
--           EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT,
-+            EGL_DMA_BUF_PLANE0_FD_EXT,
-+            EGL_DMA_BUF_PLANE0_OFFSET_EXT,
-+            EGL_DMA_BUF_PLANE0_PITCH_EXT,
-+            EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
-+            EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
-+            EGL_DMA_BUF_PLANE1_FD_EXT,
-+            EGL_DMA_BUF_PLANE1_OFFSET_EXT,
-+            EGL_DMA_BUF_PLANE1_PITCH_EXT,
-+            EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT,
-+            EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
-+            EGL_DMA_BUF_PLANE2_FD_EXT,
-+            EGL_DMA_BUF_PLANE2_OFFSET_EXT,
-+            EGL_DMA_BUF_PLANE2_PITCH_EXT,
-+            EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT,
-+            EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT,
-         };
--        const EGLint * b = anames;
-+        const EGLint *b = anames;
- 
-         *a++ = EGL_WIDTH;
-         *a++ = av_frame_cropped_width(frame);
-@@ -470,8 +468,8 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A
- 
-         for (i = 0; i < desc->nb_layers; ++i) {
-             for (j = 0; j < desc->layers[i].nb_planes; ++j) {
--                const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
--                const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
-+                const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j;
-+                const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index;
-                 *a++ = *b++;
-                 *a++ = obj->fd;
-                 *a++ = *b++;
-@@ -479,13 +477,13 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A
-                 *a++ = *b++;
-                 *a++ = p->pitch;
-                 if (obj->format_modifier == 0) {
--                   b += 2;
-+                    b += 2;
-                 }
-                 else {
--                   *a++ = *b++;
--                   *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF);
--                   *a++ = *b++;
--                   *a++ = (EGLint)(obj->format_modifier >> 32);
-+                    *a++ = *b++;
-+                    *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF);
-+                    *a++ = *b++;
-+                    *a++ = (EGLint)(obj->format_modifier >> 32);
-                 }
-             }
-         }
-@@ -494,26 +492,26 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A
- 
- #if TRACE_ALL
-         for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) {
--           av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]);
-+            av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]);
-         }
- #endif
-         {
--           const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy,
--                                              EGL_NO_CONTEXT,
--                                              EGL_LINUX_DMA_BUF_EXT,
--                                              NULL, attribs);
--           if (!image) {
--              av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd);
--              return -1;
--           }
--
--           glGenTextures(1, &da->texture);
--           glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
--           glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
--           glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
--           glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
--
--           eglDestroyImageKHR(de->setup.egl_dpy, image);
-+            const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy,
-+                                                     EGL_NO_CONTEXT,
-+                                                     EGL_LINUX_DMA_BUF_EXT,
-+                                                     NULL, attribs);
-+            if (!image) {
-+                av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd);
-+                return -1;
-+            }
-+
-+            glGenTextures(1, &da->texture);
-+            glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
-+            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-+            glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-+            glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
-+
-+            eglDestroyImageKHR(de->setup.egl_dpy, image);
-         }
- 
-         da->fd = desc->objects[0].fd;
-@@ -540,7 +538,7 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A
-                (long long)modifiers[1],
-                (long long)modifiers[2],
-                (long long)modifiers[3]
--               );
-+              );
- #endif
-     }
- 
-@@ -558,55 +556,55 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A
-     return 0;
- }
- 
--static void * display_thread(void * v)
-+static void* display_thread(void *v)
- {
--    AVFormatContext * const s = v;
--    egl_display_env_t * const de = s->priv_data;
-+    AVFormatContext *const s = v;
-+    egl_display_env_t *const de = s->priv_data;
- 
- #if TRACE_ALL
-     av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
- #endif
-     {
--       EGLint egl_major, egl_minor;
--
--       de->setup.dpy = XOpenDisplay(NULL);
--       if (!de->setup.dpy) {
--          av_log(s, AV_LOG_ERROR, "Couldn't open X display\n");
--          goto fail;
--       }
--
--       de->setup.egl_dpy = eglGetDisplay(de->setup.dpy);
--       if (!de->setup.egl_dpy) {
--          av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n");
--          goto fail;
--       }
--
--       if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) {
--           av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n");
--           goto fail;
--       }
--
--       av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor);
--
--       if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) {
--          av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n");
--          goto fail;
--       }
-+        EGLint egl_major, egl_minor;
-+
-+        de->setup.dpy = XOpenDisplay(NULL);
-+        if (!de->setup.dpy) {
-+            av_log(s, AV_LOG_ERROR, "Couldn't open X display\n");
-+            goto fail;
-+        }
-+
-+        de->setup.egl_dpy = eglGetDisplay(de->setup.dpy);
-+        if (!de->setup.egl_dpy) {
-+            av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n");
-+            goto fail;
-+        }
-+
-+        if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) {
-+            av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n");
-+            goto fail;
-+        }
-+
-+        av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor);
-+
-+        if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) {
-+            av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n");
-+            goto fail;
-+        }
-     }
- 
-     if (!de->window_width || !de->window_height) {
--       de->window_width = 1280;
--       de->window_height = 720;
-+        de->window_width = 1280;
-+        de->window_height = 720;
-     }
-     if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout",
-                     &de->setup.win, &de->setup.ctx, &de->setup.surf)) {
--       av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__);
--       goto fail;
-+        av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__);
-+        goto fail;
-     }
- 
-     if (gl_setup(s)) {
--       av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__);
--       goto fail;
-+        av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__);
-+        goto fail;
-     }
- 
- #if TRACE_ALL
-@@ -615,7 +613,7 @@ static void * display_thread(void * v)
-     sem_post(&de->display_start_sem);
- 
-     for (;;) {
--        AVFrame * frame;
-+        AVFrame *frame;
- 
-         while (sem_wait(&de->q_sem) != 0) {
-             av_assert0(errno == EINTR);
-@@ -653,9 +651,9 @@ fail:
- 
- static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
- {
--    const AVFrame * const src_frame = (AVFrame *)pkt->data;
--    AVFrame * frame;
--    egl_display_env_t * const de = s->priv_data;
-+    const AVFrame *const src_frame = (AVFrame *)pkt->data;
-+    AVFrame *frame;
-+    egl_display_env_t *const de = s->priv_data;
- 
- #if TRACE_ALL
-     av_log(s, AV_LOG_INFO, "%s\n", __func__);
-@@ -668,8 +666,7 @@ static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
-     else if (src_frame->format == AV_PIX_FMT_VAAPI) {
-         frame = av_frame_alloc();
-         frame->format = AV_PIX_FMT_DRM_PRIME;
--        if (av_hwframe_map(frame, src_frame, 0) != 0)
--        {
-+        if (av_hwframe_map(frame, src_frame, 0) != 0) {
-             av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
-             av_frame_free(&frame);
-             return AVERROR(EINVAL);
-@@ -682,12 +679,12 @@ static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
- 
-     // Really hacky sync
-     while (de->show_all && de->q_next) {
--       usleep(3000);
-+        usleep(3000);
-     }
- 
-     pthread_mutex_lock(&de->q_lock);
-     {
--        AVFrame * const t = de->q_next;
-+        AVFrame *const t = de->q_next;
-         de->q_next = frame;
-         frame = t;
-     }
-@@ -702,7 +699,7 @@ static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
- }
- 
- static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
--                          unsigned flags)
-+                                unsigned flags)
- {
-     av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags);
-     return AVERROR_PATCHWELCOME;
-@@ -713,7 +710,7 @@ static int egl_vout_control_message(AVFormatContext *s, int type, void *data, si
- #if TRACE_ALL
-     av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type);
- #endif
--    switch(type) {
-+    switch (type) {
-     case AV_APP_TO_DEV_WINDOW_REPAINT:
-         return 0;
-     default:
-@@ -723,14 +720,14 @@ static int egl_vout_control_message(AVFormatContext *s, int type, void *data, si
- }
- 
- // deinit is called if init fails so no need to clean up explicity here
--static int egl_vout_init(struct AVFormatContext * s)
-+static int egl_vout_init(struct AVFormatContext *s)
- {
--    egl_display_env_t * const de = s->priv_data;
-+    egl_display_env_t *const de = s->priv_data;
-     unsigned int i;
- 
-     av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
- 
--    de->setup = (struct egl_setup){0};
-+    de->setup = (struct egl_setup) { 0 };
- 
-     for (i = 0; i != 32; ++i) {
-         de->aux[i].fd = -1;
-@@ -744,8 +741,8 @@ static int egl_vout_init(struct AVFormatContext * s)
- 
-     sem_wait(&de->display_start_sem);
-     if (de->q_terminate) {
--       av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__);
--       return -1;
-+        av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__);
-+        return -1;
-     }
- 
-     av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
-@@ -753,9 +750,9 @@ static int egl_vout_init(struct AVFormatContext * s)
-     return 0;
- }
- 
--static void egl_vout_deinit(struct AVFormatContext * s)
-+static void egl_vout_deinit(struct AVFormatContext *s)
- {
--    egl_display_env_t * const de = s->priv_data;
-+    egl_display_env_t *const de = s->priv_data;
- 
-     av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
- 
-@@ -773,11 +770,11 @@ static void egl_vout_deinit(struct AVFormatContext * s)
- 
- #define OFFSET(x) offsetof(egl_display_env_t, x)
- static const AVOption options[] = {
--   { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
--   { "window_size",  "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
--   { "window_x",     "set window x offset",    OFFSET(window_x),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
--   { "window_y",     "set window y offset",    OFFSET(window_y),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
--   { "fullscreen",   "set fullscreen display", OFFSET(fullscreen),   AV_OPT_TYPE_BOOL,   {.i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-+    { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-+    { "window_size",  "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
-+    { "window_x",     "set window x offset",    OFFSET(window_x),     AV_OPT_TYPE_INT,    { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
-+    { "window_y",     "set window y offset",    OFFSET(window_y),     AV_OPT_TYPE_INT,    { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
-+    { "fullscreen",   "set fullscreen display", OFFSET(fullscreen),   AV_OPT_TYPE_BOOL,   { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-     { NULL }
- 
- };
-
-From a132ef51a831edfa36f52ba699922fdb06acd1b0 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 12 Dec 2022 16:49:43 +0000
-Subject: [PATCH 098/186] v4l2m2m: reporganise get_raw_format for loop logic
-
----
- libavcodec/v4l2_context.c | 16 +++++-----------
- 1 file changed, 5 insertions(+), 11 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 7031f3d3409d..79a31cf9300b 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -828,28 +828,22 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
-             return 0;
-     }
- 
--    for (;;) {
-+    for (;; ++fdesc.index) {
-         ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc);
-         if (ret)
-             return AVERROR(EINVAL);
- 
-         if (priv->pix_fmt != AV_PIX_FMT_NONE) {
--            if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) {
--                fdesc.index++;
-+            if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt))
-                 continue;
--            }
-         }
- 
-         pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO);
-         ret = v4l2_try_raw_format(ctx, pixfmt);
--        if (ret){
--            fdesc.index++;
--            continue;
-+        if (ret == 0) {
-+            *p = pixfmt;
-+            return 0;
-         }
--
--        *p = pixfmt;
--
--        return 0;
-     }
- 
-     return AVERROR(EINVAL);
-
-From 0189b1c3bb002b0385a419f4140371ea1ac4153c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 12 Dec 2022 17:49:12 +0000
-Subject: [PATCH 099/186] drm_vout: Set zpos on the plane we pick to ensure it
- is at the front
-
----
- libavdevice/drm_vout.c | 38 +++++++++++++++++++++++++++++++++-----
- 1 file changed, 33 insertions(+), 5 deletions(-)
-
-diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c
-index cfb33ce7c319..9bd9e04421d7 100644
---- a/libavdevice/drm_vout.c
-+++ b/libavdevice/drm_vout.c
-@@ -115,9 +115,11 @@ static int find_plane(struct AVFormatContext * const avctx,
- {
-    drmModePlaneResPtr planes;
-    drmModePlanePtr plane;
-+   drmModeObjectPropertiesPtr props = NULL;
-+   drmModePropertyPtr prop = NULL;
-    unsigned int i;
-    unsigned int j;
--   int ret = 0;
-+   int ret = -1;
- 
-    planes = drmModeGetPlaneResources(drmfd);
-    if (!planes)
-@@ -154,11 +156,37 @@ static int find_plane(struct AVFormatContext * const avctx,
-       break;
-    }
- 
--   if (i == planes->count_planes)
--      ret = -1;
-+   if (i == planes->count_planes) {
-+       ret = -1;
-+       goto fail;
-+   }
- 
--   drmModeFreePlaneResources(planes);
--   return ret;
-+    props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE);
-+    if (!props)
-+        goto fail;
-+    for (i = 0; i != props->count_props; ++i) {
-+        if (prop)
-+            drmModeFreeProperty(prop);
-+        prop = drmModeGetProperty(drmfd, props->props[i]);
-+        if (!prop)
-+            goto fail;
-+        if (strcmp("zpos", prop->name) == 0) {
-+            if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0)
-+                av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]);
-+            else
-+                av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n");
-+            break;
-+        }
-+    }
-+
-+    ret = 0;
-+fail:
-+    if (props)
-+        drmModeFreeObjectProperties(props);
-+    if (prop)
-+        drmModeFreeProperty(prop);
-+    drmModeFreePlaneResources(planes);
-+    return ret;
- }
- 
- static void da_uninit(drm_display_env_t * const de, drm_aux_t * da)
-
-From 386acb23dd6196fac68a39fa945a5b6b9c18c6a8 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 12 Dec 2022 17:51:46 +0000
-Subject: [PATCH 100/186] drm_vout: Only set modifier flag and pass modifiers
- if there are some
-
----
- libavdevice/drm_vout.c | 17 ++++++++++++-----
- 1 file changed, 12 insertions(+), 5 deletions(-)
-
-diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c
-index 9bd9e04421d7..a56adea86625 100644
---- a/libavdevice/drm_vout.c
-+++ b/libavdevice/drm_vout.c
-@@ -34,6 +34,7 @@
- 
- #include <xf86drm.h>
- #include <xf86drmMode.h>
-+#include <drm_fourcc.h>
- 
- #define TRACE_ALL 0
- 
-@@ -249,6 +250,7 @@ static int do_display(AVFormatContext * const s, drm_display_env_t * const de, A
-         uint32_t offsets[4] = {0};
-         uint64_t modifiers[4] = {0};
-         uint32_t bo_handles[4] = {0};
-+        int has_mods = 0;
-         int i, j, n;
- 
-         da->frame = frame;
-@@ -258,6 +260,9 @@ static int do_display(AVFormatContext * const s, drm_display_env_t * const de, A
-                 av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR);
-                 return -1;
-             }
-+            if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR &&
-+                desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID)
-+                has_mods = 1;
-         }
- 
-         n = 0;
-@@ -299,11 +304,13 @@ static int do_display(AVFormatContext * const s, drm_display_env_t * const de, A
- #endif
- 
-         if (drmModeAddFB2WithModifiers(de->drm_fd,
--                                         av_frame_cropped_width(frame),
--                                         av_frame_cropped_height(frame),
--                                         desc->layers[0].format, bo_handles,
--                                         pitches, offsets, modifiers,
--                                         &da->fb_handle, DRM_MODE_FB_MODIFIERS /** 0 if no mods */) != 0) {
-+                                       av_frame_cropped_width(frame),
-+                                       av_frame_cropped_height(frame),
-+                                       desc->layers[0].format, bo_handles,
-+                                       pitches, offsets,
-+                                       has_mods ? modifiers : NULL,
-+                                       &da->fb_handle,
-+                                       has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) {
-             av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR);
-             return -1;
-         }
-
-From 66cc08dbaec01e45af9c09a06829fcb2db58e30c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 12 Dec 2022 17:52:58 +0000
-Subject: [PATCH 101/186] drm_vout: Fix typo in error message
-
----
- libavdevice/drm_vout.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c
-index a56adea86625..351abf1d60aa 100644
---- a/libavdevice/drm_vout.c
-+++ b/libavdevice/drm_vout.c
-@@ -596,7 +596,7 @@ static int drm_vout_init(struct AVFormatContext * s)
-     sem_init(&de->q_sem_out, 0, 0);
-     if (pthread_create(&de->q_thread, NULL, display_thread, s)) {
-         rv = AVERROR(errno);
--        av_log(s, AV_LOG_ERROR, "Failed to creatye display thread: %s\n", av_err2str(rv));
-+        av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv));
-         goto fail_close;
-     }
- 
-
-From e11c24968da620816853eb0a7d33cb3e9488afb1 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 12 Dec 2022 18:00:41 +0000
-Subject: [PATCH 102/186] drm_vout: Add option to name the drm_module to use
-
----
- libavdevice/drm_vout.c | 8 +++++---
- 1 file changed, 5 insertions(+), 3 deletions(-)
-
-diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c
-index 351abf1d60aa..491e1dc60861 100644
---- a/libavdevice/drm_vout.c
-+++ b/libavdevice/drm_vout.c
-@@ -70,7 +70,9 @@ typedef struct drm_display_env_s
-     uint32_t con_id;
-     struct drm_setup setup;
-     enum AVPixelFormat avfmt;
-+
-     int show_all;
-+    const char * drm_module;
- 
-     unsigned int ano;
-     drm_aux_t aux[AUX_SIZE];
-@@ -569,7 +571,6 @@ static int drm_vout_init(struct AVFormatContext * s)
- {
-     drm_display_env_t * const de = s->priv_data;
-     int rv;
--    const char * drm_module = DRM_MODULE;
- 
-     av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
- 
-@@ -578,10 +579,10 @@ static int drm_vout_init(struct AVFormatContext * s)
-     de->setup = (struct drm_setup){0};
-     de->q_terminate = 0;
- 
--    if ((de->drm_fd = drmOpen(drm_module, NULL)) < 0)
-+    if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0)
-     {
-         rv = AVERROR(errno);
--        av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", drm_module, av_err2str(rv));
-+        av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv));
-         return rv;
-     }
- 
-@@ -641,6 +642,7 @@ static void drm_vout_deinit(struct AVFormatContext * s)
- #define OFFSET(x) offsetof(drm_display_env_t, x)
- static const AVOption options[] = {
-     { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
-+    { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },
-     { NULL }
- };
- 
-
-From 397c9473723e936b86ff26fc5c0d5ba381874be3 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 13:01:00 +0000
-Subject: [PATCH 103/186] dmabufs: Rework to allow for non-CMA backends
-
----
- libavcodec/v4l2_req_dmabufs.c | 161 ++++++++++++++++++++++++----------
- 1 file changed, 116 insertions(+), 45 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c
-index c4bbed18c680..1c3a5e861f0c 100644
---- a/libavcodec/v4l2_req_dmabufs.c
-+++ b/libavcodec/v4l2_req_dmabufs.c
-@@ -1,3 +1,4 @@
-+#include <stdatomic.h>
- #include <stdio.h>
- #include <stdlib.h>
+  * @return side data descriptor corresponding to a given side data type, NULL
+  *         when not available.
+diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c
+index 0847db09a08b..6365b7cf211b 100644
+--- a/libavutil/hwcontext_drm.c
++++ b/libavutil/hwcontext_drm.c
+@@ -21,6 +21,7 @@
+ #include <fcntl.h>
+ #include <sys/mman.h>
  #include <unistd.h>
-@@ -19,9 +20,21 @@
++#include <sys/ioctl.h>
  
- #define TRACE_ALLOC 0
- 
-+struct dmabufs_ctl;
-+struct dmabuf_h;
-+
-+struct dmabuf_fns {
-+    int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size);
-+    void (*buf_free)(struct dmabuf_h * dh);
-+    int (*ctl_new)(struct dmabufs_ctl * dbsc);
-+    void (*ctl_free)(struct dmabufs_ctl * dbsc);
-+};
-+
- struct dmabufs_ctl {
-     int fd;
-     size_t page_size;
-+    void * v;
-+    const struct dmabuf_fns * fns;
- };
- 
- struct dmabuf_h {
-@@ -29,6 +42,8 @@ struct dmabuf_h {
-     size_t size;
-     size_t len;
-     void * mapptr;
-+    void * v;
-+    const struct dmabuf_fns * fns;
- };
- 
- #if TRACE_ALLOC
-@@ -88,15 +103,8 @@ struct dmabuf_h * dmabuf_import(int fd, size_t size)
- struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size)
- {
-     struct dmabuf_h * dh;
--    struct dma_heap_allocation_data data = {
--        .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1),
--        .fd = 0,
--        .fd_flags = O_RDWR,
--        .heap_flags = 0
--    };
--
-     if (old != NULL) {
--        if (old->size == data.len) {
-+        if (old->size >= size) {
-             return old;
-         }
-         dmabuf_free(old);
-@@ -106,24 +114,16 @@ struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * ol
-         (dh = malloc(sizeof(*dh))) == NULL)
-         return NULL;
- 
--    while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) {
--        int err = errno;
--        request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n",
--                (uint64_t)data.len,
--                dbsc->fd,
--                err,
--                strerror(err));
--        if (err == EINTR)
--            continue;
--        goto fail;
--    }
--
-     *dh = (struct dmabuf_h){
--        .fd = data.fd,
--        .size = (size_t)data.len,
--        .mapptr = MAP_FAILED
-+        .fd = -1,
-+        .mapptr = MAP_FAILED,
-+        .fns = dbsc->fns
-     };
- 
-+    if (dh->fns->buf_alloc(dbsc, dh, size) != 0)
-+        goto fail;
-+
-+
- #if TRACE_ALLOC
-     ++total_bufs;
-     total_size += dh->size;
-@@ -220,8 +220,6 @@ void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len)
-     dh->len = len;
- }
- 
--
--
- void dmabuf_free(struct dmabuf_h * dh)
- {
-     if (!dh)
-@@ -233,20 +231,63 @@ void dmabuf_free(struct dmabuf_h * dh)
-     request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
+ /* This was introduced in version 4.6. And may not exist all without an
+  * optional package. So to prevent a hard dependency on needing the Linux
+@@ -31,6 +32,7 @@
  #endif
  
--    if (dh->mapptr != MAP_FAILED)
-+    dh->fns->buf_free(dh);
-+
-+    if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL)
-         munmap(dh->mapptr, dh->size);
--    while (close(dh->fd) == -1 && errno == EINTR)
--        /* loop */;
-+    if (dh->fd != -1)
-+        while (close(dh->fd) == -1 && errno == EINTR)
-+            /* loop */;
-     free(dh);
- }
+ #include <drm.h>
++#include <libdrm/drm_fourcc.h>
+ #include <xf86drm.h>
  
--struct dmabufs_ctl * dmabufs_ctl_new(void)
-+static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns)
+ #include "avassert.h"
+@@ -40,6 +42,9 @@
+ #include "imgutils.h"
+ #include "mem.h"
+ 
++#if CONFIG_SAND
++#include "libavutil/rpi_sand_fns.h"
++#endif
+ 
+ static void drm_device_free(AVHWDeviceContext *hwdev)
  {
--    struct dmabufs_ctl * dbsc = malloc(sizeof(*dbsc));
-+    struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc));
+@@ -54,6 +59,11 @@ static int drm_device_create(AVHWDeviceContext *hwdev, const char *device,
+     AVDRMDeviceContext *hwctx = hwdev->hwctx;
+     drmVersionPtr version;
  
-     if (!dbsc)
-         return NULL;
- 
-+    dbsc->fd = -1;
-+    dbsc->fns = fns;
-+    dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE);
-+
-+    if (fns->ctl_new(dbsc) != 0)
-+        goto fail;
-+
-+    return dbsc;
-+
-+fail:
-+    free(dbsc);
-+    return NULL;
-+}
-+
-+static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc)
-+{
-+    request_debug(NULL, "Free dmabuf ctl\n");
-+
-+    dbsc->fns->ctl_free(dbsc);
-+
-+    free(dbsc);
-+}
-+
-+void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc)
-+{
-+    struct dmabufs_ctl * const dbsc = *pDbsc;
-+
-+    if (!dbsc)
-+        return;
-+    *pDbsc = NULL;
-+
-+    dmabufs_ctl_free(dbsc);
-+}
-+
-+//-----------------------------------------------------------------------------
-+//
-+// Alloc dmabuf via CMA
-+
-+static int ctl_cma_new(struct dmabufs_ctl * dbsc)
-+{
-     while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 &&
-            errno == EINTR)
-         /* Loop */;
-@@ -258,31 +299,61 @@ struct dmabufs_ctl * dmabufs_ctl_new(void)
-         if (dbsc->fd == -1) {
-             request_log("Unable to open either %s or %s\n",
-                     DMABUF_NAME1, DMABUF_NAME2);
--            goto fail;
-+            return -1;
-         }
-     }
-+    return 0;
-+}
- 
--    dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE);
--
--    return dbsc;
-+static void ctl_cma_free(struct dmabufs_ctl * dbsc)
-+{
-+    if (dbsc->fd != -1)
-+        while (close(dbsc->fd) == -1 && errno == EINTR)
-+            /* loop */;
- 
--fail:
--    free(dbsc);
--    return NULL;
- }
- 
--void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc)
-+static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size)
- {
--    struct dmabufs_ctl * const dbsc = *pDbsc;
-+    struct dma_heap_allocation_data data = {
-+        .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1),
-+        .fd = 0,
-+        .fd_flags = O_RDWR,
-+        .heap_flags = 0
-+    };
- 
--    if (!dbsc)
--        return;
--    *pDbsc = NULL;
-+    while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) {
-+        int err = errno;
-+        request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n",
-+                (uint64_t)data.len,
-+                dbsc->fd,
-+                err,
-+                strerror(err));
-+        if (err == EINTR)
-+            continue;
-+        return -err;
++    if (device == NULL) {
++        hwctx->fd = -1;
++        return 0;
 +    }
- 
--    while (close(dbsc->fd) == -1 && errno == EINTR)
--        /* loop */;
-+    dh->fd = data.fd;
-+    dh->size = (size_t)data.len;
-+    return 0;
-+}
- 
--    free(dbsc);
-+static void buf_cma_free(struct dmabuf_h * dh)
-+{
-+    // Nothing needed
- }
- 
-+static const struct dmabuf_fns dmabuf_cma_fns = {
-+    .buf_alloc  = buf_cma_alloc,
-+    .buf_free   = buf_cma_free,
-+    .ctl_new    = ctl_cma_new,
-+    .ctl_free   = ctl_cma_free,
-+};
 +
-+struct dmabufs_ctl * dmabufs_ctl_new(void)
-+{
-+    request_debug(NULL, "Dmabufs using CMA\n");;
-+    return dmabufs_ctl_new2(&dmabuf_cma_fns);
-+}
+     hwctx->fd = open(device, O_RDWR);
+     if (hwctx->fd < 0)
+         return AVERROR(errno);
+@@ -140,6 +150,8 @@ static int drm_map_frame(AVHWFramesContext *hwfc,
+     if (flags & AV_HWFRAME_MAP_WRITE)
+         mmap_prot |= PROT_WRITE;
  
-
-From c788ac962a1a4221d3fe9ab2b0d19ebf43964519 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 13:07:58 +0000
-Subject: [PATCH 104/186] dmabufs: Use unref rather than deleet on cmabufs_ctl
-
----
- libavcodec/v4l2_req_dmabufs.c  | 12 +++++++++++-
- libavcodec/v4l2_req_dmabufs.h  |  3 ++-
- libavcodec/v4l2_request_hevc.c |  4 ++--
- 3 files changed, 15 insertions(+), 4 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c
-index 1c3a5e861f0c..acc0366e7630 100644
---- a/libavcodec/v4l2_req_dmabufs.c
-+++ b/libavcodec/v4l2_req_dmabufs.c
-@@ -31,6 +31,7 @@ struct dmabuf_fns {
- };
++    if (dst->format == AV_PIX_FMT_NONE)
++        dst->format = hwfc->sw_format;
+ #if HAVE_LINUX_DMA_BUF_H
+     if (flags & AV_HWFRAME_MAP_READ)
+         map->sync_flags |= DMA_BUF_SYNC_READ;
+@@ -186,6 +198,23 @@ static int drm_map_frame(AVHWFramesContext *hwfc,
  
- struct dmabufs_ctl {
-+    atomic_int ref_count;
-     int fd;
-     size_t page_size;
-     void * v;
-@@ -271,7 +272,7 @@ static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc)
-     free(dbsc);
- }
+     dst->width  = src->width;
+     dst->height = src->height;
++    dst->crop_top    = src->crop_top;
++    dst->crop_bottom = src->crop_bottom;
++    dst->crop_left   = src->crop_left;
++    dst->crop_right  = src->crop_right;
++
++#if CONFIG_SAND
++    // Rework for sand frames
++    if (av_rpi_is_sand_frame(dst)) {
++        // As it stands the sand formats hold stride2 in linesize[3]
++        // linesize[0] & [1] contain stride1 which is always 128 for everything we do
++        // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1]
++        dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier);
++        dst->linesize[0] = 128;
++        dst->linesize[1] = 128;
++        // *** Are we sure src->height is actually what we want ???
++    }
++#endif
  
--void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc)
-+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc)
+     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+                                 &drm_unmap_frame, map);
+@@ -207,16 +236,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx,
+                                     enum AVHWFrameTransferDirection dir,
+                                     enum AVPixelFormat **formats)
  {
-     struct dmabufs_ctl * const dbsc = *pDbsc;
+-    enum AVPixelFormat *pix_fmts;
++    enum AVPixelFormat *p;
  
-@@ -279,9 +280,18 @@ void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc)
-         return;
-     *pDbsc = NULL;
+-    pix_fmts = av_malloc_array(2, sizeof(*pix_fmts));
+-    if (!pix_fmts)
++    p = *formats = av_malloc_array(3, sizeof(*p));
++    if (!p)
+         return AVERROR(ENOMEM);
  
-+    if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0)
-+        return;
+-    pix_fmts[0] = ctx->sw_format;
+-    pix_fmts[1] = AV_PIX_FMT_NONE;
++    // **** Offer native sand too ????
++    *p++ =
++#if CONFIG_SAND
++        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ?
++            AV_PIX_FMT_YUV420P :
++        ctx->sw_format == AV_PIX_FMT_RPI4_10 ?
++            AV_PIX_FMT_YUV420P10LE :
++#endif
++            ctx->sw_format;
 +
-     dmabufs_ctl_free(dbsc);
++#if CONFIG_SAND
++    if (ctx->sw_format == AV_PIX_FMT_RPI4_10 ||
++        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128)
++        *p++ = AV_PIX_FMT_NV12;
++#endif
+ 
+-    *formats = pix_fmts;
++    *p = AV_PIX_FMT_NONE;
+     return 0;
  }
  
-+struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc)
-+{
-+    atomic_fetch_add(&dbsc->ref_count, 1);
-+    return dbsc;
-+}
-+
- //-----------------------------------------------------------------------------
- //
- // Alloc dmabuf via CMA
-diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h
-index c1d3d8c8d751..381ba2708da6 100644
---- a/libavcodec/v4l2_req_dmabufs.h
-+++ b/libavcodec/v4l2_req_dmabufs.h
-@@ -7,7 +7,8 @@ struct dmabufs_ctl;
- struct dmabuf_h;
+@@ -232,18 +274,62 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc,
+     map = av_frame_alloc();
+     if (!map)
+         return AVERROR(ENOMEM);
+-    map->format = dst->format;
  
- struct dmabufs_ctl * dmabufs_ctl_new(void);
--void dmabufs_ctl_delete(struct dmabufs_ctl ** const pdbsc);
-+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc);
-+struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc);
- 
- // Need not preserve old contents
- // On NULL return old buffer is freed
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index 767ecb036ad2..db7ed13b6d76 100644
---- a/libavcodec/v4l2_request_hevc.c
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -105,7 +105,7 @@ static int v4l2_request_hevc_uninit(AVCodecContext *avctx)
-     mediabufs_ctl_unref(&ctx->mbufs);
-     media_pool_delete(&ctx->mpool);
-     pollqueue_unref(&ctx->pq);
--    dmabufs_ctl_delete(&ctx->dbufs);
-+    dmabufs_ctl_unref(&ctx->dbufs);
-     devscan_delete(&ctx->devscan);
- 
-     decode_q_uninit(&ctx->decode_q);
-@@ -324,7 +324,7 @@ fail3:
- fail2:
-     pollqueue_unref(&ctx->pq);
- fail1:
--    dmabufs_ctl_delete(&ctx->dbufs);
-+    dmabufs_ctl_unref(&ctx->dbufs);
- fail0:
-     devscan_delete(&ctx->devscan);
-     return ret;
-
-From 95d64bce0aaeb0f2e1b2bcd15e8345349efe295b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 14:21:40 +0000
-Subject: [PATCH 105/186] egl_vout: Remove redundant & completely broken debug
-
----
- libavdevice/egl_vout.c | 25 -------------------------
- 1 file changed, 25 deletions(-)
-
-diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c
-index a52cabb082e9..afc7afd13ea0 100644
---- a/libavdevice/egl_vout.c
-+++ b/libavdevice/egl_vout.c
-@@ -515,31 +515,6 @@ static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVF
-         }
- 
-         da->fd = desc->objects[0].fd;
--
--#if 0
--        av_log(s, AV_LOG_INFO, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
--               " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
--               av_frame_cropped_width(frame),
--               av_frame_cropped_height(frame),
--               desc->layers[0].format,
--               bo_plane_handles[0],
--               bo_plane_handles[1],
--               bo_plane_handles[2],
--               bo_plane_handles[3],
--               pitches[0],
--               pitches[1],
--               pitches[2],
--               pitches[3],
--               offsets[0],
--               offsets[1],
--               offsets[2],
--               offsets[3],
--               (long long)modifiers[0],
--               (long long)modifiers[1],
--               (long long)modifiers[2],
--               (long long)modifiers[3]
--              );
--#endif
-     }
- 
-     glClearColor(0.5, 0.5, 0.5, 0.5);
-
-From b79c28a0644c4d8b83c616dab6005ca862ec99df Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 16:12:12 +0000
-Subject: [PATCH 106/186] v4l2m2m: Use offset from querybuf rather than always
- 0
-
----
- libavcodec/v4l2_buffers.c | 4 +++-
- libavcodec/v4l2_buffers.h | 3 ++-
- 2 files changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 9ef2f40e3991..5ca58ea5935b 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -379,7 +379,7 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
- 
-     for (int i = 0; i < avbuf->num_planes; i++) {
-         layer->planes[i].object_index = i;
--        layer->planes[i].offset = 0;
-+        layer->planes[i].offset = avbuf->plane_info[i].offset;
-         layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
-     }
- 
-@@ -934,6 +934,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
- 
-         if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
-             avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
-+            avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset;
- 
-             if (want_mmap)
-                 avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
-@@ -941,6 +942,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-                                                buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
-         } else {
-             avbuf->plane_info[i].length = avbuf->buf.length;
-+            avbuf->plane_info[i].offset = 0;
- 
-             if (want_mmap)
-                 avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
-diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
-index 1ac32c5989f1..d91d5d1dd07b 100644
---- a/libavcodec/v4l2_buffers.h
-+++ b/libavcodec/v4l2_buffers.h
-@@ -66,7 +66,8 @@ typedef struct V4L2Buffer {
- 
-     /* keep track of the mmap address and mmap length */
-     struct V4L2Plane_info {
--        int bytesperline;
-+        size_t bytesperline;
-+        size_t offset;
-         void * mm_addr;
-         size_t length;
-     } plane_info[VIDEO_MAX_PLANES];
-
-From 920d901527cbe17accc42659db548229318ac855 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 17:57:27 +0000
-Subject: [PATCH 107/186] v4l2m2m: Fix crash if init errors out before setting
- avctx
-
----
- libavcodec/v4l2_m2m.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index 1e30d15fd866..ac6bae0dc327 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -278,7 +278,7 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
- 
-     av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n");
- 
--    if (av_codec_is_decoder(s->avctx->codec))
-+    if (s->avctx && av_codec_is_decoder(s->avctx->codec))
-         av_packet_unref(&s->buf_pkt);
- 
-     if (s->fd >= 0) {
-
-From c339fbc23b3d0698e29301d7740cba39c9993fbc Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 18:10:30 +0000
-Subject: [PATCH 108/186] v4l2_buffers: Add and use ctx_to_m2mctx + error debug
-
----
- libavcodec/v4l2_buffers.c | 22 +++++++++++++++-------
- 1 file changed, 15 insertions(+), 7 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 5ca58ea5935b..e28ef2d1e802 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -41,11 +41,16 @@
- #define USEC_PER_SEC 1000000
- static const AVRational v4l2_timebase = { 1, USEC_PER_SEC };
- 
-+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx)
-+{
-+    return V4L2_TYPE_IS_OUTPUT(ctx->type) ?
-+        container_of(ctx, V4L2m2mContext, output) :
-+        container_of(ctx, V4L2m2mContext, capture);
-+}
-+
- static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf)
- {
--    return V4L2_TYPE_IS_OUTPUT(buf->context->type) ?
--        container_of(buf->context, V4L2m2mContext, output) :
--        container_of(buf->context, V4L2m2mContext, capture);
-+    return ctx_to_m2mctx(buf->context);
- }
- 
- static inline AVCodecContext *logger(const V4L2Buffer * const buf)
-@@ -883,6 +888,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-     int ret, i;
-     V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
-     AVBufferRef * bufref;
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
- 
-     *pbufref = NULL;
-     if (avbuf == NULL)
-@@ -910,7 +916,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-         avbuf->buf.m.planes = avbuf->planes;
-     }
- 
--    ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf);
-+    ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf);
-     if (ret < 0)
++    // Map to default
++    map->format = AV_PIX_FMT_NONE;
+     err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
+     if (err)
          goto fail;
  
-@@ -969,10 +975,12 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-     }
- 
-     if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
--        if (buf_to_m2mctx(avbuf)->output_drm) {
-+        if (s->output_drm) {
-             ret = v4l2_buffer_export_drm(avbuf);
--            if (ret)
--                    goto fail;
-+            if (ret) {
-+                av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n");
-+                goto fail;
-+            }
-         }
-     }
- 
-
-From 5959f5fb7ef1d1cab901393035e7a6ac31d0d78b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 18:53:22 +0000
-Subject: [PATCH 109/186] v4l2m2m: Add ability to use cma alloced dmabufs as
- well as v4l2 mmap
-
----
- libavcodec/Makefile       |  2 +-
- libavcodec/v4l2_buffers.c | 65 ++++++++++++++++++++++++++-------------
- libavcodec/v4l2_buffers.h |  2 ++
- libavcodec/v4l2_m2m.c     |  6 +++-
- libavcodec/v4l2_m2m.h     |  4 +++
- libavcodec/v4l2_m2m_dec.c | 16 ++++++++++
- 6 files changed, 71 insertions(+), 24 deletions(-)
-
-diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index 11f183c9b9ba..8b1d66983423 100644
---- a/libavcodec/Makefile
-+++ b/libavcodec/Makefile
-@@ -170,7 +170,7 @@ OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
- OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
- OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
- OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\
--                                          weak_link.o
-+                                          weak_link.o v4l2_req_dmabufs.o
- OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\
- 					  v4l2_req_devscan.o weak_link.o
- OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index e28ef2d1e802..8d80d1978830 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -36,6 +36,7 @@
- #include "v4l2_context.h"
- #include "v4l2_buffers.h"
- #include "v4l2_m2m.h"
-+#include "v4l2_req_dmabufs.h"
- #include "weak_link.h"
- 
- #define USEC_PER_SEC 1000000
-@@ -477,33 +478,46 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data)
-     av_buffer_unref(&bufref);
- }
- 
-+static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i)
-+{
-+    return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? b->m.planes[i].length : b->length;
-+}
+-    map->width  = dst->width;
+-    map->height = dst->height;
++#if 0
++    av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__,
++           hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE,
++           map->width, map->height,
++           map->linesize[0],
++           map->linesize[1],
++           map->linesize[2],
++           map->linesize[3],
++           dst->width, dst->height,
++           dst->linesize[0],
++           dst->linesize[1],
++           dst->linesize[2]);
++#endif
++#if CONFIG_SAND
++    if (av_rpi_is_sand_frame(map)) {
++        // Preserve crop - later ffmpeg code assumes that we have in that it
++        // overwrites any crop that we create with the old values
++        const unsigned int w = FFMIN(dst->width, map->width);
++        const unsigned int h = FFMIN(dst->height, map->height);
 +
- static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
- {
--    struct v4l2_exportbuffer expbuf;
-     int i, ret;
-+    const V4L2m2mContext * const s = buf_to_m2mctx(avbuf);
- 
-     for (i = 0; i < avbuf->num_planes; i++) {
--        memset(&expbuf, 0, sizeof(expbuf));
--
--        expbuf.index = avbuf->buf.index;
--        expbuf.type = avbuf->buf.type;
--        expbuf.plane = i;
-+        int dma_fd = -1;
-+        const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i);
++        map->crop_top = 0;
++        map->crop_bottom = 0;
++        map->crop_left = 0;
++        map->crop_right = 0;
 +
-+        if (s->db_ctl != NULL) {
-+            if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL)
-+                return AVERROR(ENOMEM);
-+            dma_fd = dmabuf_fd(avbuf->dmabuf[i]);
-+            if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type))
-+                avbuf->buf.m.planes[i].m.fd = dma_fd;
-+            else
-+                avbuf->buf.m.fd = dma_fd;
++        if (av_rpi_sand_to_planar_frame(dst, map) != 0)
++        {
++            av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__);
++            err = AVERROR(EINVAL);
++            goto fail;
 +        }
-+        else {
-+            struct v4l2_exportbuffer expbuf;
-+            memset(&expbuf, 0, sizeof(expbuf));
- 
--        ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_EXPBUF, &expbuf);
--        if (ret < 0)
--            return AVERROR(errno);
-+            expbuf.index = avbuf->buf.index;
-+            expbuf.type = avbuf->buf.type;
-+            expbuf.plane = i;
- 
--        if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) {
--            /* drm frame */
--            avbuf->drm_frame.objects[i].size = avbuf->buf.m.planes[i].length;
--            avbuf->drm_frame.objects[i].fd = expbuf.fd;
--            avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
--        } else {
--            /* drm frame */
--            avbuf->drm_frame.objects[0].size = avbuf->buf.length;
--            avbuf->drm_frame.objects[0].fd = expbuf.fd;
--            avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
-+            ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf);
-+            if (ret < 0)
-+                return AVERROR(errno);
-+            dma_fd = expbuf.fd;
-         }
 +
-+        avbuf->drm_frame.objects[i].size = blen;
-+        avbuf->drm_frame.objects[i].fd = dma_fd;
-+        avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR;
-     }
- 
-     return 0;
-@@ -870,9 +884,16 @@ static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data)
-             munmap(p->mm_addr, p->length);
-     }
- 
--    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
--        if (avbuf->drm_frame.objects[i].fd != -1)
--            close(avbuf->drm_frame.objects[i].fd);
-+    if (avbuf->dmabuf[0] == NULL) {
-+        for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
-+            if (avbuf->drm_frame.objects[i].fd != -1)
-+                close(avbuf->drm_frame.objects[i].fd);
-+        }
++        dst->width = w;
++        dst->height = h;
 +    }
-+    else {
-+        for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) {
-+            dmabuf_free(avbuf->dmabuf[i]);
-+        }
-     }
- 
-     av_buffer_unref(&avbuf->ref_buf);
-diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h
-index d91d5d1dd07b..444ad94b145e 100644
---- a/libavcodec/v4l2_buffers.h
-+++ b/libavcodec/v4l2_buffers.h
-@@ -46,6 +46,7 @@ enum V4L2Buffer_status {
-  */
- struct V4L2Context;
- struct ff_weak_link_client;
-+struct dmabuf_h;
- 
- typedef struct V4L2Buffer {
-     /* each buffer needs to have a reference to its context
-@@ -80,6 +81,7 @@ typedef struct V4L2Buffer {
- 
-     enum V4L2Buffer_status status;
- 
-+    struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here
- } V4L2Buffer;
- 
- /**
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index ac6bae0dc327..f802687b1bb2 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -34,6 +34,7 @@
- #include "v4l2_context.h"
- #include "v4l2_fmt.h"
- #include "v4l2_m2m.h"
-+#include "v4l2_req_dmabufs.h"
- 
- static void
- xlat_init(xlat_track_t * const x)
-@@ -75,7 +76,7 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe)
- 
-     s->capture.done = s->output.done = 0;
-     s->capture.name = "capture";
--    s->capture.buf_mem = V4L2_MEMORY_MMAP;
-+    s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
-     s->output.name = "output";
-     s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP;
-     atomic_init(&s->refcount, 0);
-@@ -94,12 +95,14 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe)
-     if (v4l2_mplane_video(&cap)) {
-         s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
-         s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
-+        s->output.format.type = s->output.type;
-         return 0;
-     }
- 
-     if (v4l2_splane_video(&cap)) {
-         s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-         s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-+        s->output.format.type = s->output.type;
-         return 0;
-     }
- 
-@@ -293,6 +296,7 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
- 
-     ff_v4l2_context_release(&s->output);
- 
-+    dmabufs_ctl_unref(&s->db_ctl);
-     close(s->fd);
-     s->fd = -1;
- 
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 26a7161042b5..0f41f94694d3 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -71,6 +71,8 @@ typedef struct xlat_track_s {
-     V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
- } xlat_track_t;
- 
-+struct dmabufs_ctl;
-+
- typedef struct V4L2m2mContext {
-     char devname[PATH_MAX];
-     int fd;
-@@ -124,6 +126,7 @@ typedef struct V4L2m2mContext {
-     /* Quirks */
-     unsigned int quirks;
- 
-+    struct dmabufs_ctl * db_ctl;
- } V4L2m2mContext;
- 
- typedef struct V4L2m2mPriv {
-@@ -134,6 +137,7 @@ typedef struct V4L2m2mPriv {
- 
-     int num_output_buffers;
-     int num_capture_buffers;
-+    const char * dmabuf_alloc;
-     enum AVPixelFormat pix_fmt;
- } V4L2m2mPriv;
- 
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 47b2735f8252..4d170572980e 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -41,6 +41,7 @@
- #include "v4l2_context.h"
- #include "v4l2_m2m.h"
- #include "v4l2_fmt.h"
-+#include "v4l2_req_dmabufs.h"
- 
- // Pick 64 for max last count - that is >1sec at 60fps
- #define STATS_LAST_COUNT_MAX 64
-@@ -896,6 +897,20 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-         s->output_drm = 0;
-     }
- 
-+    s->db_ctl = NULL;
-+    if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) {
-+        if (strcmp(priv->dmabuf_alloc, "cma") == 0)
-+            s->db_ctl = dmabufs_ctl_new();
-+        else {
-+            av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc);
-+            return AVERROR(EINVAL);
-+        }
-+        if (!s->db_ctl) {
-+            av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc);
-+            return AVERROR(ENOMEM);
-+        }
++    else
++#endif
++    {
++        // Kludge mapped h/w s.t. frame_copy works
++        map->width  = dst->width;
++        map->height = dst->height;
++        err = av_frame_copy(dst, map);
 +    }
+ 
+-    err = av_frame_copy(dst, map);
+     if (err)
++    {
++        av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__);
+         goto fail;
++    }
+ 
+     err = 0;
+ fail:
+@@ -258,7 +344,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc,
+     int err;
+ 
+     if (src->width > hwfc->width || src->height > hwfc->height)
++    {
++        av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, src->width, hwfc->width, src->height, hwfc->height); // log the src dims that failed the check
+         return AVERROR(EINVAL);
++    }
+ 
+     map = av_frame_alloc();
+     if (!map)
+diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
+index 6317ab7d0efa..7632ba7fa29f 100644
+--- a/libavutil/hwcontext_vulkan.c
++++ b/libavutil/hwcontext_vulkan.c
+@@ -72,6 +72,14 @@
+ #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
+ #endif
+ 
++// Sometimes missing definitions
++#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME
++#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264"
++#endif
++#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME
++#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265"
++#endif
 +
-     s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
-     if (!s->device_ref) {
-         ret = AVERROR(ENOMEM);
-@@ -1000,6 +1015,7 @@ static const AVOption options[] = {
-     { "num_capture_buffers", "Number of buffers in the capture context",
-         OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS },
-     { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS },
-+    { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS },
-     { NULL},
+ typedef struct VulkanDevicePriv {
+     /**
+      * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it.
+diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
+index 1c0bcf2232be..c1d7dde0418d 100644
+--- a/libavutil/pixdesc.c
++++ b/libavutil/pixdesc.c
+@@ -2791,6 +2791,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
+         },
+         .flags = AV_PIX_FMT_FLAG_PLANAR,
+     },
++    [AV_PIX_FMT_SAND128] = {
++        .name = "sand128",
++        .nb_components = 3,
++        .log2_chroma_w = 1,
++        .log2_chroma_h = 1,
++        .comp = {
++            { 0, 1, 0, 0, 8 },        /* Y */
++            { 1, 2, 0, 0, 8 },        /* U */
++            { 1, 2, 1, 0, 8 },        /* V */
++        },
++        .flags = 0,
++    },
++    [AV_PIX_FMT_SAND64_10] = {
++        .name = "sand64_10",
++        .nb_components = 3,
++        .log2_chroma_w = 1,
++        .log2_chroma_h = 1,
++        .comp = {
++            { 0, 2, 0, 0, 10 },        /* Y */
++            { 1, 4, 0, 0, 10 },        /* U */
++            { 1, 4, 2, 0, 10 },        /* V */
++        },
++        .flags = 0,
++    },
++    [AV_PIX_FMT_SAND64_16] = {
++        .name = "sand64_16",
++        .nb_components = 3,
++        .log2_chroma_w = 1,
++        .log2_chroma_h = 1,
++        .comp = {
++            { 0, 2, 0, 0, 16 },        /* Y */
++            { 1, 4, 0, 0, 16 },        /* U */
++            { 1, 4, 2, 0, 16 },        /* V */
++        },
++        .flags = 0,
++    },
++    [AV_PIX_FMT_RPI4_8] = {
++        .name = "rpi4_8",
++        .flags = AV_PIX_FMT_FLAG_HWACCEL,
++    },
++    [AV_PIX_FMT_RPI4_10] = {
++        .name = "rpi4_10",
++        .flags = AV_PIX_FMT_FLAG_HWACCEL,
++    },
  };
  
-
-From 778d48c8652016879f09d7ac4aff7592f9ea9d13 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Dec 2022 19:05:47 +0000
-Subject: [PATCH 110/186] testfilt: Skeleton of hw filter test code
-
----
- pi-util/testfilt.py | 83 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 83 insertions(+)
- create mode 100755 pi-util/testfilt.py
-
+ static const char * const color_range_names[] = {
+diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
+index a7f50e169038..f3f5a38584d2 100644
+--- a/libavutil/pixfmt.h
++++ b/libavutil/pixfmt.h
+@@ -380,6 +380,14 @@ enum AVPixelFormat {
+ 
+     AV_PIX_FMT_Y210BE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian
+     AV_PIX_FMT_Y210LE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
++// RPI - not on ifdef so can be got at by calling progs
++// #define so code that uses this can know it is there
++#define AVUTIL_HAVE_PIX_FMT_SAND 1
++    AV_PIX_FMT_SAND128,    ///< 4:2:0  8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding
++    AV_PIX_FMT_SAND64_10,  ///< 4:2:0 10-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
++    AV_PIX_FMT_SAND64_16,  ///< 4:2:0 16-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
++    AV_PIX_FMT_RPI4_8,
++    AV_PIX_FMT_RPI4_10,
+ 
+     AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined
+     AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined
+diff --git a/libavutil/rpi_sand_fn_pw.c b/libavutil/rpi_sand_fn_pw.c
+new file mode 100644
+index 000000000000..0d5d203dc3cd
+--- /dev/null
++++ b/libavutil/rpi_sand_fn_pw.c
+@@ -0,0 +1,227 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++// * Included twice from rpi_sand_fns.c with different PW (1 = 8-bit pixels, 2 = 16-bit pixels)
++
++#define STRCAT(x,y) x##y
++
++#if PW == 1
++#define pixel uint8_t
++#define FUNC(f) STRCAT(f, 8)
++#elif PW == 2
++#define pixel uint16_t
++#define FUNC(f) STRCAT(f, 16)
++#else
++#error Unexpected PW
++#endif
++
++// Fetches a single w x h patch of Y from sand (striped) layout into a linear buffer - offscreen fixup not done here
++// x & w in bytes; stride1 is used as a mask (stride1 - 1) so is assumed to be a power of 2 (original note: w <= stride1)
++// unclipped - NOTE(review): the two+ stripe path below copies whole middle stripes, so w > stride1 appears to be handled
++void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x = _x;
++    const unsigned int w = _w;
++    const unsigned int mask = stride1 - 1; // Selects the byte offset within a stripe row
++
++#if PW == 1 && HAVE_SAND_ASM
++    if (_x == 0) { // Asm path is only used for patches that start at the left edge
++        ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride,
++                                     src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if ((x & ~mask) == ((x + w) & ~mask)) {
++        // All in one sand stripe - one memcpy per row
++        const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) {
++            memcpy(dst, p, w);
++        }
++    }
++    else
++    {
++        // Two+ stripe - each row is copied as head (w1), whole middle stripes (w2) and tail (w3)
++        const unsigned int sstride = stride1 * stride2; // Bytes from a row in one stripe to the same row in the next
++        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        const uint8_t * p2 = p1 + sstride - (x & mask); // Start of this row in the following stripe
++        const unsigned int w1 = stride1 - (x & mask);   // Bytes remaining in the first stripe
++        const unsigned int w3 = (x + w) & mask;         // Bytes used in the last stripe
++        const unsigned int w2 = w - (w1 + w3);          // Bytes in the whole stripes between
++
++        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) {
++            unsigned int j;
++            const uint8_t * p = p2;
++            uint8_t * d = dst;
++            memcpy(d, p1, w1);
++            d += w1;
++            for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) {
++                memcpy(d, p, stride1);
++            }
++            memcpy(d, p, w3); // p now points at the row start in the last stripe
++        }
++    }
++}
++
++// De-interleaves a patch of sand chroma (alternating U, V pixels) into separate U and V planes
++// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V)
++void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x = _x * 2; // Byte offset into the interleaved source
++    const unsigned int w = _w * 2; // Byte width of the interleaved source
++    const unsigned int mask = stride1 - 1; // stride1 assumed to be a power of 2
++
++#if PW == 1 && HAVE_SAND_ASM
++    if (_x == 0) { // Asm path is only used for patches that start at the left edge
++        ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v,
++                                     src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if ((x & ~mask) == ((x + w) & ~mask)) {
++        // All in one sand stripe
++        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) {
++            pixel * du = (pixel *)dst_u;
++            pixel * dv = (pixel *)dst_v;
++            const pixel * p = (const pixel *)p1;
++            for (unsigned int k = 0; k < w; k += 2 * PW) { // One U + one V pixel (2 * PW bytes) per step
++                *du++ = *p++;
++                *dv++ = *p++;
++            }
++        }
++    }
++    else
++    {
++        // Two+ stripe - each row is split into head (w1), whole middle stripes (w2) and tail (w3)
++        const unsigned int sstride = stride1 * stride2;
++        const unsigned int sstride_p = (sstride - stride1) / PW; // End of a stripe row to its start in the next stripe, in pixels
++
++        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        const uint8_t * p2 = p1 + sstride - (x & mask);
++        const unsigned int w1 = stride1 - (x & mask);
++        const unsigned int w3 = (x + w) & mask;
++        const unsigned int w2 = w - (w1 + w3);
++
++        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) {
++            unsigned int j;
++            const pixel * p = (const pixel *)p1;
++            pixel * du = (pixel *)dst_u;
++            pixel * dv = (pixel *)dst_v;
++            for (unsigned int k = 0; k < w1; k += 2 * PW) {
++                *du++ = *p++;
++                *dv++ = *p++;
++            }
++            for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) {
++                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
++                    *du++ = *p++;
++                    *dv++ = *p++;
++                }
++            }
++            for (unsigned int k = 0; k < w3; k += 2 * PW) { // p was left at the row start in the last stripe
++                *du++ = *p++;
++                *dv++ = *p++;
++            }
++        }
++    }
++}
++// Interleaves planar U & V into sand chroma at dst_c; _x & _w in bytes of one plane (doubled below for the interleave)
++void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c,
++                             unsigned int stride1, unsigned int stride2,
++                             const uint8_t * src_u, const unsigned int src_stride_u,
++                             const uint8_t * src_v, const unsigned int src_stride_v,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x = _x * 2;
++    const unsigned int w = _w * 2;
++    const unsigned int mask = stride1 - 1; // stride1 assumed to be a power of 2
++    if ((x & ~mask) == ((x + w) & ~mask)) {
++        // All in one sand stripe
++        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) {
++            const pixel * su = (const pixel *)src_u;
++            const pixel * sv = (const pixel *)src_v;
++            pixel * p = (pixel *)p1;
++            for (unsigned int k = 0; k < w; k += 2 * PW) { // One U + one V pixel (2 * PW bytes) per step
++                *p++ = *su++;
++                *p++ = *sv++;
++            }
++        }
++    }
++    else
++    {
++        // Two+ stripe - each row is split into head (w1), whole middle stripes (w2) and tail (w3)
++        const unsigned int sstride = stride1 * stride2;
++        const unsigned int sstride_p = (sstride - stride1) / PW; // End of a stripe row to its start in the next stripe, in pixels
++
++        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; // Destination: written through, so not const
++        uint8_t * p2 = p1 + sstride - (x & mask);
++        const unsigned int w1 = stride1 - (x & mask);
++        const unsigned int w3 = (x + w) & mask;
++        const unsigned int w2 = w - (w1 + w3);
++
++        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) {
++            unsigned int j;
++            const pixel * su = (const pixel *)src_u;
++            const pixel * sv = (const pixel *)src_v;
++            pixel * p = (pixel *)p1;
++            for (unsigned int k = 0; k < w1; k += 2 * PW) {
++                *p++ = *su++;
++                *p++ = *sv++;
++            }
++            for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) {
++                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
++                    *p++ = *su++;
++                    *p++ = *sv++;
++                }
++            }
++            for (unsigned int k = 0; k < w3; k += 2 * PW) { // p was left at the row start in the last stripe
++                *p++ = *su++;
++                *p++ = *sv++;
++            }
++        }
++    }
++}
++
++
++#undef pixel
++#undef STRCAT
++#undef FUNC
++
+diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c
+new file mode 100644
+index 000000000000..2e19dd3a7b84
+--- /dev/null
++++ b/libavutil/rpi_sand_fns.c
+@@ -0,0 +1,447 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#include "config.h"
++#include <stdint.h>
++#include <string.h>
++#include "rpi_sand_fns.h"
++#include "avassert.h"
++#include "frame.h"
++
++#if ARCH_ARM && HAVE_NEON
++#include "libavutil/arm/cpu.h"
++#include "libavutil/arm/rpi_sand_neon.h"
++#define HAVE_SAND_ASM 1
++#elif ARCH_AARCH64 && HAVE_NEON
++#include "libavutil/aarch64/cpu.h"
++#include "libavutil/aarch64/rpi_sand_neon.h"
++#define HAVE_SAND_ASM 1
++#else
++#define HAVE_SAND_ASM 0
++#endif
++
++#define PW 1
++#include "rpi_sand_fn_pw.c"
++#undef PW
++
++#define PW 2
++#include "rpi_sand_fn_pw.c"
++#undef PW
++
++#if 1
++// Simple round: narrows n 16-bit samples at _src to 8 bits at dst (round to nearest, >> shr), saturating at 0xff
++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
++{
++    const unsigned int rnd = (1 << shr) >> 1; // Half of one output LSB (0 when shr == 0)
++    const uint16_t * src = (const uint16_t *)_src;
++    for (; n != 0; --n) {
++        const unsigned int v = (*src++ + rnd) >> shr;
++        *dst++ = v > 0xff ? 0xff : v; // Top input values round up to 0x100 - clamp rather than wrap to 0
++    }
++}
++#else
++// Dithered variation: the bits dropped from each sample are carried into the next one; output saturates at 0xff
++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
++{
++    unsigned int rnd = (1 << shr) >> 1;
++    const unsigned int mask = ((1 << shr) - 1); // Selects the bits that the shift discards
++    const uint16_t * src = (const uint16_t *)_src;
++
++    for (; n != 0; --n) {
++        rnd = *src++ + (rnd & mask);
++        *dst++ = (rnd >> shr) > 0xff ? 0xff : (rnd >> shr); // Carried error can push the top values to 0x100
++    }
++}
++#endif
++
++// Fetches a single patch of sand30 Y (three 10-bit samples per 32-bit word) into 16-bit planar - offscreen fixup not done here
++// (original note: w <= stride1) NOTE(review): a patch that starts and ends in the same word (x0 == x1) is not copied at all
++// unclipped
++// _x & _w in pixels, strides in bytes
++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
++    const unsigned int xskip0 = _x - (x0 >> 2) * 3; // Pixels to skip at the start of the first word (0..2)
++    const unsigned int x1 = ((_x + _w) / 3) * 4; // Byte offset of the word holding the end of the patch
++    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; // Pixels to take from the final word (0..2)
++    const unsigned int mask = stride1 - 1; // stride1 assumed to be a power of 2
++    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++    if (_x == 0 && have_neon(av_get_cpu_flags())) { // Asm path only for patches that start at the left edge
++        ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if (x0 == x1) {
++        // ******************* NOT IMPLEMENTED - returns with dst untouched
++        // Partial single word xfer
++        return;
++    }
++
++    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
++    {
++        unsigned int x = x0;
++        const uint32_t * p = (const uint32_t *)p0;
++        uint16_t * d = (uint16_t *)dst;
++
++        if (xskip0 != 0) { // Leading partial word: drop the first xskip0 samples
++            const uint32_t p3 = *p++;
++
++            if (xskip0 == 1)
++                *d++ = (p3 >> 10) & 0x3ff;
++            *d++ = (p3 >> 20) & 0x3ff;
++
++            if (((x += 4) & mask) == 0) // Stepped off the end of a stripe row
++                p += slice_inc;
++        }
++
++        while (x != x1) { // Whole words: three samples each
++            const uint32_t p3 = *p++;
++            *d++ = p3 & 0x3ff;
++            *d++ = (p3 >> 10) & 0x3ff;
++            *d++ = (p3 >> 20) & 0x3ff;
++
++            if (((x += 4) & mask) == 0)
++                p += slice_inc;
++        }
++
++        if (xrem1 != 0) { // Trailing partial word: keep only the first xrem1 samples
++            const uint32_t p3 = *p;
++
++            *d++ = p3 & 0x3ff;
++            if (xrem1 == 2)
++                *d++ = (p3 >> 10) & 0x3ff;
++        }
++    }
++}
++
++// Splits sand30 chroma (three 10-bit U/V pairs per two 32-bit words) into 16-bit U & V planes; _x & _w in U/V pairs
++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word
++    const unsigned int xskip0 = _x - (x0 >> 3) * 3; // U/V pairs to skip at the start of the first word pair (0..2)
++    const unsigned int x1 = ((_x + _w) / 3) * 8; // Byte offset of the word pair holding the end of the patch
++    const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; // U/V pairs to take from the final word pair (0..2)
++    const unsigned int mask = stride1 - 1; // stride1 assumed to be a power of 2
++    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++    if (_x == 0 && have_neon(av_get_cpu_flags())) { // Asm path only for patches that start at the left edge
++        ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v,
++                                       src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if (x0 == x1) {
++        // ******************* NOT IMPLEMENTED - returns with dst_u / dst_v untouched
++        // Partial single word xfer
++        return;
++    }
++
++    for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1)
++    {
++        unsigned int x = x0;
++        const uint32_t * p = (const uint32_t *)p0;
++        uint16_t * du = (uint16_t *)dst_u;
++        uint16_t * dv = (uint16_t *)dst_v;
++
++        if (xskip0 != 0) { // Leading partial word pair: drop the first xskip0 U/V pairs
++            const uint32_t p3a = *p++;
++            const uint32_t p3b = *p++;
++
++            if (xskip0 == 1)
++            {
++                *du++ = (p3a >> 20) & 0x3ff;
++                *dv++ = (p3b >>  0) & 0x3ff;
++            }
++            *du++ = (p3b >> 10) & 0x3ff;
++            *dv++ = (p3b >> 20) & 0x3ff;
++
++            if (((x += 8) & mask) == 0) // Stepped off the end of a stripe row
++                p += slice_inc;
++        }
++
++        while (x != x1) { // Whole word pairs: U0 V0 U1 | V1 U2 V2
++            const uint32_t p3a = *p++;
++            const uint32_t p3b = *p++;
++
++            *du++ = p3a & 0x3ff;
++            *dv++ = (p3a >> 10) & 0x3ff;
++            *du++ = (p3a >> 20) & 0x3ff;
++            *dv++ = p3b & 0x3ff;
++            *du++ = (p3b >> 10) & 0x3ff;
++            *dv++ = (p3b >> 20) & 0x3ff;
++
++            if (((x += 8) & mask) == 0)
++                p += slice_inc;
++        }
++
++        if (xrem1 != 0) { // Trailing partial word pair: keep only the first xrem1 U/V pairs
++            const uint32_t p3a = *p++;
++            const uint32_t p3b = *p++; // Read even when xrem1 == 1, where it is not used
++
++            *du++ = p3a & 0x3ff;
++            *dv++ = (p3a >> 10) & 0x3ff;
++            if (xrem1 == 2)
++            {
++                *du++ = (p3a >> 20) & 0x3ff;
++                *dv++ = p3b & 0x3ff;
++            }
++        }
++    }
++}
++
++// Fetches a single patch - offscreen fixup not done here
++// w <= stride1
++// 10 -> 8 bit by simple truncation (the bottom 2 bits are lost)
++// _x & _w in pixels, strides in bytes
++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word
++    const unsigned int xskip0 = _x - (x0 >> 2) * 3; // Leading pels (0..2) to skip in the first word
++    const unsigned int x1 = ((_x + _w) / 3) * 4; // Byte offset of the word holding the end pel
++    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; // Pels (0..2) wanted from the last word
++    const unsigned int mask = stride1 - 1; // Used as a bit mask - assumes stride1 is a power of 2
++    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++    if (_x == 0 && have_neon(av_get_cpu_flags())) { // Runtime NEON check, as in av_rpi_sand30_to_planar_c16
++        ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if (x0 == x1) {
++        // ******************* Not implemented
++        // Partial single word xfer - returns without writing anything to dst
++        return;
++    }
++
++    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
++    {
++        unsigned int x = x0;
++        const uint32_t * p = (const uint32_t *)p0;
++        uint8_t * d = dst;
++
++        if (xskip0 != 0) { // Leading partial word
++            const uint32_t p3 = *p++;
++
++            if (xskip0 == 1)
++                *d++ = (p3 >> 12) & 0xff;
++            *d++ = (p3 >> 22) & 0xff;
++
++            if (((x += 4) & mask) == 0) // Stepped off the RHS of the stripe
++                p += slice_inc;
++        }
++
++        while (x != x1) { // Whole words: 3 x 10-bit pels, top 8 bits of each kept
++            const uint32_t p3 = *p++;
++            *d++ = (p3 >> 2) & 0xff;
++            *d++ = (p3 >> 12) & 0xff;
++            *d++ = (p3 >> 22) & 0xff;
++
++            if (((x += 4) & mask) == 0) // Stepped off the RHS of the stripe
++                p += slice_inc;
++        }
++
++        if (xrem1 != 0) { // Trailing partial word
++            const uint32_t p3 = *p;
++
++            *d++ = (p3 >> 2) & 0xff;
++            if (xrem1 == 2)
++                *d++ = (p3 >> 12) & 0xff;
++        }
++    }
++}
++
++
++
++// w/h in pixels
++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
++                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
++                         unsigned int w, unsigned int h, const unsigned int shr)
++{
++    const unsigned int n = dst_stride1 / 2; // Half a dest stripe: dest bytes produced per row of one src stripe
++    unsigned int j;
++
++    // This is true for our current layouts
++    av_assert0(dst_stride1 == src_stride1);
++
++    // As we have the same stride1 for src & dest and src is wider than dest
++    // then if we loop on src we can always write contiguously to dest
++    // We make no effort to copy an exact width - round up to nearest src stripe
++    // as we will always have storage in dest for that
++
++#if ARCH_ARM && HAVE_NEON
++    if (shr == 3 && src_stride1 == 128) { // Asm only handles this shift & stripe width
++        for (j = 0; j + n < w; j += dst_stride1) { // Dest stripes needing two src stripes
++            uint8_t * d = dst + j * dst_stride2;
++            const uint8_t * s1 = src + j * 2 * src_stride2;
++            const uint8_t * s2 = s1 + src_stride1 * src_stride2; // Next src stripe
++
++            ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h);
++        }
++    }
++    else
++#endif
++    {
++        for (j = 0; j + n < w; j += dst_stride1) { // Dest stripes needing two src stripes
++            uint8_t * d = dst + j * dst_stride2;
++            const uint8_t * s1 = src + j * 2 * src_stride2;
++            const uint8_t * s2 = s1 + src_stride1 * src_stride2; // Next src stripe
++
++            for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) {
++                cpy16_to_8(d, s1, n, shr);     // LHS half of the dest stripe row
++                cpy16_to_8(d + n, s2, n, shr); // RHS half of the dest stripe row
++            }
++        }
++    }
++
++    // Fix up a trailing dest half stripe (j carries over from whichever loop ran above)
++    if (j < w) {
++        uint8_t * d = dst + j * dst_stride2;
++        const uint8_t * s1 = src + j * 2 * src_stride2;
++
++        for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) {
++            cpy16_to_8(d, s1, n, shr);
++        }
++    }
++}
++
++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src)
++{
++    const int w = av_frame_cropped_width(src);
++    const int h = av_frame_cropped_height(src);
++    const int x = src->crop_left;
++    const int y = src->crop_top;
++
++    // We will crop as part of the conversion
++    dst->crop_top = 0;
++    dst->crop_left = 0;
++    dst->crop_bottom = 0;
++    dst->crop_right = 0;
++
++    switch (src->format){ // Dispatch on src sand layout, then on the format already set in dst
++        case AV_PIX_FMT_SAND128:
++        case AV_PIX_FMT_RPI4_8:
++            switch (dst->format){
++                case AV_PIX_FMT_YUV420P:
++                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
++                                             dst->data[2], dst->linesize[2],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x/2, y/2,  w/2, h/2);
++                    break;
++                case AV_PIX_FMT_NV12:
++                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x & ~1, y/2, w, h/2); // Interleaved UV copied as bytes: offset is luma x rounded down to even, not x/2
++                    break;
++                default:
++                    return -1; // Unsupported dst format for this src
++            }
++            break;
++        case AV_PIX_FMT_SAND64_10:
++            switch (dst->format){
++                case AV_PIX_FMT_YUV420P10:
++                    av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x*2, y, w*2, h);
++                    av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1],
++                                             dst->data[2], dst->linesize[2],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y/2,  w, h/2);
++                    break;
++                default:
++                    return -1; // Unsupported dst format for this src
++            }
++            break;
++        case AV_PIX_FMT_RPI4_10:
++            switch (dst->format){
++                case AV_PIX_FMT_YUV420P10:
++                    av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
++                                             dst->data[2], dst->linesize[2],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x/2, y/2, w/2, h/2);
++                    break;
++                case AV_PIX_FMT_NV12:
++                    av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x & ~1, y/2, w, h/2); // Interleaved UV copied as samples: offset is luma x rounded down to even, not x/2
++                    break;
++                default:
++                    return -1; // Unsupported dst format for this src
++            }
++            break;
++        default:
++            return -1; // src is not a sand format handled here
++    }
++
++    return av_frame_copy_props(dst, src);
++}
+diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h
+new file mode 100644
+index 000000000000..f7ba62ff7380
+--- /dev/null
++++ b/libavutil/rpi_sand_fns.h
+@@ -0,0 +1,188 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#ifndef AVUTIL_RPI_SAND_FNS_H
++#define AVUTIL_RPI_SAND_FNS_H
++
++#include "libavutil/frame.h"
++
++// For all these fns _x & _w are measured as coord * PW
++// For the C fns coords are in chroma pels (so luma / 2)
++// Strides are in bytes
++
++void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++void av_rpi_planar_to_sand_c8(uint8_t * dst_c,
++                             unsigned int stride1, unsigned int stride2,
++                             const uint8_t * src_u, const unsigned int src_stride_u,
++                             const uint8_t * src_v, const unsigned int src_stride_v,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_planar_to_sand_c16(uint8_t * dst_c,
++                             unsigned int stride1, unsigned int stride2,
++                             const uint8_t * src_u, const unsigned int src_stride_u,
++                             const uint8_t * src_v, const unsigned int src_stride_v,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++// w/h in pixels
++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
++                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
++                         unsigned int w, unsigned int h, const unsigned int shr);
++
++
++// dst must contain required pixel format & allocated data buffers
++// Cropping on the src buffer will be honoured and dst crop will be set to zero
++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src);
++
++
++static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame)
++{
++#ifdef RPI_ZC_SAND128_ONLY
++    // If we are sure we only support 128 byte sand formats replace the
++    // var with a constant which should allow for better optimisation
++    return 128;
++#else
++    return frame->linesize[0];
++#endif
++}
++
++static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame)
++{
++    return frame->linesize[3];
++}
++
++
++static inline int av_rpi_is_sand_format(const int format)
++{
++    return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10);
++}
++
++static inline int av_rpi_is_sand_frame(const AVFrame * const frame)
++{
++    return av_rpi_is_sand_format(frame->format);
++}
++
++static inline int av_rpi_is_sand8_frame(const AVFrame * const frame)
++{
++    return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8);
++}
++
++static inline int av_rpi_is_sand16_frame(const AVFrame * const frame)
++{
++    return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16);
++}
++
++static inline int av_rpi_is_sand30_frame(const AVFrame * const frame)
++{
++    return (frame->format == AV_PIX_FMT_RPI4_10);
++}
++
++static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame)
++{
++    return av_rpi_is_sand8_frame(frame) ? 0 : 1;
++}
++
++// If x is measured in bytes (not pixels) then this works for sand64_16 as
++// well as sand128 - but in the general case we work that out
++
++static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y)
++{
++    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
++    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
++    const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); // Scale luma x by the frame's xshl (0 for 8-bit sand, else 1)
++    const unsigned int x1 = x & (stride1 - 1); // Offset within the stripe (assumes stride1 is a power of 2)
++    const unsigned int x2 = x ^ x1; // Remainder: x with the in-stripe bits cleared
++
++    return x1 + stride1 * y + stride2 * x2;
++}
++
++static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c)
++{
++    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
++    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
++    const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); // Extra shift of 1 relative to the luma version
++    const unsigned int x1 = x & (stride1 - 1); // Offset within the stripe (assumes stride1 is a power of 2)
++    const unsigned int x2 = x ^ x1; // Remainder: x with the in-stripe bits cleared
++
++    return x1 + stride1 * y_c + stride2 * x2;
++}
++
++static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y)
++{
++    return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y);
++}
++
++static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y)
++{
++    return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y);
++}
++
++#endif
++
+diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c
+index 20a25033cba2..ebbb5e04fc17 100644
+--- a/libswscale/aarch64/rgb2rgb.c
++++ b/libswscale/aarch64/rgb2rgb.c
+@@ -57,6 +57,12 @@ void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2,
+ void ff_deinterleave_bytes_neon(const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
+                                 int width, int height, int srcStride,
+                                 int dst1Stride, int dst2Stride);
++void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv);
++void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv);
+ 
+ av_cold void rgb2rgb_init_aarch64(void)
+ {
+@@ -66,5 +72,7 @@ av_cold void rgb2rgb_init_aarch64(void)
+         ff_rgb24toyv12  = rgb24toyv12;
+         interleaveBytes = ff_interleave_bytes_neon;
+         deinterleaveBytes = ff_deinterleave_bytes_neon;
++        ff_rgb24toyv12 = ff_rgb24toyv12_aarch64;
++        ff_bgr24toyv12 = ff_bgr24toyv12_aarch64;
+     }
+ }
+diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S
+index 1382e00261a9..a2bab11b34de 100644
+--- a/libswscale/aarch64/rgb2rgb_neon.S
++++ b/libswscale/aarch64/rgb2rgb_neon.S
+@@ -296,3 +296,359 @@ function ff_deinterleave_bytes_neon, export=1
+ 0:
+         ret
+ endfunc
++
++// Expand rgb2 into r0+r1/g0+g1/b0+b1
++.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2
++        uxtl            \r0\().8h, \r2\().8b
++        uxtl            \g0\().8h, \g2\().8b
++        uxtl            \b0\().8h, \b2\().8b
++
++        uxtl2           \r1\().8h, \r2\().16b
++        uxtl2           \g1\().8h, \g2\().16b
++        uxtl2           \b1\().8h, \b2\().16b
++.endm
++
++// Expand rgb2 into r0+r1/g0+g1/b0+b1
++// and pick every other el to put back into rgb2 for chroma
++.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2
++        XRGB3Y          \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2
++
++        bic             \r2\().8h, #0xff, LSL #8
++        bic             \g2\().8h, #0xff, LSL #8
++        bic             \b2\().8h, #0xff, LSL #8
++.endm
++
++.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2
++        smull           \d0\().4s, \s0\().4h, \c0
++        smlal           \d0\().4s, \s1\().4h, \c1
++        smlal           \d0\().4s, \s2\().4h, \c2
++        smull2          \d1\().4s, \s0\().8h, \c0
++        smlal2          \d1\().4s, \s1\().8h, \c1
++        smlal2          \d1\().4s, \s2\().8h, \c2
++.endm
++
++// d0 may be s0
++// s0, s2 corrupted
++.macro SHRN_Y d0, s0, s1, s2, s3, k128h
++        shrn            \s0\().4h, \s0\().4s, #12
++        shrn2           \s0\().8h, \s1\().4s, #12
++        add             \s0\().8h, \s0\().8h, \k128h\().8h     // +128 (>> 3 = 16)
++        sqrshrun        \d0\().8b, \s0\().8h, #3
++        shrn            \s2\().4h, \s2\().4s, #12
++        shrn2           \s2\().8h, \s3\().4s, #12
++        add             \s2\().8h, \s2\().8h, \k128h\().8h
++        sqrshrun2       \d0\().16b, \s2\().8h, #3              // Use the s2 argument, not a fixed v28
++.endm
++
++.macro SHRN_C d0, s0, s1, k128b
++        shrn            \s0\().4h, \s0\().4s, #14
++        shrn2           \s0\().8h, \s1\().4s, #14
++        sqrshrn         \s0\().8b, \s0\().8h, #1
++        add             \d0\().8b, \s0\().8b, \k128b\().8b     // +128
++.endm
++
++.macro STB2V s0, n, a
++        st1             {\s0\().b}[(\n+0)], [\a], #1
++        st1             {\s0\().b}[(\n+1)], [\a], #1
++.endm
++
++.macro STB4V s0, n, a
++        STB2V           \s0, (\n+0), \a
++        STB2V           \s0, (\n+2), \a
++.endm
++
++
++// void ff_rgb24toyv12_aarch64(
++//              const uint8_t *src,             // x0
++//              uint8_t *ydst,                  // x1
++//              uint8_t *udst,                  // x2
++//              uint8_t *vdst,                  // x3
++//              int width,                      // w4
++//              int height,                     // w5
++//              int lumStride,                  // w6
++//              int chromStride,                // w7
++//              int srcStr,                     // [sp, #0]
++//              int32_t *rgb2yuv);              // [sp, #8]
++
++function ff_rgb24toyv12_aarch64, export=1
++        ldr             x15, [sp, #8]                           // x15 = rgb2yuv (2nd stack arg)
++        ld3             {v3.s, v4.s, v5.s}[0], [x15], #12       // 1st 3 coeffs -> lane 0 of v3/v4/v5
++        ld3             {v3.s, v4.s, v5.s}[1], [x15], #12       // next 3 coeffs -> lane 1
++        ld3             {v3.s, v4.s, v5.s}[2], [x15]            // last 3 coeffs -> lane 2
++        mov             v6.16b, v3.16b                          // Swap v3 <-> v5 (v6 is scratch here,
++        mov             v3.16b, v5.16b                          // it is reloaded after label 99)
++        mov             v5.16b, v6.16b                          // so the bgr24 body below can be shared
++        b               99f                                     // 99: is inside ff_bgr24toyv12_aarch64
++endfunc
++
++// void ff_bgr24toyv12_aarch64(
++//              const uint8_t *src,             // x0
++//              uint8_t *ydst,                  // x1
++//              uint8_t *udst,                  // x2
++//              uint8_t *vdst,                  // x3
++//              int width,                      // w4
++//              int height,                     // w5
++//              int lumStride,                  // w6
++//              int chromStride,                // w7
++//              int srcStr,                     // [sp, #0]
++//              int32_t *rgb2yuv);              // [sp, #8] (including Mac)
++
++// regs
++// v0-2         Src bytes - reused as chroma src
++// v3-5         Coeffs (packed very inefficiently - could be squashed)
++// v6           128h
++// v7           128b
++// v8-15        Reserved
++// v16-18       Lo Src expanded as H
++// v19          -
++// v20-22       Hi Src expanded as H
++// v23          -
++// v24          U out
++// v25          U tmp
++// v26          Y out
++// v27-29       Y tmp
++// v30          V out
++// v31          V tmp
++
++function ff_bgr24toyv12_aarch64, export=1
++        ldr             x15, [sp, #8]
++        ld3             {v3.s, v4.s, v5.s}[0], [x15], #12
++        ld3             {v3.s, v4.s, v5.s}[1], [x15], #12
++        ld3             {v3.s, v4.s, v5.s}[2], [x15]
++
++99:
++        ldr             w14, [sp, #0]
++        movi            v7.8b, #128
++        uxtl            v6.8h, v7.8b
++        // Ensure if nothing to do then we do nothing
++        cmp             w4, #0
++        b.le            90f
++        cmp             w5, #0
++        b.le            90f
++        // If w % 16 != 0 then -16 so we do main loop 1 fewer times with
++        // the remainder done in the tail
++        tst             w4, #15
++        b.eq            1f
++        sub             w4, w4, #16
++1:
++
++// -------------------- Even line body - YUV
++11:
++        subs            w9,  w4, #0
++        mov             x10, x0
++        mov             x11, x1
++        mov             x12, x2
++        mov             x13, x3
++        b.lt            12f
++
++        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
++        subs            w9, w9, #16
++        b.le            13f
++
++10:
++        XRGB3YC         v16, v17, v18,  v20, v21, v22,  v0, v1, v2
++
++        // Testing shows it is faster to stack the smull/smlal ops together
++        // rather than interleave them between channels and indeed even the
++        // shift/add sections seem happier not interleaved
++
++        // Y0
++        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++        // Y1
++        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++        SHRN_Y          v26, v26, v27, v28, v29, v6
++
++        // U
++        // Vector subscript *2 as we loaded into S but are only using H
++        SMLAL3          v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
++
++        // V
++        SMLAL3          v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
++
++        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
++
++        SHRN_C          v24, v24, v25, v7
++        SHRN_C          v30, v30, v31, v7
++
++        subs            w9, w9, #16
++
++        st1             {v26.16b}, [x11], #16
++        st1             {v24.8b}, [x12], #8
++        st1             {v30.8b}, [x13], #8
++
++        b.gt            10b
++
++// -------------------- Even line tail - YUV
++// If width % 16 == 0 then simply runs once with preloaded RGB
++// If other then deals with preload & then does remaining tail
++
++13:
++        // Body is simple copy of main loop body minus preload
++
++        XRGB3YC         v16, v17, v18,  v20, v21, v22,  v0, v1, v2
++        // Y0
++        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++        // Y1
++        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++        SHRN_Y          v26, v26, v27, v28, v29, v6
++        // U
++        SMLAL3          v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
++        // V
++        SMLAL3          v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
++
++        cmp             w9, #-16
++
++        SHRN_C          v24, v24, v25, v7
++        SHRN_C          v30, v30, v31, v7
++
++        // Here:
++        // w9 == 0      width % 16 == 0, tail done
++        // w9 > -16     1st tail done (16 pels), remainder still to go
++        // w9 == -16    shouldn't happen
++        // w9 > -32     2nd tail done
++        // w9 <= -32    shouldn't happen
++
++        b.lt            2f
++        st1             {v26.16b}, [x11], #16
++        st1             {v24.8b}, [x12], #8
++        st1             {v30.8b}, [x13], #8
++        cbz             w9, 3f
++
++12:
++        sub             w9, w9, #16
++
++        tbz             w9, #3, 1f
++        ld3             {v0.8b, v1.8b, v2.8b},  [x10], #24
++1:      tbz             w9, #2, 1f
++        ld3             {v0.b, v1.b, v2.b}[8],  [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[9],  [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[10], [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[11], [x10], #3
++1:      tbz             w9, #1, 1f
++        ld3             {v0.b, v1.b, v2.b}[12], [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[13], [x10], #3
++1:      tbz             w9, #0, 13b
++        ld3             {v0.b, v1.b, v2.b}[14], [x10], #3
++        b               13b
++
++2:
++        tbz             w9, #3, 1f
++        st1             {v26.8b},    [x11], #8
++        STB4V           v24, 0, x12
++        STB4V           v30, 0, x13
++1:      tbz             w9, #2, 1f
++        STB4V           v26  8, x11
++        STB2V           v24, 4, x12
++        STB2V           v30, 4, x13
++1:      tbz             w9, #1, 1f
++        STB2V           v26, 12, x11
++        st1             {v24.b}[6],  [x12], #1
++        st1             {v30.b}[6],  [x13], #1
++1:      tbz             w9, #0, 1f
++        st1             {v26.b}[14], [x11]
++        st1             {v24.b}[7],  [x12]
++        st1             {v30.b}[7],  [x13]
++1:
++3:
++
++// -------------------- Odd line body - Y only
++
++        subs            w5, w5, #1
++        b.eq            90f
++
++        subs            w9,  w4, #0
++        add             x0, x0, w14, sxtw
++        add             x1, x1, w6, sxtw
++        mov             x10, x0
++        mov             x11, x1
++        b.lt            12f
++
++        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
++        subs            w9, w9, #16
++        b.le            13f
++
++10:
++        XRGB3Y          v16, v17, v18,  v20, v21, v22,  v0, v1, v2
++        // Y0
++        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++        // Y1
++        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++
++        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
++
++        SHRN_Y          v26, v26, v27, v28, v29, v6
++
++        subs            w9, w9, #16
++
++        st1             {v26.16b}, [x11], #16
++
++        b.gt            10b
++
++// -------------------- Odd line tail - Y
++// If width % 16 == 0 then simply runs once with preloaded RGB
++// Otherwise deals with the preload & then does the remaining tail
++
++13:
++        // Body is simple copy of main loop body minus preload
++
++        XRGB3Y          v16, v17, v18,  v20, v21, v22,  v0, v1, v2
++        // Y0
++        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
++        // Y1
++        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
++
++        cmp             w9, #-16
++
++        SHRN_Y          v26, v26, v27, v28, v29, v6
++
++        // Here:
++        // w9 == 0      width % 16 == 0, tail done
++        // w9 > -16     1st tail done (16 pels), remainder still to go
++        // w9 == -16    shouldn't happen
++        // w9 > -32     2nd tail done
++        // w9 <= -32    shouldn't happen
++
++        b.lt            2f
++        st1             {v26.16b}, [x11], #16
++        cbz             w9, 3f
++
++12:
++        sub             w9, w9, #16
++
++        tbz             w9, #3, 1f
++        ld3             {v0.8b, v1.8b, v2.8b},  [x10], #24
++1:      tbz             w9, #2, 1f
++        ld3             {v0.b, v1.b, v2.b}[8],  [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[9],  [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[10], [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[11], [x10], #3
++1:      tbz             w9, #1, 1f
++        ld3             {v0.b, v1.b, v2.b}[12], [x10], #3
++        ld3             {v0.b, v1.b, v2.b}[13], [x10], #3
++1:      tbz             w9, #0, 13b
++        ld3             {v0.b, v1.b, v2.b}[14], [x10], #3
++        b               13b
++
++2:
++        tbz             w9, #3, 1f
++        st1             {v26.8b},    [x11], #8
++1:      tbz             w9, #2, 1f
++        STB4V           v26, 8,  x11
++1:      tbz             w9, #1, 1f
++        STB2V           v26, 12, x11
++1:      tbz             w9, #0, 1f
++        st1             {v26.b}[14], [x11]
++1:
++3:
++
++// ------------------- Loop to start
++
++        add             x0, x0, w14, sxtw
++        add             x1, x1, w6, sxtw
++        add             x2, x2, w7, sxtw
++        add             x3, x3, w7, sxtw
++        subs            w5, w5, #1
++        b.gt            11b
++90:
++        ret
++endfunc
+diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
+index e98fdac8ead6..c3b9079d2b3e 100644
+--- a/libswscale/rgb2rgb.c
++++ b/libswscale/rgb2rgb.c
+@@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
+                        int width, int height,
+                        int lumStride, int chromStride, int srcStride,
+                        int32_t *rgb2yuv);
++void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
++                       uint8_t *udst, uint8_t *vdst,
++                       int width, int height,
++                       int lumStride, int chromStride, int srcStride,
++                       int32_t *rgb2yuv);
++void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst,
++                      uint8_t *udst, uint8_t *vdst,
++                      int width, int height,
++                      int lumStride, int chromStride, int srcStride,
++                      int32_t *rgb2yuv); /* 4 bytes/pixel, source byte order R,G,B,x */
++void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst,
++                      uint8_t *udst, uint8_t *vdst,
++                      int width, int height,
++                      int lumStride, int chromStride, int srcStride,
++                      int32_t *rgb2yuv); /* 4 bytes/pixel, source byte order B,G,R,x */
++void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst,
++                      uint8_t *udst, uint8_t *vdst,
++                      int width, int height,
++                      int lumStride, int chromStride, int srcStride,
++                      int32_t *rgb2yuv); /* 4 bytes/pixel, source byte order x,R,G,B */
++void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst,
++                      uint8_t *udst, uint8_t *vdst,
++                      int width, int height,
++                      int lumStride, int chromStride, int srcStride,
++                      int32_t *rgb2yuv); /* 4 bytes/pixel, source byte order x,B,G,R */
+ void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                  int srcStride, int dstStride);
+ void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
+diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
+index e3b088390184..b8b1d649033e 100644
+--- a/libswscale/rgb2rgb.h
++++ b/libswscale/rgb2rgb.h
+@@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
+ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                       uint8_t *vdst, int width, int height, int lumStride,
+                       int chromStride, int srcStride, int32_t *rgb2yuv);
++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                      uint8_t *vdst, int width, int height, int lumStride,
++                      int chromStride, int srcStride, int32_t *rgb2yuv);
+ 
+ /**
+  * Height should be a multiple of 2 and width should be a multiple of 16.
+@@ -126,6 +129,26 @@ extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                               int width, int height,
+                               int lumStride, int chromStride, int srcStride,
+                               int32_t *rgb2yuv);
++extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++                              int width, int height,
++                              int lumStride, int chromStride, int srcStride,
++                              int32_t *rgb2yuv);
++extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++                             int width, int height,
++                             int lumStride, int chromStride, int srcStride,
++                             int32_t *rgb2yuv);
++extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++                             int width, int height,
++                             int lumStride, int chromStride, int srcStride,
++                             int32_t *rgb2yuv);
++extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++                             int width, int height,
++                             int lumStride, int chromStride, int srcStride,
++                             int32_t *rgb2yuv);
++extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
++                             int width, int height,
++                             int lumStride, int chromStride, int srcStride,
++                             int32_t *rgb2yuv);
+ extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
+                         int srcStride, int dstStride);
+ 
+diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
+index 32d90d44feb7..e711589e1e1a 100644
+--- a/libswscale/rgb2rgb_template.c
++++ b/libswscale/rgb2rgb_template.c
+@@ -642,65 +642,235 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
+ /**
+  * Height should be a multiple of 2 and width should be a multiple of 2.
+  * (If this is a problem for anyone then tell me, and I will fix it.)
++ * Chrominance data is only taken from every second line,
++ * others are ignored in the C version.
++ * FIXME: Write HQ version.
+  */
++static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv,
++                   const uint8_t x[9])
++{
++    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
++    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
++    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
++    int y;
++    const int chromWidth = width >> 1;
++
++    for (y = 0; y < height; y += 2) {
++        int i;
++        for (i = 0; i < chromWidth; i++) {
++            unsigned int b = src[6 * i + 0];
++            unsigned int g = src[6 * i + 1];
++            unsigned int r = src[6 * i + 2];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
++            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
++            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
++
++            udst[i]     = U;
++            vdst[i]     = V;
++            ydst[2 * i] = Y;
++
++            b = src[6 * i + 3];
++            g = src[6 * i + 4];
++            r = src[6 * i + 5];
++
++            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++            ydst[2 * i + 1] = Y;
++        }
++        if ((width & 1) != 0) {
++            unsigned int b = src[6 * i + 0];
++            unsigned int g = src[6 * i + 1];
++            unsigned int r = src[6 * i + 2];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
++            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
++            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
++
++            udst[i]     = U;
++            vdst[i]     = V;
++            ydst[2 * i] = Y;
++        }
++        ydst += lumStride;
++        src  += srcStride;
++
++        if (y+1 == height)
++            break;
++
++        for (i = 0; i < chromWidth; i++) {
++            unsigned int b = src[6 * i + 0];
++            unsigned int g = src[6 * i + 1];
++            unsigned int r = src[6 * i + 2];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++
++            ydst[2 * i] = Y;
++
++            b = src[6 * i + 3];
++            g = src[6 * i + 4];
++            r = src[6 * i + 5];
++
++            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++            ydst[2 * i + 1] = Y;
++        }
++        if ((width & 1) != 0) {
++            unsigned int b = src[6 * i + 0];
++            unsigned int g = src[6 * i + 1];
++            unsigned int r = src[6 * i + 2];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++
++            ydst[2 * i] = Y;
++        }
++        udst += chromStride;
++        vdst += chromStride;
++        ydst += lumStride;
++        src  += srcStride;
++    }
++}
++
++static const uint8_t x_rgb[9] = { /* rgb2yuv[] indices for the helpers' (r, g, b) values: identity mapping */
++    RY_IDX, GY_IDX, BY_IDX, /* x[0..2] -> ry, gy, by */
++    RU_IDX, GU_IDX, BU_IDX, /* x[3..5] -> ru, gu, bu */
++    RV_IDX, GV_IDX, BV_IDX, /* x[6..8] -> rv, gv, bv */
++};
++
++static const uint8_t x_bgr[9] = { /* as x_rgb but with the R and B indices swapped in every row */
++     BY_IDX, GY_IDX, RY_IDX, /* the byte a helper reads as "r" gets the blue coefficient, and vice versa */
++     BU_IDX, GU_IDX, RU_IDX,
++     BV_IDX, GV_IDX, RV_IDX,
++};
++
+ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
+                    uint8_t *vdst, int width, int height, int lumStride,
+                    int chromStride, int srcStride, int32_t *rgb2yuv)
+ {
+-    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+-    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+-    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
++    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
++}
++
++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv)
++{
++    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
++}
++
++static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv,
++                   const uint8_t x[9])
++{
++    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
++    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
++    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
+     int y;
+     const int chromWidth = width >> 1;
+-    const uint8_t *src1 = src;
+-    const uint8_t *src2 = src1 + srcStride;
+-    uint8_t *ydst1 = ydst;
+-    uint8_t *ydst2 = ydst + lumStride;
+ 
+     for (y = 0; y < height; y += 2) {
+         int i;
+         for (i = 0; i < chromWidth; i++) {
+-            unsigned int b11 = src1[6 * i + 0];
+-            unsigned int g11 = src1[6 * i + 1];
+-            unsigned int r11 = src1[6 * i + 2];
+-            unsigned int b12 = src1[6 * i + 3];
+-            unsigned int g12 = src1[6 * i + 4];
+-            unsigned int r12 = src1[6 * i + 5];
+-            unsigned int b21 = src2[6 * i + 0];
+-            unsigned int g21 = src2[6 * i + 1];
+-            unsigned int r21 = src2[6 * i + 2];
+-            unsigned int b22 = src2[6 * i + 3];
+-            unsigned int g22 = src2[6 * i + 4];
+-            unsigned int r22 = src2[6 * i + 5];
+-
+-            unsigned int Y11 = ((ry * r11 + gy * g11 + by * b11) >> RGB2YUV_SHIFT) + 16;
+-            unsigned int Y12 = ((ry * r12 + gy * g12 + by * b12) >> RGB2YUV_SHIFT) + 16;
+-            unsigned int Y21 = ((ry * r21 + gy * g21 + by * b21) >> RGB2YUV_SHIFT) + 16;
+-            unsigned int Y22 = ((ry * r22 + gy * g22 + by * b22) >> RGB2YUV_SHIFT) + 16;
+-
+-            unsigned int bx = (b11 + b12 + b21 + b22) >> 2;
+-            unsigned int gx = (g11 + g12 + g21 + g22) >> 2;
+-            unsigned int rx = (r11 + r12 + r21 + r22) >> 2;
+-
+-            unsigned int U  = ((ru * rx + gu * gx + bu * bx) >> RGB2YUV_SHIFT) + 128;
+-            unsigned int V  = ((rv * rx + gv * gx + bv * bx) >> RGB2YUV_SHIFT) + 128;
+-
+-            ydst1[2 * i + 0] = Y11;
+-            ydst1[2 * i + 1] = Y12;
+-            ydst2[2 * i + 0] = Y21;
+-            ydst2[2 * i + 1] = Y22;
+-            udst[i]          = U;
+-            vdst[i]          = V;
++            unsigned int b = src[8 * i + 2];
++            unsigned int g = src[8 * i + 1];
++            unsigned int r = src[8 * i + 0];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
++            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
++            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
++
++            udst[i]     = U;
++            vdst[i]     = V;
++            ydst[2 * i] = Y;
++
++            b = src[8 * i + 6];
++            g = src[8 * i + 5];
++            r = src[8 * i + 4];
++
++            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++            ydst[2 * i + 1] = Y;
++        }
++        if ((width & 1) != 0) {
++            unsigned int b = src[8 * i + 2];
++            unsigned int g = src[8 * i + 1];
++            unsigned int r = src[8 * i + 0];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
++            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
++            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
++
++            udst[i]     = U;
++            vdst[i]     = V;
++            ydst[2 * i] = Y;
++        }
++        ydst += lumStride;
++        src  += srcStride;
++
++        if (y+1 == height)
++            break;
++
++        for (i = 0; i < chromWidth; i++) {
++            unsigned int b = src[8 * i + 2];
++            unsigned int g = src[8 * i + 1];
++            unsigned int r = src[8 * i + 0];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++
++            ydst[2 * i] = Y;
++
++            b = src[8 * i + 6];
++            g = src[8 * i + 5];
++            r = src[8 * i + 4];
++
++            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++            ydst[2 * i + 1] = Y;
+         }
+-        src1  += srcStride * 2;
+-        src2  += srcStride * 2;
+-        ydst1 += lumStride * 2;
+-        ydst2 += lumStride * 2;
+-        udst  += chromStride;
+-        vdst  += chromStride;
++        if ((width & 1) != 0) {
++            unsigned int b = src[8 * i + 2];
++            unsigned int g = src[8 * i + 1];
++            unsigned int r = src[8 * i + 0];
++
++            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
++
++            ydst[2 * i] = Y;
++        }
++        udst += chromStride;
++        vdst += chromStride;
++        ydst += lumStride;
++        src  += srcStride;
+     }
+ }
+ 
++static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv)
++{
++    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
++}
++
++static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv)
++{
++    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
++}
++
++// As the general code does no SIMD-like ops simply adding 1 to the src address
++// will fix the ignored alpha position
++static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv)
++{
++    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
++}
++
++static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
++                   uint8_t *vdst, int width, int height, int lumStride,
++                   int chromStride, int srcStride, int32_t *rgb2yuv)
++{
++    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
++}
++
++
+ static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
+                               uint8_t *dest, int width, int height,
+                               int src1Stride, int src2Stride, int dstStride)
+@@ -974,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void)
+     yuy2toyv12         = yuy2toyv12_c;
+     planar2x           = planar2x_c;
+     ff_rgb24toyv12     = ff_rgb24toyv12_c;
++    ff_bgr24toyv12     = ff_bgr24toyv12_c;
++    ff_rgbxtoyv12      = ff_rgbxtoyv12_c;
++    ff_bgrxtoyv12      = ff_bgrxtoyv12_c;
++    ff_xrgbtoyv12      = ff_xrgbtoyv12_c;
++    ff_xbgrtoyv12      = ff_xbgrtoyv12_c;
+     interleaveBytes    = interleaveBytes_c;
+     deinterleaveBytes  = deinterleaveBytes_c;
+     vu9_to_vu12        = vu9_to_vu12_c;
+diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
+index dc1d5f359325..519a69eaa37e 100644
+--- a/libswscale/swscale_unscaled.c
++++ b/libswscale/swscale_unscaled.c
+@@ -1696,6 +1696,91 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+     return srcSliceH;
+ }
+ 
++static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++                              int srcStride[], int srcSliceY, int srcSliceH,
++                              uint8_t *dst[], int dstStride[])
++{   /* unscaled conversion of one slice of packed 24-bit input to planar Y/U/V; returns srcSliceH */
++    ff_bgr24toyv12( /* sic: installed for AV_PIX_FMT_RGB24 input; the "bgr24" C routine applies x_bgr, i.e. treats byte 0 as red */
++        src[0],
++        dst[0] +  srcSliceY       * dstStride[0], /* luma: one output row per source row */
++        dst[1] + (srcSliceY >> 1) * dstStride[1], /* chroma: one output row per two source rows */
++        dst[2] + (srcSliceY >> 1) * dstStride[2],
++        c->srcW, srcSliceH,
++        dstStride[0], dstStride[1], srcStride[0], /* dstStride[1] is passed as the stride of both chroma planes */
++        c->input_rgb2yuv_table);
++    if (dst[3]) /* a fourth plane (presumably alpha, for YUVA420P) is filled with the constant 255 */
++        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++    return srcSliceH;
++}
++
++static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++                             int srcStride[], int srcSliceY, int srcSliceH,
++                             uint8_t *dst[], int dstStride[])
++{
++    ff_bgrxtoyv12(
++        src[0],
++        dst[0] +  srcSliceY       * dstStride[0],
++        dst[1] + (srcSliceY >> 1) * dstStride[1],
++        dst[2] + (srcSliceY >> 1) * dstStride[2],
++        c->srcW, srcSliceH,
++        dstStride[0], dstStride[1], srcStride[0],
++        c->input_rgb2yuv_table);
++    if (dst[3])
++        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++    return srcSliceH;
++}
++
++static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++                             int srcStride[], int srcSliceY, int srcSliceH,
++                             uint8_t *dst[], int dstStride[])
++{
++    ff_rgbxtoyv12(
++        src[0],
++        dst[0] +  srcSliceY       * dstStride[0],
++        dst[1] + (srcSliceY >> 1) * dstStride[1],
++        dst[2] + (srcSliceY >> 1) * dstStride[2],
++        c->srcW, srcSliceH,
++        dstStride[0], dstStride[1], srcStride[0],
++        c->input_rgb2yuv_table);
++    if (dst[3])
++        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++    return srcSliceH;
++}
++
++static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++                             int srcStride[], int srcSliceY, int srcSliceH,
++                             uint8_t *dst[], int dstStride[])
++{
++    ff_xbgrtoyv12(
++        src[0],
++        dst[0] +  srcSliceY       * dstStride[0],
++        dst[1] + (srcSliceY >> 1) * dstStride[1],
++        dst[2] + (srcSliceY >> 1) * dstStride[2],
++        c->srcW, srcSliceH,
++        dstStride[0], dstStride[1], srcStride[0],
++        c->input_rgb2yuv_table);
++    if (dst[3])
++        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++    return srcSliceH;
++}
++
++static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[],
++                             int srcStride[], int srcSliceY, int srcSliceH,
++                             uint8_t *dst[], int dstStride[])
++{
++    ff_xrgbtoyv12(
++        src[0],
++        dst[0] +  srcSliceY       * dstStride[0],
++        dst[1] + (srcSliceY >> 1) * dstStride[1],
++        dst[2] + (srcSliceY >> 1) * dstStride[2],
++        c->srcW, srcSliceH,
++        dstStride[0], dstStride[1], srcStride[0],
++        c->input_rgb2yuv_table);
++    if (dst[3])
++        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
++    return srcSliceH;
++}
++
+ static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
+                              int srcStride[], int srcSliceY, int srcSliceH,
+                              uint8_t *dst[], int dstStride[])
+@@ -2019,7 +2104,6 @@ void ff_get_unscaled_swscale(SwsContext *c)
+     const enum AVPixelFormat dstFormat = c->dstFormat;
+     const int flags = c->flags;
+     const int dstH = c->dstH;
+-    const int dstW = c->dstW;
+     int needsDither;
+ 
+     needsDither = isAnyRGB(dstFormat) &&
+@@ -2077,8 +2161,34 @@ void ff_get_unscaled_swscale(SwsContext *c)
+     /* bgr24toYV12 */
+     if (srcFormat == AV_PIX_FMT_BGR24 &&
+         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
+-        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
++        !(flags & SWS_ACCURATE_RND))
+         c->convert_unscaled = bgr24ToYv12Wrapper;
++    /* rgb24toYV12 */
++    if (srcFormat == AV_PIX_FMT_RGB24 &&
++        (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
++        !(flags & SWS_ACCURATE_RND))
++        c->convert_unscaled = rgb24ToYv12Wrapper;
++
++    /* bgrxtoYV12 */
++    if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) ||
++         (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++        !(flags & SWS_ACCURATE_RND))
++        c->convert_unscaled = bgrxToYv12Wrapper;
++    /* rgbxtoYV12 */
++    if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) ||
++         (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++        !(flags & SWS_ACCURATE_RND))
++        c->convert_unscaled = rgbxToYv12Wrapper;
++    /* xbgrtoYV12 */
++    if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) ||
++         (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++        !(flags & SWS_ACCURATE_RND))
++        c->convert_unscaled = xbgrToYv12Wrapper;
++    /* xrgbtoYV12 */
++    if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) ||
++         (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
++        !(flags & SWS_ACCURATE_RND))
++        c->convert_unscaled = xrgbToYv12Wrapper;
+ 
+     /* RGB/BGR -> RGB/BGR (no dither needed forms) */
+     if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)
+diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c
+index cf8d04de8983..97d5a4bd2ebc 100644
+--- a/libswscale/tests/swscale.c
++++ b/libswscale/tests/swscale.c
+@@ -23,6 +23,7 @@
+ #include <string.h>
+ #include <inttypes.h>
+ #include <stdarg.h>
++#include <time.h>
+ 
+ #undef HAVE_AV_CONFIG_H
+ #include "libavutil/cpu.h"
+@@ -98,6 +99,15 @@ struct Results {
+     uint32_t crc;
+ };
+ 
++static int time_rep = 0;
++
++static uint64_t utime(void)
++{
++    struct timespec ts;
++    clock_gettime(CLOCK_MONOTONIC, &ts);
++    return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000;
++}
++
+ // test by ref -> src -> dst -> out & compare out against ref
+ // ref & out are YV12
+ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
+@@ -213,7 +223,7 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
+         goto end;
+     }
+ 
+-    printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
++    printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d",
+            desc_src->name, srcW, srcH,
+            desc_dst->name, dstW, dstH,
+            flags);
+@@ -221,6 +231,17 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
+ 
+     sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
+ 
++    if (time_rep > 0) // optional timing run; a zero or negative repetition count disables it
++    {
++        const uint64_t now = utime(); // clock starts after the sws_scale() call already made above
++        uint64_t done;
++        for (i = 1; i < time_rep; ++i) { // time_rep - 1 further runs; '<' cannot spin on a negative count
++            sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
++        }
++        done = utime();
++        printf(" T=%7"PRIu64"us ", done-now); // operands are uint64_t, so print as unsigned
++    }
++
+     for (i = 0; i < 4 && dstStride[i]; i++)
+         crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
+                      dstStride[i] * dstH);
+@@ -413,30 +434,31 @@ static int fileTest(const uint8_t * const ref[4], int refStride[4],
+     return 0;
+ }
+ 
+-#define W 96
+-#define H 96
+-
+ int main(int argc, char **argv)
+ {
++    unsigned int W = 96;
++    unsigned int H = 96;
++    unsigned int W2;
++    unsigned int H2;
++    unsigned int S;
+     enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE;
+     enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
+-    uint8_t *rgb_data   = av_malloc(W * H * 4);
+-    const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
+-    int rgb_stride[4]   = { 4 * W, 0, 0, 0 };
+-    uint8_t *data       = av_malloc(4 * W * H);
+-    const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
+-    int stride[4]       = { W, W, W, W };
+     int x, y;
+     struct SwsContext *sws;
+     AVLFG rand;
+     int res = -1;
+     int i;
+     FILE *fp = NULL;
+-
+-    if (!rgb_data || !data)
+-        return -1;
++    uint8_t *rgb_data;
++    uint8_t * rgb_src[4] = { NULL };
++    int rgb_stride[4]   = { 0 };
++    uint8_t *data;
++    uint8_t * src[4] = { NULL };
++    int stride[4]       = { 0 };
+ 
+     for (i = 1; i < argc; i += 2) {
++        const char * const arg2 = argv[i+1];
++
+         if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "--help")) {
+             fprintf(stderr,
+                     "swscale [options...]\n"
+@@ -459,29 +481,50 @@ int main(int argc, char **argv)
+         if (argv[i][0] != '-' || i + 1 == argc)
+             goto bad_option;
+         if (!strcmp(argv[i], "-ref")) {
+-            fp = fopen(argv[i + 1], "r");
++            fp = fopen(arg2, "r");
+             if (!fp) {
+-                fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
++                fprintf(stderr, "could not open '%s'\n", arg2);
+                 goto error;
+             }
+         } else if (!strcmp(argv[i], "-cpuflags")) {
+             unsigned flags = av_get_cpu_flags();
+-            int ret = av_parse_cpu_caps(&flags, argv[i + 1]);
++            int ret = av_parse_cpu_caps(&flags, arg2);
+             if (ret < 0) {
+-                fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]);
++                fprintf(stderr, "invalid cpu flags %s\n", arg2);
+                 return ret;
+             }
+             av_force_cpu_flags(flags);
+         } else if (!strcmp(argv[i], "-src")) {
+-            srcFormat = av_get_pix_fmt(argv[i + 1]);
++            srcFormat = av_get_pix_fmt(arg2);
+             if (srcFormat == AV_PIX_FMT_NONE) {
+-                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
++                fprintf(stderr, "invalid pixel format %s\n", arg2);
+                 return -1;
+             }
+         } else if (!strcmp(argv[i], "-dst")) {
+-            dstFormat = av_get_pix_fmt(argv[i + 1]);
++            dstFormat = av_get_pix_fmt(arg2);
+             if (dstFormat == AV_PIX_FMT_NONE) {
+-                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
++                fprintf(stderr, "invalid pixel format %s\n", arg2);
++                return -1;
++            }
++        } else if (!strcmp(argv[i], "-w")) {
++            char * p = NULL;
++            W = strtoul(arg2, &p, 0);
++            if (!W || *p) {
++                fprintf(stderr, "bad width %s\n", arg2);
++                return -1;
++            }
++        } else if (!strcmp(argv[i], "-h")) {
++            char * p = NULL;
++            H = strtoul(arg2, &p, 0);
++            if (!H || *p) {
++                fprintf(stderr, "bad height '%s'\n", arg2);
++                return -1;
++            }
++        } else if (!strcmp(argv[i], "-t")) {
++            char * p = NULL;
++            time_rep = (int)strtol(arg2, &p, 0);
++            if (*p) {
++                fprintf(stderr, "bad time repetitions '%s'\n", arg2);
+                 return -1;
+             }
+         } else if (!strcmp(argv[i], "-p")) {
+@@ -495,15 +538,34 @@ bad_option:
+ 
+     ff_sfc64_init(&prng_state, 0, 0, 0, 12);
+ 
+-    sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
++    S = (W + 15) & ~15;
++    rgb_data   = av_mallocz(S * H * 4);
++    rgb_src[0] = rgb_data;
++    rgb_stride[0]   = 4 * S;
++    data       = av_mallocz(4 * S * H);
++    src[0] = data;
++    src[1] = data + S * H;
++    src[2] = data + S * H * 2;
++    src[3] = data + S * H * 3;
++    stride[0] = S;
++    stride[1] = S;
++    stride[2] = S;
++    stride[3] = S;
++    H2 = H < 96 ? 8 : H / 12;
++    W2 = W < 96 ? 8 : W / 12;
++
++    if (!rgb_data || !data)
++        return -1;
++
++    sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H,
+                          AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
+ 
+     av_lfg_init(&rand, 1);
+ 
+     for (y = 0; y < H; y++)
+         for (x = 0; x < W * 4; x++)
+-            rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
+-    res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
++            rgb_data[ x + y * 4 * S] = av_lfg_get(&rand);
++    res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride);
+     if (res < 0 || res != H) {
+         res = -1;
+         goto error;
+@@ -512,10 +574,10 @@ bad_option:
+     av_free(rgb_data);
+ 
+     if(fp) {
+-        res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
++        res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat);
+         fclose(fp);
+     } else {
+-        selfTest(src, stride, W, H, srcFormat, dstFormat);
++        selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat);
+         res = 0;
+     }
+ error:
+diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt
+new file mode 100644
+index 000000000000..2b62d660c0d1
+--- /dev/null
++++ b/pi-util/BUILD.txt
+@@ -0,0 +1,67 @@
++Building Pi FFmpeg
++==================
++
++Currently only building on a Pi is supported.
++This builds ffmpeg the way I've tested it
++
++Get all dependencies - the current package dependencies are good enough
++
++$ sudo apt-get build-dep ffmpeg
++
++Configure using the pi-util/conf_native.sh script
++-------------------------------------------------
++
++This sets the normal release options and creates an output dir to build into
++The directory name will depend on system and options but will be under out/
++
++There are a few choices here
++ --mmal  build including the legacy mmal-based decoders and zero-copy code
++         this requires appropriate libraries which currently will exist for
++         armv7 but not arm64
++ --noshared
++         Build a static image rather than a shared library one.  Static is
++         easier for testing as there is no need to worry about library
++         paths being confused and therefore running the wrong code.  Shared
++         is what is needed, in most cases, when building for use by other
++         programs.
++ --usr   Set install dir to /usr (i.e. system default) rather than in
++         <builddir>/install
++
++So for a static build
++---------------------
++
++$ pi-util/conf_native.sh --noshared
++
++$ make -j8 -C out/<wherever the script said it was building to>
++
++You can now run ffmpeg directly from where it was built
++
++For a shared build
++------------------
++
++There are two choices here
++
++$ pi-util/conf_native.sh
++$ make -j8 -C out/<builddir> install
++
++This sets the install prefix to <builddir>/install and is probably what you
++want if you don't want to overwrite the system files.
++
++You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was
++built. You can copy the contents of <builddir>/install to /usr and that mostly
++works. The only downside is that paths in pkgconfig end up being set to the
++install directory in your build directory which may be less than ideal when
++building other packages.
++
++The alternative if you just want to replace the system libs is:
++
++$ pi-util/conf_native.sh --usr
++$ make -j8 -C out/<builddir>
++$ sudo pi-util/clean_usr_libs.sh
++$ sudo make -j8 -C out/<builddir> install
++
++The clean_usr_libs.sh step wipes any existing libs & includes (for all
++architectures) from the system which helps avoid confusion when running other
++progs as you can be sure you're not running old code which is unfortunately
++easy to do otherwise.
++
+diff --git a/pi-util/NOTES.txt b/pi-util/NOTES.txt
+new file mode 100644
+index 000000000000..fcce72226a32
+--- /dev/null
++++ b/pi-util/NOTES.txt
+@@ -0,0 +1,69 @@
++Notes on the hevc_rpi decoder & associated support code
++-------------------------------------------------------
++
++There are 3 main parts to the existing code:
++
++1) The decoder - this is all in libavcodec as rpi_hevc*.
++
++2) A few filters to deal with Sand frames and a small patch to
++automatically select the sand->i420 converter when required.
++
++3) A kludge in ffmpeg.c to display the decoded video. This could & should
++be converted into a proper ffmpeg display module.
++
++
++Decoder
++-------
++
++The decoder is a modified version of the existing ffmpeg hevc decoder.
++Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder.
++More complex bitstreams can be up to ~200% faster but particularly easy
++streams can cut its advantage down to ~50%.  This means that a Pi3+ can
++display nearly all 8-bit 1080p30 streams and with some overclocking it can
++display most lower bitrate 10-bit 1080p30 streams - this latter case is
++not helped by the requirement to downsample to 8-bit before display on a
++Pi.
++
++It has had co-processor offload added for inter-pred and large block
++residual transform.  Various parts have had optimized ARM NEON assembler
++added and the existing ARM asm sections have been profiled and
++re-optimized for A53. The main C code has been substantially reworked at
++its lower levels in an attempt to optimize it and minimize memory
++bandwidth. To some extent code paths that deal with frame types that it
++doesn't support have been pruned.
++
++It outputs frames in Broadcom Sand format. This is a somewhat annoying
++layout that doesn't fit into ffmpeg's standard frame descriptions. It has
++vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for
++the stripe followed by interleaved U & V, that is then followed by the Y
++for the next stripe, etc. The final stripe is always padded to
++stripe-width. This is used in an attempt to help with cache locality and
++cut down on the number of dram bank switches. It is annoying to use for
++inter-pred with conventional processing but the way the Pi QPU (which is
++used for inter-pred) works means that it has negligible downsides here and
++the improved memory performance exceeds the overhead of the increased
++complexity in the rest of the code.
++
++Frames must be allocated out of GPU memory (as otherwise they can't be
++accessed by the co-processors). Utility functions (in rpi_zc.c) have been
++written to make this easier. As the frames are already in GPU memory they
++can be displayed by the Pi h/w without any further copying.
++
++
++Known non-features
++------------------
++
++Frame allocation should probably be done in some other way in order to fit
++into the standard framework better.
++
++Sand frames are currently declared as software frames, there is an
++argument that they should be hardware frames but they aren't really.
++
++There must be a better way of auto-selecting the hevc_rpi decoder over the
++normal s/w hevc decoder, but I became confused by the existing h/w
++acceleration framework and what I wanted to do didn't seem to fit in
++neatly.
++
++Display should be a proper device rather than a kludge in ffmpeg.c
++
++
+diff --git a/pi-util/TESTMESA.txt b/pi-util/TESTMESA.txt
+new file mode 100644
+index 000000000000..92bc13a3dfa1
+--- /dev/null
++++ b/pi-util/TESTMESA.txt
+@@ -0,0 +1,82 @@
++# Setup & Build instructions for testing Argon30 mesa support (on Pi4)
++
++# These assume that the drm_mmal test for Sand8 has been built on this Pi
++# as build relies on many of the same files
++
++# 1st get everything required to build ffmpeg
++# If sources aren't already enabled on your Pi then enable them
++sudo su
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list
++mv /tmp/sources.list /etc/apt/
++mv /tmp/raspi.list /etc/apt/sources.list.d/
++apt update
++
++# Get dependencies
++sudo apt build-dep ffmpeg
++
++sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev
++
++# Enable H265 V4L2 request decoder
++sudo su
++echo dtoverlay=rpivid-v4l2 >> /boot/config.txt
++# You may also want to add more CMA if you are going to try 4k videos
++# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read
++# dtoverlay=vc4-fkms-v3d,cma-512
++reboot
++# Check it has turned up
++ls -la /dev/video*
++# This should include video19
++# crw-rw----+ 1 root video 81, 7 Aug  4 17:25 /dev/video19
++
++# Currently on the Pi the linux headers from the debian distro don't match
++# the kernel that we ship and we need to update them - hopefully this step
++# will be unneeded in the future
++sudo apt install git bc bison flex libssl-dev make
++git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y
++cd linux
++KERNEL=kernel7l
++make bcm2711_defconfig
++make headers_install
++sudo cp -r usr/include/linux /usr/include
++cd ..
++
++# Config - this builds a statically linked ffmpeg which is easier for testing
++pi-util/conf_native.sh --noshared
++
++# Build (this is a bit dull)
++# If you want to poke the source then libavdevice/egl_vout.c contains the
++# output code -
++cd out/armv7-static-rel
++
++# Check that you have actually configured V4L2 request
++grep HEVC_V4L2REQUEST config.h
++# You are hoping for
++# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1
++# if you get 0 then the config has failed
++
++make -j6
++
++# Grab test streams
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv
++
++# Test i420 output (works currently)
++./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl -
++
++# Test Sand8 output - doesn't currently work but should once you have
++# Sand8 working in drm_mmal. I can't guarantee that this will work as
++# I can't test this path with a known working format, but the debug looks
++# good.  If this doesn't work & drm_mmal does with sand8 then come back to me
++# The "show_all 1" forces vout to display every frame otherwise it drops any
++# frame that would cause it to block
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl -
++
++# Test Sand30 - doesn't currently work
++# (Beware that when FFmpeg errors out it often leaves your terminal window
++# in a state where you need to reset it)
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl -
++
++
++
+diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh
+new file mode 100755
+index 000000000000..01bd6a6a2254
+--- /dev/null
++++ b/pi-util/clean_usr_libs.sh
+@@ -0,0 +1,42 @@
++set -e  # abort on the first command that fails
++U=/usr/include/arm-linux-gnueabihf  # ffmpeg header dirs under the arm-linux-gnueabihf include dir
++rm -rf $U/libavcodec
++rm -rf $U/libavdevice
++rm -rf $U/libavfilter
++rm -rf $U/libavformat
++rm -rf $U/libavutil
++rm -rf $U/libswresample
++rm -rf $U/libswscale
++U=/usr/include/aarch64-linux-gnu  # same header dirs under the aarch64-linux-gnu include dir
++rm -rf $U/libavcodec
++rm -rf $U/libavdevice
++rm -rf $U/libavfilter
++rm -rf $U/libavformat
++rm -rf $U/libavutil
++rm -rf $U/libswresample
++rm -rf $U/libswscale
++U=/usr/lib/arm-linux-gnueabihf  # library files (lib<name>.*) in the arm-linux-gnueabihf lib dir
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavutil.*
++rm -f $U/libswresample.*
++rm -f $U/libswscale.*
++U=/usr/lib/arm-linux-gnueabihf/neon/vfp  # same library names in the neon/vfp subdirectory
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavutil.*
++rm -f $U/libswresample.*
++rm -f $U/libswscale.*
++U=/usr/lib/aarch64-linux-gnu  # library files (lib<name>.*) in the aarch64-linux-gnu lib dir
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavutil.*
++rm -f $U/libswresample.*
++rm -f $U/libswscale.*
++
+diff --git a/pi-util/conf_arm64_native.sh b/pi-util/conf_arm64_native.sh
+new file mode 100644
+index 000000000000..9e3bbfa1908a
+--- /dev/null
++++ b/pi-util/conf_arm64_native.sh
+@@ -0,0 +1,45 @@
++echo "Configure for ARM64 native build"  # configures a build dir under out/; uses ../../configure, so run from the source root
++
++#RPI_KEEPS="-save-temps=obj"
++
++SHARED_LIBS="--enable-shared"  # default; replaced below when --noshared is given
++if [ "$1" = "--noshared" ]; then  # single '=' is the POSIX string test; '==' is rejected by shells such as dash
++  SHARED_LIBS="--disable-shared"
++  echo Static libs
++  OUT=out/arm64-static-rel
++else
++  echo Shared libs
++  OUT=out/arm64-shared-rel
++fi
++
++mkdir -p $OUT  # one build directory per static/shared choice
++cd $OUT
++
++A=aarch64-linux-gnu  # subdirectory name used under lib/ and include/
++USR_PREFIX=`pwd`/install  # install prefix inside the build directory
++LIB_PREFIX=$USR_PREFIX/lib/$A
++INC_PREFIX=$USR_PREFIX/include/$A
++
++../../configure \
++ --prefix=$USR_PREFIX\
++ --libdir=$LIB_PREFIX\
++ --incdir=$INC_PREFIX\
++ --disable-stripping\
++ --disable-thumb\
++ --disable-mmal\
++ --enable-sand\
++ --enable-v4l2-request\
++ --enable-libdrm\
++ --enable-epoxy\
++ --enable-libudev\
++ --enable-vout-drm\
++ --enable-vout-egl\
++ $SHARED_LIBS\
++ --extra-cflags="-ggdb"
++
++# --enable-decoder=hevc_rpi\
++# --enable-extra-warnings\
++# --arch=armv71\
++
++# gcc option for getting asm listing
++# -Wa,-ahls
+diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv
+new file mode 100644
+index 000000000000..177f1c8111fd
+--- /dev/null
++++ b/pi-util/conf_h265.2016.csv
+@@ -0,0 +1,195 @@
++1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8
++1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8
++1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8
++1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8
++1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8
++1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8
++1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8
++1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8
++1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8
++1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8
++1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8
++1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10
++1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8
++1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8
++1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8
++1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8
++1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8
++1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8
++1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8
++1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8
++1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8
++1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8
++1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10
++1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8
++1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8
++1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8
++1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8
++1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8
++1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8
++1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8
++1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8
++1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8
++1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8
++1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8
++1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8
++1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8
++1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8
++1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8
++1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8
++1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8
++1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8
++1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8
++1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8
++1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8
++1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8
++1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8
++1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8
++1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8
++1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8
++1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8
++1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8
++1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8
++1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8
++1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8
++1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8
++1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8
++1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8
++1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8
++1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8
++1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8
++1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8
++1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8
++1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8
++1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8
++1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8
++1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8
++1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8
++1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8
++1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8
++1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8
++1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8
++1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8
++1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8
++1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8
++1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8
++1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8
++1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8
++1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8
++1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8
++1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8
++1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8
++1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8
++1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8
++1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8
++1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8
++1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8
++1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8
++1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8
++1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8
++1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8
++1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8
++1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8
++1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8
++1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8
++3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10
++1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8
++1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8
++2,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
++1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8
++1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0
++0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10
++0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8
++0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8
++1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0
++0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10
++1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0
++1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0
++0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0
++0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8
++0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8
++1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8
++1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0
++1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0
++1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0
++1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0
++1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0
++0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8
++0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8
++0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8
++1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8
++1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8
++1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8
++1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8
++1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8
+diff --git a/pi-util/conf_h265.2016_HEVC_v1.csv b/pi-util/conf_h265.2016_HEVC_v1.csv
+new file mode 100644
+index 000000000000..60826412715c
+--- /dev/null
++++ b/pi-util/conf_h265.2016_HEVC_v1.csv
+@@ -0,0 +1,147 @@
++1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5
++1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5
++1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5
++1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5
++2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt
++2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5
++1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5
++1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5
++1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+diff --git a/pi-util/conf_h265.csv b/pi-util/conf_h265.csv
+new file mode 100644
+index 000000000000..fc14f2a3c2bb
+--- /dev/null
++++ b/pi-util/conf_h265.csv
+@@ -0,0 +1,144 @@
++1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5
++1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5
++1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5
++1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5
++1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5
++1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5
++1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5
++1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
+new file mode 100755
+index 000000000000..0dbaa53e97e0
+--- /dev/null
++++ b/pi-util/conf_native.sh
+@@ -0,0 +1,135 @@
++echo "Configure for native build"
++
++FFSRC=`pwd`
++MC=`dpkg --print-architecture`
++BUILDBASE=$FFSRC/out
++
++#RPI_KEEPS="-save-temps=obj"
++RPI_KEEPS=""
++
++NOSHARED=
++MMAL=
++USR_PREFIX=
++TOOLCHAIN=
++R=rel
++
++while [ "$1" != "" ] ; do
++    case $1 in
++	--noshared)
++	    NOSHARED=1
++	    ;;
++	--mmal)
++	    MMAL=1
++	    ;;
++	--usr)
++	    USR_PREFIX=/usr
++	    ;;
++	--tsan)
++	    TOOLCHAIN="--toolchain=gcc-tsan"
++	    R=tsan
++	    ;;
++	*)
++	    echo "Usage $0: [--noshared] [--mmal] [--usr]"
++	    echo "  noshared  Build static libs and executable - good for testing"
++	    echo "  mmal      Build mmal decoders"
++	    echo "  usr       Set install prefix to /usr [default=<build-dir>/install]"
++	    exit 1
++	    ;;
++    esac
++    shift
++done
++
++
++MCOPTS=
++RPI_INCLUDES=
++RPI_LIBDIRS=
++RPI_DEFINES=
++RPI_EXTRALIBS=
++
++# uname -m gives kernel type which may not have the same
++# 32/64bitness as userspace :-( getconf should provide the answer
++# but use uname to check we are on the right processor
++MC=`uname -m`
++LB=`getconf LONG_BIT`
++if [ "$MC" = "armv7l" ] || [ "$MC" = "aarch64" ]; then
++  if [ "$LB" = "32" ]; then
++    echo "M/C armv7"
++    A=arm-linux-gnueabihf
++    B=armv7
++    MCOPTS="--arch=armv6t2 --cpu=cortex-a7"
++    RPI_DEFINES=-mfpu=neon-vfpv4
++  elif [ "$LB" = "64" ]; then
++    echo "M/C aarch64"
++    A=aarch64-linux-gnu
++    B=arm64
++  else
++    echo "Unknown LONG_BIT name: $LB"
++    exit 1
++  fi
++else
++  echo "Unknown machine name: $MC"
++  exit 1
++fi
++
++if [ $MMAL ]; then
++  RPI_OPT_VC=/opt/vc
++  RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
++  RPI_LIBDIRS="-L$RPI_OPT_VC/lib"
++  RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000"
++  RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group"
++  RPIOPTS="--enable-mmal"
++else
++  RPIOPTS="--disable-mmal"
++fi
++
++C=`lsb_release -sc`
++V=`cat RELEASE`
++
++SHARED_LIBS="--enable-shared"
++if [ $NOSHARED ]; then
++  SHARED_LIBS="--disable-shared"
++  OUT=$BUILDBASE/$B-$C-$V-static-$R
++  echo Static libs
++else
++  echo Shared libs
++  OUT=$BUILDBASE/$B-$C-$V-shared-$R
++fi
++
++if [ ! $USR_PREFIX ]; then
++  USR_PREFIX=$OUT/install
++fi
++LIB_PREFIX=$USR_PREFIX/lib/$A
++INC_PREFIX=$USR_PREFIX/include/$A
++
++echo Destination directory: $OUT
++mkdir -p $OUT
++# Nothing under here needs tracking by git - including this .gitignore!
++echo "**" > $BUILDBASE/.gitignore
++cd $OUT
++
++$FFSRC/configure \
++ --prefix=$USR_PREFIX\
++ --libdir=$LIB_PREFIX\
++ --incdir=$INC_PREFIX\
++ $MCOPTS\
++ $TOOLCHAIN\
++ --disable-stripping\
++ --disable-thumb\
++ --enable-sand\
++ --enable-v4l2-request\
++ --enable-libdrm\
++ --enable-vout-egl\
++ --enable-vout-drm\
++ --enable-gpl\
++ $SHARED_LIBS\
++ $RPIOPTS\
++ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\
++ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\
++ --extra-ldflags="$RPI_LIBDIRS"\
++ --extra-libs="$RPI_EXTRALIBS"\
++ --extra-version="rpi"
++
++echo "Configured into $OUT"
++
++# gcc option for getting asm listing
++# -Wa,-ahls
+diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py
+new file mode 100755
+index 000000000000..573f1e03c0c0
+--- /dev/null
++++ b/pi-util/ffconf.py
+@@ -0,0 +1,276 @@
++#!/usr/bin/env python3
++
++import string
++import os
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class DecodeType:
++    def __init__(self, textname, hwaccel):
++        self.textname = textname
++        self.hwaccel = hwaccel
++
++hwaccel_rpi = DecodeType("RPI Test/Legacy", "rpi")
++hwaccel_sw = DecodeType("Software", None)
++hwaccel_drm = DecodeType("DRM Prime", "drm")
++hwaccel_vaapi = DecodeType("VAAPI", "vaapi")
++
++def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, args):
++    ffmpeg_exec = args.ffmpeg
++    gen_yuv = args.gen_yuv
++    valgrind = args.valgrind
++    rv = 0
++
++    pix_fmt = []
++    if pix == "8":
++        pix_fmt = ["-pix_fmt", "yuv420p"]
++    elif pix == "10":
++        pix_fmt = ["-pix_fmt", "yuv420p10le"]
++    elif pix == "12":
++        pix_fmt = ["-pix_fmt", "yuv420p12le"]
++
++    tmp_root = "/tmp"
++
++    names = srcname.split('/')
++    while len(names) > 1:
++        tmp_root = os.path.join(tmp_root, names[0])
++        del names[0]
++    name = names[0]
++
++    if not os.path.exists(tmp_root):
++        os.makedirs(tmp_root)
++
++    dec_file = os.path.join(tmp_root, name + ".dec.md5")
++    try:
++        os.remove(dec_file)
++    except:
++        pass
++
++    yuv_file = os.path.join(tmp_root, name + ".dec.yuv")
++    try:
++        os.remove(yuv_file)
++    except:
++        pass
++
++    flog = open(os.path.join(tmp_root, name + ".log"), "w+t")
++
++    ffargs = [ffmpeg_exec, "-flags", "unaligned"] +\
++        (["-hwaccel", dectype.hwaccel] if dectype.hwaccel else []) +\
++        ["-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] +\
++        pix_fmt +\
++        ([yuv_file] if gen_yuv else ["-f", "md5", dec_file])
++
++    if valgrind:
++        ffargs = ['valgrind', '--leak-check=full'] + ffargs
++
++    # Unaligned needed for cropping conformance
++    rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT)
++
++    if gen_yuv:
++        with open(dec_file, 'wt') as f:
++            subprocess.call(["md5sum", yuv_file], stdout=f, stderr=subprocess.STDOUT)
++
++    try:
++        m1 = None
++        m2 = None
++        with open(os.path.join(fileroot, md5_file)) as f:
++            for line in f:
++                m1 = re.search("[0-9a-f]{32}", line.lower())
++                if m1:
++                    break
++
++        with open(dec_file) as f:
++            m2 = re.search("[0-9a-f]{32}", f.readline())
++    except:
++        pass
++
++    if valgrind:
++        flog.seek(0)
++        leak = True
++        valerr = True
++
++        for line in flog:
++            if re.search("^==[0-9]+== All heap blocks were freed", line):
++                leak = False
++            if re.search("^==[0-9]+== ERROR SUMMARY: 0 errors", line):
++                valerr = False
++        if leak or valerr:
++            rv = 4
++
++    if  m1 and m2 and m1.group() == m2.group():
++        print("Match: " + m1.group(), file=flog)
++    elif not m1:
++        print("****** Cannot find m1", file=flog)
++        rv = 3
++    elif not m2:
++        print("****** Cannot find m2", file=flog)
++        rv = 2
++    else:
++        print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog)
++        rv = 1
++    flog.close()
++    return rv
++
++def scandir(root):
++    aconf = []
++    ents = os.listdir(root)
++    ents.sort(key=str.lower)
++    for name in ents:
++        test_path = os.path.join(root, name)
++        if S_ISDIR(os.stat(test_path).st_mode):
++            files = os.listdir(test_path)
++            es_file = "?"
++            md5_file = "?"
++            for f in files:
++                (base, ext) = os.path.splitext(f)
++                if base[0] == '.':
++                    pass
++                elif ext == ".bit" or ext == ".bin":
++                    es_file = f
++                elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")):
++                    if md5_file == "?":
++                        md5_file = f
++                    elif base[-3:] == "yuv":
++                        md5_file = f
++            aconf.append((1, name, es_file, md5_file))
++    return aconf
++
++def runtest(name, tests):
++    """Return True if tests is empty or name starts with t, or contains "/" + t, for some t."""
++    if not tests:
++        return True
++    return any(
++        name.startswith(wanted) or ("/" + wanted) in name for wanted in tests
++    )
++
++def doconf(csva, tests, test_root, vcodec, dectype, args):
++    """Run each enabled CSV row selected by tests; return names with unexpected results."""
++    unx_failures, unx_success = [], []
++    failures = successes = 0
++    for a in csva:
++        exp_test = int(a[0])  # 0 = skip row, 2 = mismatch expected, 3 = abort expected
++        if (exp_test and runtest(a[1], tests)):
++            name = a[1]
++            print ("==== ", name, end="")
++            sys.stdout.flush()
++            # Pixel-depth column is optional: rows written by scandir() have only 4 fields
++            pix = a[4] if len(a) > 4 else None
++            rv = testone(os.path.join(test_root, name), name, a[2], a[3], pix, dectype=dectype, vcodec=vcodec, args=args)
++            if (rv == 0):
++                successes += 1
++            else:
++                failures += 1
++
++            if (rv == 0):
++                if exp_test == 2:
++                    print(": * OK *")
++                    unx_success.append(name)
++                else:
++                    print(": ok")
++            elif exp_test == 2 and rv == 1:
++                print(": fail")
++            elif exp_test == 3 and rv == 2:
++                # Call an expected "crash" an abort
++                print(": abort")
++            else:
++                unx_failures.append(name)
++                if rv == 1:
++                    print(": * FAIL *")
++                elif (rv == 2) :
++                    print(": * CRASH *")
++                elif (rv == 3) :
++                    print(": * MD5 MISSING *")
++                elif (rv == 4) :
++                    print(": * VALGRIND *")
++                else :
++                    print(": * BANG *")
++
++    print()
++    print("Tested using decode type:", dectype.textname)
++    if unx_failures or unx_success:
++        print("Unexpected Failures:", unx_failures)
++        print("Unexpected Success: ", unx_success)
++    else:
++        print("All tests normal:", successes, "ok,", failures, "failed")
++
++    return unx_failures + unx_success
++
++
++class ConfCSVDialect(csv.Dialect):
++    delimiter = ','
++    doublequote = True
++    lineterminator = '\n'
++    quotechar='"'
++    quoting = csv.QUOTE_MINIMAL
++    skipinitialspace = True
++    strict = True
++
++
++
++if __name__ == '__main__':
++
++    argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester")
++    argp.add_argument("tests", nargs='*')
++    argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line")
++    argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line")
++    argp.add_argument("--sw", action='store_true', help="Use software decode")
++    argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line")
++    argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test")
++    argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir")
++    argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename")
++    argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use")
++    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use <dir>/ffmpeg")
++    argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests")
++    argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)")
++    argp.add_argument("--loop", default=0, type=int, help="Loop n times, or until unexpected result")
++    args = argp.parse_args()
++
++    if not os.path.isdir(args.test_root):
++        print("Test root dir '%s' not found" % args.test_root)
++        exit(1)
++
++    if args.csvgen:
++        csv.writer(sys.stdout).writerows(scandir(args.test_root))
++        exit(0)
++
++    with open(args.csv, 'rt') as csvfile:
++        csva = [a for a in csv.reader(csvfile, ConfCSVDialect())]
++
++    dectype = None
++    if os.path.exists("/dev/rpivid-hevcmem"):
++        dectype = hwaccel_rpi
++    if os.path.exists("/sys/module/rpivid_hevc"):
++        dectype = hwaccel_drm
++
++    if args.pi4:
++        dectype = hwaccel_rpi
++    elif args.drm:
++        dectype = hwaccel_drm
++    elif args.vaapi:
++        dectype = hwaccel_vaapi
++    elif args.sw:
++        dectype = hwaccel_sw
++
++    if os.path.isdir(args.ffmpeg):
++        args.ffmpeg = os.path.join(args.ffmpeg, "ffmpeg")
++    if not os.path.isfile(args.ffmpeg):
++        print("FFmpeg file '%s' not found" % args.ffmpeg)
++        exit(1)
++
++    if not dectype:
++        print("No decode type selected and no h/w detected")
++        exit(1)
++    print("Running test using decode:", dectype.textname)
++
++    i = 0
++    while True:
++        i = i + 1
++        if args.loop:
++            print("== Loop ", i)
++        if doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args) or (args.loop >= 0 and i > args.loop):
++            break
++
+diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py
+new file mode 100755
+index 000000000000..767efe2de2fa
+--- /dev/null
++++ b/pi-util/ffperf.py
+@@ -0,0 +1,140 @@
++#!/usr/bin/env python3
++
++import shlex
++import time
++import string
++import os
++import tempfile
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class tstats:
++    close_threshold = 0.01
++
++    def __init__(self, stats_dict=None):
++        if stats_dict != None:
++            self.name = stats_dict["name"]
++            self.elapsed = float(stats_dict["elapsed"])
++            self.user = float(stats_dict["user"])
++            self.sys = float(stats_dict["sys"])
++
++    def times_str(self):
++        ctime = self.sys + self.user
++        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
++
++    def dict(self):
++        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
++
++    def is_close(self, other):
++        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
++
++    def __lt__(self, other):
++        return self.elapsed < other.elapsed
++    def __gt__(self, other):
++        return self.elapsed > other.elapsed
++
++    def time_file(name, prefix, args):
++        cmdargs = [args.ffmpeg]
++        for x in args.args :
++            if x == '{INPUT}':
++                cmdargs.append(prefix + name)
++            elif x == '{NULL}':
++                cmdargs.append(os.devnull)
++            else:
++                cmdargs.append(x)
++
++        stats = tstats()
++        stats.name = name
++        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        cproc = subprocess.Popen(cmdargs, bufsize=-1, stdout=flog, stderr=flog);
++        pinfo = os.wait4(cproc.pid, 0)
++        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        stats.elapsed = end_time - start_time
++        stats.user = pinfo[2].ru_utime
++        stats.sys = pinfo[2].ru_stime
++        return stats
++
++
++def common_prefix(s1, s2):
++    """Return the longest leading substring common to s1 and s2.
++
++    An empty s1 or s2 yields "" instead of raising UnboundLocalError."""
++    return os.path.commonprefix([s1, s2])
++
++def main():
++    global flog
++
++    argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog="""
++To blank the screen before starting use "xdg-screensaver activate"
++(For some reason this doesn't seem to work from within python).
++""")
++
++    argp.add_argument("streams", nargs='*')
++    argp.add_argument("--args", default='-t 30 -i {INPUT} -f null {NULL}', help="""
++ffmpeg arguments, default='-t 30 -i {INPUT} -f null {NULL}';
++  {INPUT} is replaced by current inputfile path;
++  {NULL} is replaced by the system null device""")
++    argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename")
++    argp.add_argument("--csv_in", help="CSV input filename")
++    argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).")
++    argp.add_argument("--repeat", default=3, type=int, help="Run repeat count")
++    argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable")
++
++    args = argp.parse_args()
++    args.args = shlex.split(args.args)
++
++    csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"])
++    csv_out.writeheader()
++
++    stats_in = {}
++    if args.csv_in != None:
++        with open(args.csv_in, 'r', newline='') as f_in:
++            stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
++
++    flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt")
++
++    streams = args.streams
++    if not streams:
++        if not stats_in:
++            print ("No source streams specified")
++            return 1
++        prefix = "" if args.prefix == None else args.prefix
++        streams = [k for k in stats_in]
++    elif args.prefix != None:
++        prefix = args.prefix
++    else:
++        prefix = streams[0]
++        for f in streams[1:]:
++            prefix = common_prefix(prefix, f)
++        pp = prefix.rpartition(os.sep)
++        prefix = pp[0] + pp[1]
++        streams = [s[len(prefix):] for s in streams]
++
++    for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()):
++        print ("====", f)
++
++        t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999})
++        for i in range(args.repeat):
++            t = tstats.time_file(f, prefix, args)
++            print ("...", t.times_str())
++            if t0 > t:
++                t0 = t
++
++        if t0.name in stats_in:
++            pstat = stats_in[t0.name]
++            print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str())
++
++        csv_out.writerow(t0.dict())
++
++        print ()
++
++    return 0
++
++
++if __name__ == '__main__':
++    exit(main())
++
+diff --git a/pi-util/genpatch.sh b/pi-util/genpatch.sh
+new file mode 100755
+index 000000000000..0948a68a7ad7
+--- /dev/null
++++ b/pi-util/genpatch.sh
+@@ -0,0 +1,35 @@
++set -e  # abort on the first failing command
++
++NOPATCH=
++if [ "$1" = "--notag" ]; then  # POSIX "=" so this also works when run by dash
++  shift
++  NOPATCH=1
++fi
++
++if [ "$1" = "" ]; then
++  echo "Usage: $0 [--notag] <patch_tag>"
++  echo "e.g.: $0 mmal_4"
++  exit 1
++fi
++
++VERSION=$(cat RELEASE 2>/dev/null || true)  # do not let set -e pre-empt the message below
++if [ "$VERSION" = "" ]; then
++  echo Can\'t find version RELEASE
++  exit 1
++fi
++
++PATCHFILE=../ffmpeg-$VERSION-$1.patch
++
++if [ -n "$NOPATCH" ]; then
++  echo Not tagged
++else
++  # Only continue if we are all committed
++  git diff --name-status --exit-code
++
++  PATCHTAG=pi/$VERSION/$1
++  echo "Tagging: $PATCHTAG"
++
++  git tag "$PATCHTAG"
++fi
++echo "Generating patch: $PATCHFILE"
++git diff "n$VERSION" -- > "$PATCHFILE"
+diff --git a/pi-util/make_array.py b/pi-util/make_array.py
+new file mode 100755
+index 000000000000..67b22d2d517f
+--- /dev/null
++++ b/pi-util/make_array.py
+@@ -0,0 +1,23 @@
++#!/usr/bin/env python3
++
++# Usage
++#   make_array.py file.bin [more.bin ...]
++#   Produces file.h with array of bytes (8 per line, offset in a trailing comment).
++#
++import sys
++for file in sys.argv[1:]:
++  prefix,suffix = file.rsplit('.', 1)  # split on the last dot only so ./dir/x.bin works
++  assert suffix=='bin'
++  name=prefix.split('/')[-1]
++  print('Converting',file)
++  with open(prefix+'.h','w') as out:
++    print('static const unsigned char',name,'[] = {', file=out)
++    with open(file,'rb') as fd:
++      i = 0
++      for byte in fd.read():  # iterating bytes yields ints on Python 3
++        print('0x%02x, ' % byte, end=' ', file=out)
++        i = i + 1
++        if i % 8 == 0:
++          print(' // %04x' % (i - 8), file=out)
++    print('};', file=out)
++
+diff --git a/pi-util/mkinst.sh b/pi-util/mkinst.sh
+new file mode 100755
+index 000000000000..271a39e8460a
+--- /dev/null
++++ b/pi-util/mkinst.sh
+@@ -0,0 +1,5 @@
++set -e
++
++make install
++
++cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr
+diff --git a/pi-util/patkodi.sh b/pi-util/patkodi.sh
+new file mode 100644
+index 000000000000..dcd05a606e85
+--- /dev/null
++++ b/pi-util/patkodi.sh
+@@ -0,0 +1,9 @@
++set -e
++KODIBASE=/home/jc/rpi/kodi/xbmc
++JOBS=-j20
++make $JOBS
++git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch
++make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS
++make -C $KODIBASE/build install
++
++
+diff --git a/pi-util/perfcmp.py b/pi-util/perfcmp.py
+new file mode 100755
+index 000000000000..e44cfa0c3c4d
+--- /dev/null
++++ b/pi-util/perfcmp.py
+@@ -0,0 +1,101 @@
++#!/usr/bin/env python3
++
++import time
++import string
++import os
++import tempfile
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class tstats:
++    close_threshold = 0.01
++
++    def __init__(self, stats_dict=None):
++        if stats_dict != None:
++            self.name = stats_dict["name"]
++            self.elapsed = float(stats_dict["elapsed"])
++            self.user = float(stats_dict["user"])
++            self.sys = float(stats_dict["sys"])
++
++    def times_str(self):
++        ctime = self.sys + self.user
++        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
++
++    def dict(self):
++        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
++
++    def is_close(self, other):
++        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
++
++    def __lt__(self, other):
++        return self.elapsed < other.elapsed
++    def __gt__(self, other):
++        return self.elapsed > other.elapsed
++
++    def time_file(name, prefix):
++        stats = tstats()
++        stats.name = name
++        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name,
++                                  "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
++        pinfo = os.wait4(cproc.pid, 0)
++        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        stats.elapsed = end_time - start_time
++        stats.user = pinfo[2].ru_utime
++        stats.sys = pinfo[2].ru_stime
++        return stats
++
++
++def common_prefix(s1, s2):
++    """Return the longest common leading substring of s1 and s2.
++
++    Character-wise (not path-aware); returns '' if either string is empty."""
++    return os.path.commonprefix([s1, s2])
++
++def main():
++    argp = argparse.ArgumentParser(description="FFmpeg performance compare")
++
++    argp.add_argument("stream0", help="CSV to compare")
++    argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare")
++
++    args = argp.parse_args()
++
++    with open(args.stream0, 'r', newline='') as f_in:
++        stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
++    with open(args.stream1, 'r', newline='') as f_in:
++        stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
++
++    print (args.stream0, "<<-->>", args.stream1)
++    print ()
++
++    for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()):
++       if not (f in stats0) :
++           print ("           XX               :", f)
++           continue
++       if not (f in stats1) :
++           print ("       XX                   :", f)
++           continue
++
++       s0 = stats0[f]
++       s1 = stats1[f]
++
++       pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0
++       thresh = 0.3
++       tc = 6
++
++       nchar = min(tc - 1, int(abs(pcent) / thresh))
++       cc = "  --  " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar
++
++       print ("%6.2f %s%6.2f (%+5.2f) : %s" %
++           (s0.elapsed, cc, s1.elapsed, pcent, f))
++
++    return 0
++
++
++if __name__ == '__main__':
++    exit(main())
++
+diff --git a/pi-util/qem.sh b/pi-util/qem.sh
+new file mode 100755
+index 000000000000..a4dbb6eacd18
+--- /dev/null
++++ b/pi-util/qem.sh
+@@ -0,0 +1,9 @@
++TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex
++QASM=python\ ../local/bin/qasm.py
++SRC_FILE=libavcodec/rpi_hevc_shader.qasm
++DST_BASE=shader
++
++cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR
++$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c
++$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h
++
 diff --git a/pi-util/testfilt.py b/pi-util/testfilt.py
 new file mode 100755
 index 000000000000..b322dac0c22d
@@ -31903,3730 +23289,148 @@ index 000000000000..b322dac0c22d
 +#                                    "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv",
 +                                   "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv",
 +            [valid_regex(r'Output stream #0:0 \(video\): 900 frames encoded; 900 packets muxed')])
-
-From 2ac054adfa1e9ebece8a9594ac37b61ccff7e440 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 5 Jan 2023 14:39:30 +0000
-Subject: [PATCH 111/186] pixfmt: Add a #define to indicate presence of SAND
- formats
-
----
- libavutil/pixfmt.h | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
-index 22f70007c3df..5cc780e7d56c 100644
---- a/libavutil/pixfmt.h
-+++ b/libavutil/pixfmt.h
-@@ -378,6 +378,8 @@ enum AVPixelFormat {
-     AV_PIX_FMT_Y210BE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian
-     AV_PIX_FMT_Y210LE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
- // RPI - not on ifdef so can be got at by calling progs
-+// #define so code that uses this can know it is there
-+#define AVUTIL_HAVE_PIX_FMT_SAND 1
-     AV_PIX_FMT_SAND128,    ///< 4:2:0  8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding
-     AV_PIX_FMT_SAND64_10,  ///< 4:2:0 10-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
-     AV_PIX_FMT_SAND64_16,  ///< 4:2:0 16-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
-
-From 426d93c7bd910d9222a5cbeb011ede5d9890dcbf Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 11 Jan 2023 16:30:37 +0000
-Subject: [PATCH 112/186] v4l2_m2m_dec: Fix initial pkt send if no extradata
-
----
- libavcodec/v4l2_m2m_dec.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 4d170572980e..9daf05adfe74 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -240,7 +240,7 @@ copy_extradata(AVCodecContext * const avctx,
-     else
-         len = src_len < 0 ? AVERROR(EINVAL) : src_len;
- 
--    // Zero length is OK but we swant to stop - -ve is error val
-+    // Zero length is OK but we want to stop - -ve is error val
-     if (len <= 0)
-         return len;
- 
-@@ -525,7 +525,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const
- 
-     if (s->extdata_sent)
-         ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0);
--    else if (s->extdata_data)
-+    else
-         ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size);
- 
-     if (ret == AVERROR(EAGAIN)) {
-
-From da6cd7985ffa515607e68116aa923fda23a40beb Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 16 Jan 2023 16:05:09 +0000
-Subject: [PATCH 113/186] v4l2m2m_dec: Make capture timeout long once pending
- count > 31
-
-For some applications (ffmpeg command line) the current heuristic of adding
-a short timeout and preferring DQ over Q once we think we have buffers
-Qed in V4L2 is insufficient to prevent arbitrary buffer growth.
-Unfortunately the current method of guessing the number of Qed buffers isn't
-reliable enough to allow for a long timeout with only a few few buffers
-believed pending so only do so once the number of buffers believed pending
-exceeds plausible inaccuracies caused by buffer reordering.
-
-The limit could be optimised by codec or apparent latency but a simple
-number should reduce the  unexpected consequences.
----
- libavcodec/v4l2_m2m.h     |  3 ++-
- libavcodec/v4l2_m2m_dec.c | 18 ++++++++++++++----
- 2 files changed, 16 insertions(+), 5 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index 0f41f94694d3..ded1478a49da 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -66,7 +66,7 @@ typedef struct pts_stats_s
- 
- typedef struct xlat_track_s {
-     unsigned int track_no;
--    int64_t last_pts;
-+    int64_t last_pts;    // Last valid PTS decoded
-     int64_t last_opaque;
-     V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
- } xlat_track_t;
-@@ -88,6 +88,7 @@ typedef struct V4L2m2mContext {
- 
-     /* null frame/packet received */
-     int draining;
-+    int running;
-     AVPacket buf_pkt;
- 
-     /* Reference to a frame. Only used during encoding */
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 9daf05adfe74..c8ab883d7ef2 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -582,7 +582,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- 
-     do {
-         const int pending = xlat_pending(&s->xlat);
--        const int prefer_dq = (pending > 3);
-+        const int prefer_dq = (pending > 4);
-         const int last_src_rv = src_rv;
- 
-         av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt);
-@@ -611,10 +611,14 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-         // (b) enqueue returned a status indicating that decode should be attempted
-         if (dst_rv != 0 && TRY_DQ(src_rv)) {
-             // Pick a timeout depending on state
-+            // The pending count isn't completely reliable so it is good enough
-+            // hint that we want a frame but not good enough to require it in
-+            // all cases; however if it has got > 31 that exceeds its margin of
-+            // error so require a frame to prevent ridiculous levels of latency
-             const int t =
-                 src_rv == NQ_Q_FULL ? -1 :
-                 src_rv == NQ_DRAINING ? 300 :
--                prefer_dq ? 5 : 0;
-+                prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0;
- 
-             // Dequeue frame will unref any previous contents of frame
-             // if it returns success so we don't need an explicit unref
-@@ -631,8 +635,13 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-                 }
-             }
- 
--            if (dst_rv == 0)
-+            if (dst_rv == 0) {
-                 set_best_effort_pts(avctx, &s->pts_stat, frame);
-+                if (!s->running) {
-+                    s->running = 1;
-+                    av_log(avctx, AV_LOG_VERBOSE, "Decode running\n");
-+                }
-+            }
- 
-             if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) {
-                 av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF");
-@@ -998,7 +1007,8 @@ static void v4l2_decode_flush(AVCodecContext *avctx)
- 
-     // resend extradata
-     s->extdata_sent = 0;
--    // clear EOS status vars
-+    // clear status vars
-+    s->running = 0;
-     s->draining = 0;
-     output->done = 0;
-     capture->done = 0;
-
-From 58854764f365ac020b2d353f1db6b4d7ffa099a4 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 6 Feb 2023 19:23:16 +0000
-Subject: [PATCH 114/186] Initial buffersink alloc callback code
-
-(cherry picked from commit dde8d3c8f3cc279b9b92ed4f10a2e3990f4aadeb)
----
- libavfilter/buffersink.c | 44 ++++++++++++++++++++++++++++++++++++++++
- libavfilter/buffersink.h |  3 +++
- 2 files changed, 47 insertions(+)
-
-diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c
-index 9ab83696ce1b..837579946d65 100644
---- a/libavfilter/buffersink.c
-+++ b/libavfilter/buffersink.c
-@@ -62,6 +62,11 @@ typedef struct BufferSinkContext {
-     int sample_rates_size;
- 
-     AVFrame *peeked_frame;
+diff --git a/pi-util/v3dusage.py b/pi-util/v3dusage.py
+new file mode 100755
+index 000000000000..5935a11ca553
+--- /dev/null
++++ b/pi-util/v3dusage.py
+@@ -0,0 +1,128 @@
++#!/usr/bin/env python
++
++import sys
++import argparse
++import re
++
++def do_logparse(logname):
++    """Parse the log file `logname` and print per-unit busy time plus QPU cycle, TMU-stall and L2 summaries."""
++    rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ')
++    rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$')
++    rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$')
++    rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$')
++
++    ttotal = {'idle':0.0}
++    tstart = {}
++    qctotal = {}
++    qtstotal = {}
++    l2hits = {}
++    l2total = {}
++    time0 = None
++    idle_start = None
++    qpu_op_no = 0
++    op_count = 0
++
++    with open(logname, "rt") as infile:
++        for line in infile:
++            match = rmatch.match(line)
++            if match:
++#                print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":"
++                time = float(match.group(1))
++                unit = match.group(3)
++                opstart = not match.group(2)  # no "done " prefix means the op is starting
++                optype = match.group(7)
++                hascb = match.group(8) != "0"
++
++                if unit == 'qpu1':
++                    unit = unit + "." + str(qpu_op_no)
++                    if not opstart:
++                        if hascb or optype == 'EXECUTE_SYNC':
++                            qpu_op_no = 0
++                        else:
++                            qpu_op_no += 1
++
++                # Ignore sync type
++                if optype == 'EXECUTE_SYNC':
++                    continue
++
++                if time0 is None:  # explicit None test: a 0.000 timestamp is still a valid start
++                    time0 = time
++
++                if opstart:
++                    tstart[unit] = time
++                elif unit in tstart:
++                    op_count += 1
++                    if unit not in ttotal:
++                        ttotal[unit] = 0.0
++                    ttotal[unit] += time - tstart[unit]
++                    del tstart[unit]
++
++                if idle_start is None and not tstart:  # nothing running: an idle period begins
++                    idle_start = time
++                elif idle_start is not None and tstart:  # something started: close the idle period
++                    ttotal['idle'] += time - idle_start
++                    idle_start = None
++
++            match = rqcycle.match(line)
++            if match:
++                unit = "qpu1." + str(qpu_op_no)
++                if unit not in qctotal:
++                    qctotal[unit] = 0
++                qctotal[unit] += int(match.group(2))
++
++            match = rqtscycle.match(line)
++            if match:
++                unit = "qpu1." + str(qpu_op_no)
++                if unit not in qtstotal:
++                    qtstotal[unit] = 0
++                qtstotal[unit] += int(match.group(2))
++
++            match = rl2hits.match(line)
++            if match:
++                unit = "qpu1." + str(qpu_op_no)
++                if unit not in l2total:
++                    l2total[unit] = 0
++                    l2hits[unit] = 0
++                l2total[unit] += int(match.group(3))
++                if match.group(2) == "hits":
++                    l2hits[unit] += int(match.group(3))
++
++
++    if time0 is None:
++        print("No v3d profile records found")
++    else:
++        tlogged = time - time0
++
++        print("Logged time: %s   Op count: %s" % (tlogged, op_count))
++        for unit in sorted(ttotal):
++            print('%6s: %10.3f    %7.3f%%' % (unit, ttotal[unit], (ttotal[unit] * 100.0 / tlogged) if tlogged else 0.0))
++        print('')
++        for unit in sorted(qctotal):
++            if unit not in qtstotal:
++                qtstotal[unit] = 0
++            print('%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], ((qtstotal[unit] * 100.0)/qctotal[unit]) if qctotal[unit] else 0.0))
++            if unit in l2total:
++                print('        L2Total: %10d, hits:      %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], ((l2hits[unit] * 100.0)/l2total[unit]) if l2total[unit] else 0.0))
++
++
++
++if __name__ == '__main__':
++    argp = argparse.ArgumentParser(
++        formatter_class=argparse.RawDescriptionHelpFormatter,
++        description="QPU/VPU perf summary from VC logging",
++        epilog = """
++Will also summarise TMU stalls if logging requests set in qpu noflush param
++in the profiled code.
++
++Example use:
++  vcgencmd set_logging level=0xc0
++  <command to profile>
++  sudo vcdbg log msg >& t.log
++  v3dusage.py t.log
++""")
++
++    argp.add_argument("logfile")
++    args = argp.parse_args()
++
++    do_logparse(args.logfile)
 +
-+    union {
-+        av_buffersink_alloc_video_frame * video;
-+    } alloc_cb;
-+    void * alloc_v;
- } BufferSinkContext;
- 
- #define NB_ITEMS(list) (list ## _size / sizeof(*list))
-@@ -154,6 +159,44 @@ int attribute_align_arg av_buffersink_get_samples(AVFilterContext *ctx,
-     return get_frame_internal(ctx, frame, 0, nb_samples);
- }
- 
-+static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h)
-+{
-+    AVFilterContext * const ctx = link->dst;
-+    BufferSinkContext * const bs = ctx->priv;
-+    return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) :
-+        ff_default_get_video_buffer(link, w, h);
-+}
-+
-+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v)
-+{
-+    BufferSinkContext * const bs = ctx->priv;
-+    bs->alloc_cb.video = cb;
-+    bs->alloc_v = v;
-+    return 0;
-+}
-+
-+#if FF_API_BUFFERSINK_ALLOC
-+AVBufferSinkParams *av_buffersink_params_alloc(void)
-+{
-+    static const int pixel_fmts[] = { AV_PIX_FMT_NONE };
-+    AVBufferSinkParams *params = av_malloc(sizeof(AVBufferSinkParams));
-+    if (!params)
-+        return NULL;
-+
-+    params->pixel_fmts = pixel_fmts;
-+    return params;
-+}
-+
-+AVABufferSinkParams *av_abuffersink_params_alloc(void)
-+{
-+    AVABufferSinkParams *params = av_mallocz(sizeof(AVABufferSinkParams));
-+
-+    if (!params)
-+        return NULL;
-+    return params;
-+}
-+#endif
-+
- static av_cold int common_init(AVFilterContext *ctx)
- {
-     BufferSinkContext *buf = ctx->priv;
-@@ -381,6 +424,7 @@ static const AVFilterPad avfilter_vsink_buffer_inputs[] = {
-     {
-         .name = "default",
-         .type = AVMEDIA_TYPE_VIDEO,
-+        .get_buffer = {.video = alloc_video_buffer},
-     },
- };
- 
-diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h
-index 64e08de53ee5..09737d322fb7 100644
---- a/libavfilter/buffersink.h
-+++ b/libavfilter/buffersink.h
-@@ -166,6 +166,9 @@ int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame);
-  */
- int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples);
- 
-+typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h);
-+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v);
-+
- /**
-  * @}
-  */
-
-From d6e844180b9f50a33b837ec5cc897d5387edf2be Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 30 Jan 2023 17:23:12 +0000
-Subject: [PATCH 115/186] v4l2_m2m_dec: Add a profile check
-
-Check the profile in avctx aginst what the v4l2 driver advertises. If
-the driver doesn't support the check then just accept anything.
-
-(cherry picked from commit 6dd83dead9ebce419fdea152db0c9f5e9a94e9ef)
----
- libavcodec/v4l2_m2m_dec.c | 125 ++++++++++++++++++++++++++++++++++++++
- 1 file changed, 125 insertions(+)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index c8ab883d7ef2..098adf4821eb 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -715,6 +715,127 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- }
- #endif
- 
-+static uint32_t
-+avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile)
-+{
-+    switch (codec_id) {
-+        case AV_CODEC_ID_H264:
-+            switch (avprofile) {
-+                case FF_PROFILE_H264_BASELINE:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE;
-+                case FF_PROFILE_H264_CONSTRAINED_BASELINE:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE;
-+                case FF_PROFILE_H264_MAIN:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN;
-+                case FF_PROFILE_H264_EXTENDED:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED;
-+                case FF_PROFILE_H264_HIGH:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH;
-+                case FF_PROFILE_H264_HIGH_10:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10;
-+                case FF_PROFILE_H264_HIGH_10_INTRA:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA;
-+                case FF_PROFILE_H264_MULTIVIEW_HIGH:
-+                case FF_PROFILE_H264_HIGH_422:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422;
-+                case FF_PROFILE_H264_HIGH_422_INTRA:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA;
-+                case FF_PROFILE_H264_STEREO_HIGH:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH;
-+                case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE;
-+                case FF_PROFILE_H264_HIGH_444_INTRA:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA;
-+                case FF_PROFILE_H264_CAVLC_444:
-+                    return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA;
-+                case FF_PROFILE_H264_HIGH_444:
-+                default:
-+                    break;
-+//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE		= 12,
-+//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH		= 13,
-+//                    V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA	= 14,
-+//                    V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH		= 16,
-+//                    V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH		= 17,
-+            }
-+            break;
-+        case AV_CODEC_ID_MPEG2VIDEO:
-+        case AV_CODEC_ID_MPEG4:
-+        case AV_CODEC_ID_VC1:
-+        case AV_CODEC_ID_VP8:
-+        case AV_CODEC_ID_VP9:
-+        case AV_CODEC_ID_AV1:
-+            // Most profiles are a simple number that matches the V4L2 enum
-+            return avprofile;
-+        default:
-+            break;
-+    }
-+    return ~(uint32_t)0;
-+}
-+
-+// This check mirrors Chrome's profile check by testing to see if the profile
-+// exists as a possible value for the V4L2 profile control
-+static int
-+check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s)
-+{
-+    struct v4l2_queryctrl query_ctrl;
-+    struct v4l2_querymenu query_menu;
-+    uint32_t profile_id;
-+
-+    // An unset profile is almost certainly zero or -99 - do not reject
-+    if (avctx->profile <= 0) {
-+        av_log(avctx, AV_LOG_VERBOSE, "Profile <= 0 - check skipped\n");
-+        return 0;
-+    }
-+
-+    memset(&query_ctrl, 0, sizeof(query_ctrl));
-+    switch (avctx->codec_id) {
-+        case AV_CODEC_ID_MPEG2VIDEO:
-+            profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE;
-+            break;
-+        case AV_CODEC_ID_MPEG4:
-+            profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE;
-+            break;
-+        case AV_CODEC_ID_H264:
-+            profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE;
-+            break;
-+        case AV_CODEC_ID_VP8:
-+            profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE;
-+            break;
-+        case AV_CODEC_ID_VP9:
-+            profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE;
-+            break;
-+#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE
-+        case AV_CODEC_ID_AV1:
-+            profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE;
-+            break;
-+#endif
-+        default:
-+            av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id);
-+            return 0;
-+    }
-+
-+    query_ctrl = (struct v4l2_queryctrl){.id = profile_id};
-+    if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) {
-+        av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id);
-+    }
-+    else {
-+        av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id);
-+
-+        query_menu = (struct v4l2_querymenu){
-+            .id = query_ctrl.id,
-+            .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile),
-+        };
-+
-+        if (query_menu.index > query_ctrl.maximum ||
-+            query_menu.index < query_ctrl.minimum ||
-+            ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) {
-+            return AVERROR(ENOENT);
-+        }
-+    }
-+
-+    return 0;
-+};
-+
- static int
- check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
- {
-@@ -955,6 +1076,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     if ((ret = check_size(avctx, s)) != 0)
-         return ret;
- 
-+    if ((ret = check_profile(avctx, s)) != 0) {
-+        av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile);
-+        return ret;
-+    }
-     return 0;
- }
- 
-
-From d0992e458d9017cbb0383961b92589a015337aa3 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 1 Feb 2023 17:24:39 +0000
-Subject: [PATCH 116/186] v4l2_m2m_dec: Add extradata parse for h264 & hevc
-
-If we have extradata we can extract profile & level and potentailly
-other useful info from it. Use the codec parser to get it if the decoder
-is configured.
-
-(cherry picked from commit 6d431e79adeb246c2ed8cebce9011d81175a3906)
----
- libavcodec/v4l2_m2m_dec.c | 84 ++++++++++++++++++++++++++++++++++++++-
- 1 file changed, 83 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 098adf4821eb..e64bc707d3c6 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -21,6 +21,8 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
-+#include "config.h"
-+
- #include <linux/videodev2.h>
- #include <sys/ioctl.h>
- 
-@@ -43,6 +45,13 @@
- #include "v4l2_fmt.h"
- #include "v4l2_req_dmabufs.h"
- 
-+#if CONFIG_H264_DECODER
-+#include "h264_parse.h"
-+#endif
-+#if CONFIG_HEVC_DECODER
-+#include "hevc_parse.h"
-+#endif
-+
- // Pick 64 for max last count - that is >1sec at 60fps
- #define STATS_LAST_COUNT_MAX 64
- #define STATS_INTERVAL_MAX (1 << 30)
-@@ -956,6 +965,78 @@ static uint32_t max_coded_size(const AVCodecContext * const avctx)
-     return size + (1 << 16);
- }
- 
-+static void
-+parse_extradata(AVCodecContext *avctx)
-+{
-+    if (!avctx->extradata || !avctx->extradata_size)
-+        return;
-+
-+    switch (avctx->codec_id) {
-+#if CONFIG_H264_DECODER
-+        case AV_CODEC_ID_H264:
-+        {
-+            H264ParamSets ps = {{NULL}};
-+            int is_avc = 0;
-+            int nal_length_size = 0;
-+            int ret;
-+
-+            ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
-+                                           &ps, &is_avc, &nal_length_size,
-+                                           avctx->err_recognition, avctx);
-+            if (ret > 0) {
-+                const SPS * sps = NULL;
-+                unsigned int i;
-+                for (i = 0; i != MAX_SPS_COUNT; ++i) {
-+                    if (ps.sps_list[i]) {
-+                        sps = (const SPS *)ps.sps_list[i]->data;
-+                        break;
-+                    }
-+                }
-+                if (sps) {
-+                    avctx->profile = ff_h264_get_profile(sps);
-+                    avctx->level = sps->level_idc;
-+                }
-+            }
-+            ff_h264_ps_uninit(&ps);
-+            break;
-+        }
-+#endif
-+#if CONFIG_HEVC_DECODER
-+        case AV_CODEC_ID_HEVC:
-+        {
-+            HEVCParamSets ps = {{NULL}};
-+            HEVCSEI sei = {{{{0}}}};
-+            int is_nalff = 0;
-+            int nal_length_size = 0;
-+            int ret;
-+
-+            ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size,
-+                                           &ps, &sei, &is_nalff, &nal_length_size,
-+                                           avctx->err_recognition, 0, avctx);
-+            if (ret > 0) {
-+                const HEVCSPS * sps = NULL;
-+                unsigned int i;
-+                for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) {
-+                    if (ps.sps_list[i]) {
-+                        sps = (const HEVCSPS *)ps.sps_list[i]->data;
-+                        break;
-+                    }
-+                }
-+                if (sps) {
-+                    avctx->profile = sps->ptl.general_ptl.profile_idc;
-+                    avctx->level   = sps->ptl.general_ptl.level_idc;
-+                }
-+            }
-+            ff_hevc_ps_uninit(&ps);
-+            ff_hevc_reset_sei(&sei);
-+            break;
-+        }
-+#endif
-+        default:
-+            break;
-+    }
-+}
-+
- static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- {
-     V4L2Context *capture, *output;
-@@ -976,7 +1057,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-         avctx->ticks_per_frame = 2;
-     }
- 
--    av_log(avctx, AV_LOG_INFO, "level=%d\n", avctx->level);
-+    parse_extradata(avctx);
-+
-     ret = ff_v4l2_m2m_create_context(priv, &s);
-     if (ret < 0)
-         return ret;
-
-From 7753c3a64821de5104f46068e9753d7ac86b8a5d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 20 Mar 2023 18:12:51 +0000
-Subject: [PATCH 117/186] clean_usr_libs: Now wipes the include files too
-
-When swapping ffmpeg versions obsolete makefiles could confuse
-configure utilities.
----
- pi-util/clean_usr_libs.sh | 16 ++++++++++++++++
- 1 file changed, 16 insertions(+)
-
-diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh
-index b3b2d5509de0..01bd6a6a2254 100755
---- a/pi-util/clean_usr_libs.sh
-+++ b/pi-util/clean_usr_libs.sh
-@@ -1,4 +1,20 @@
- set -e
-+U=/usr/include/arm-linux-gnueabihf
-+rm -rf $U/libavcodec
-+rm -rf $U/libavdevice
-+rm -rf $U/libavfilter
-+rm -rf $U/libavformat
-+rm -rf $U/libavutil
-+rm -rf $U/libswresample
-+rm -rf $U/libswscale
-+U=/usr/include/aarch64-linux-gnu
-+rm -rf $U/libavcodec
-+rm -rf $U/libavdevice
-+rm -rf $U/libavfilter
-+rm -rf $U/libavformat
-+rm -rf $U/libavutil
-+rm -rf $U/libswresample
-+rm -rf $U/libswscale
- U=/usr/lib/arm-linux-gnueabihf
- rm -f $U/libavcodec.*
- rm -f $U/libavdevice.*
-
-From 4116d51b48e57cfbe9a7986d38aa6818cb65bfbb Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 20 Mar 2023 18:15:08 +0000
-Subject: [PATCH 118/186] vulkan: Add missing decode extension defines
-
-When building on bookworm the video decode extension names
-were missing. This adds them. I expect this patch will be
-obsolete shortly but it solves a current problem.
----
- libavutil/hwcontext_vulkan.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
-index ffd4f5dec4ec..d59f9409dd7c 100644
---- a/libavutil/hwcontext_vulkan.c
-+++ b/libavutil/hwcontext_vulkan.c
-@@ -57,6 +57,14 @@
- #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
- #endif
- 
-+// Sometimes missing definitions
-+#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME
-+#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264"
-+#endif
-+#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME
-+#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265"
-+#endif
-+
- typedef struct VulkanQueueCtx {
-     VkFence fence;
-     VkQueue queue;
-
-From cebdcff3a25a64706c9a863e543260923c823237 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 21 Mar 2023 14:20:05 +0000
-Subject: [PATCH 119/186] v4l2_m2m_dec: Fix config file for finding if decoder
- enabled
-
-Fixes parsing of extradata for profile testing. 5.x changed where that
-info is defined.
----
- libavcodec/v4l2_m2m_dec.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index e64bc707d3c6..91136f03da80 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -21,7 +21,7 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
--#include "config.h"
-+#include "config_components.h"
- 
- #include <linux/videodev2.h>
- #include <sys/ioctl.h>
-
-From d091812c90f3fc6bca97e6efbb1fd30ebc112043 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 21 Mar 2023 14:23:20 +0000
-Subject: [PATCH 120/186] v4l2_m2m_dec: Display profile given if skipped in
- debug
-
----
- libavcodec/v4l2_m2m_dec.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 91136f03da80..d124c7b1fc43 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -792,7 +792,7 @@ check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s)
- 
-     // An unset profile is almost certainly zero or -99 - do not reject
-     if (avctx->profile <= 0) {
--        av_log(avctx, AV_LOG_VERBOSE, "Profile <= 0 - check skipped\n");
-+        av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile);
-         return 0;
-     }
- 
-
-From cbc083d57efdadef70b0b218cc252b24ed501596 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 22 Mar 2023 16:08:08 +0000
-Subject: [PATCH 121/186] conf_native: Fix for 64-bit kernel with 32-bit
- userspace
-
-(cherry picked from commit 5bb1e09cea95b4215c6904b9b1a726e83bc5d327)
----
- pi-util/conf_native.sh | 32 +++++++++++++++++++++-----------
- 1 file changed, 21 insertions(+), 11 deletions(-)
-
-diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
-index 082d9b58320e..0a7d230f1b70 100755
---- a/pi-util/conf_native.sh
-+++ b/pi-util/conf_native.sh
-@@ -33,18 +33,28 @@ RPI_LIBDIRS=
- RPI_DEFINES=
- RPI_EXTRALIBS=
- 
--if [ "$MC" == "arm64" ]; then
--  echo "M/C aarch64"
--  A=aarch64-linux-gnu
--  B=arm64
--elif [ "$MC" == "armhf" ]; then
--  echo "M/C armv7"
--  A=arm-linux-gnueabihf
--  B=armv7
--  MCOPTS="--arch=armv6t2 --cpu=cortex-a7"
--  RPI_DEFINES=-mfpu=neon-vfpv4
-+# uname -m gives kernel type which may not have the same
-+# 32/64bitness as userspace :-( getconf shoudl provide the answer
-+# but use uname to check we are on the right processor
-+MC=`uname -m`
-+LB=`getconf LONG_BIT`
-+if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then
-+  if [ "$LB" == "32" ]; then
-+    echo "M/C armv7"
-+    A=arm-linux-gnueabihf
-+    B=armv7
-+    MCOPTS="--arch=armv6t2 --cpu=cortex-a7"
-+    RPI_DEFINES=-mfpu=neon-vfpv4
-+  elif [ "$LB" == "64" ]; then
-+    echo "M/C aarch64"
-+    A=aarch64-linux-gnu
-+    B=arm64
-+  else
-+    echo "Unknown LONG_BIT name: $LB"
-+    exit 1
-+  fi
- else
--  echo Unexpected architecture $MC
-+  echo "Unknown machine name: $MC"
-   exit 1
- fi
- 
-
-From 17d3d7a9750f8cb7954d0f396ea68eb408411c7e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 20 Apr 2023 11:48:25 +0000
-Subject: [PATCH 122/186] conf_native: Add install prefix variation
-
-(cherry picked from commit 73c3019b534cb8f4b4e4c21995653f6ce440086d)
----
- pi-util/BUILD.txt      | 32 ++++++++++++++++++++------------
- pi-util/conf_native.sh | 14 ++++++++++++--
- 2 files changed, 32 insertions(+), 14 deletions(-)
-
-diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt
-index b050971f63c5..2b62d660c0d1 100644
---- a/pi-util/BUILD.txt
-+++ b/pi-util/BUILD.txt
-@@ -24,6 +24,8 @@ There are a few choices here
-          paths being confused and therefore running the wrong code,  Shared
-          is what is needed, in most cases, when building for use by other
-          programs.
-+ --usr   Set install dir to /usr (i.e. system default) rather than in
-+         <builddir>/install
- 
- So for a static build
- ---------------------
-@@ -37,23 +39,29 @@ You can now run ffmpeg directly from where it was built
- For a shared build
- ------------------
- 
--$ pi-util/conf_native.sh
--
--You will normally want an install target if shared. Note that the script has
--set this up to be generated in out/<builddir>/install, you don't have to worry
--about overwriting your system libs.
-+There are two choices here
- 
-+$ pi-util/conf_native.sh
- $ make -j8 -C out/<builddir> install
- 
--You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was
--built or install the image on the system - you have to be careful to get rid
--of all other ffmpeg libs or confusion may result.  There is a little script
--that wipes all other versions - obviously use with care!
-+This sets the install prefix to <builddir>/install and is probably what you
-+want if you don't want to overwrite the system files.
- 
--$ sudo pi-util/clean_usr_libs.sh
-+You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was
-+built. You can copy the contents of <build dir>/install to /usr and that mostly
-+works. The only downside is that paths in pkgconfig end up being set to the
-+install directory in your build directory which may be less than ideal when
-+building other packages.
- 
--Then simply copying from the install to /usr works
-+The alternative if you just want to replace the system libs is:
- 
--$ sudo cp -r out/<builddir>/install/* /usr
-+$ pi-util/conf_native.sh --usr
-+$ make -j8 -C out/<builddir>
-+$ sudo pi-util/clean_usr_libs.sh
-+$ sudo make -j8 -C out/<builddir> install
- 
-+The clean_usr_libs.sh step wipes any existing libs & includes (for all
-+architectures) from the system which helps avoid confusion when running other
-+progs as you can be sure you're not running old code which is unfortunately
-+easy to do otherwise.
- 
-diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
-index 0a7d230f1b70..f0ed1595948b 100755
---- a/pi-util/conf_native.sh
-+++ b/pi-util/conf_native.sh
-@@ -9,6 +9,7 @@ RPI_KEEPS=""
- 
- NOSHARED=
- MMAL=
-+USR_PREFIX=
- 
- while [ "$1" != "" ] ; do
-     case $1 in
-@@ -18,8 +19,14 @@ while [ "$1" != "" ] ; do
- 	--mmal)
- 	    MMAL=1
- 	    ;;
-+	--usr)
-+	    USR_PREFIX=/usr
-+	    ;;
- 	*)
--	    echo "Usage $0: [--noshared] [--mmal]"
-+	    echo "Usage $0: [--noshared] [--mmal] [--usr]"
-+	    echo "  noshared  Build static libs and executable - good for testing"
-+	    echo "  mmal      Build mmal decoders"
-+	    echo "  usr       Set install prefix to /usr [default=<build-dir>/install]"
- 	    exit 1
- 	    ;;
-     esac
-@@ -82,7 +89,9 @@ else
-   OUT=$BUILDBASE/$B-$C-$V-shared-rel
- fi
- 
--USR_PREFIX=$OUT/install
-+if [ ! $USR_PREFIX ]; then
-+  USR_PREFIX=$OUT/install
-+fi
- LIB_PREFIX=$USR_PREFIX/lib/$A
- INC_PREFIX=$USR_PREFIX/include/$A
- 
-@@ -113,6 +122,7 @@ $FFSRC/configure \
-  --extra-libs="$RPI_EXTRALIBS"\
-  --extra-version="rpi"
- 
-+echo "Configured into $OUT"
- 
- # gcc option for getting asm listing
- # -Wa,-ahls
-
-From 5c43e72236ef247a0a68e5ca9417496ee2e179a5 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 19 Apr 2023 10:47:58 +0000
-Subject: [PATCH 123/186] swcale: Add explicit bgr24->yv12 conversion
-
-(cherry picked from commit 9a22d429f46a038321c66a0cd54737177641b434)
----
- libswscale/rgb2rgb.c          |  5 +++++
- libswscale/rgb2rgb.h          |  7 +++++++
- libswscale/rgb2rgb_template.c | 36 ++++++++++++++++++++++++++++++-----
- libswscale/swscale_unscaled.c | 22 +++++++++++++++++++++
- 4 files changed, 65 insertions(+), 5 deletions(-)
-
-diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
-index e98fdac8ead6..84bb56e60e94 100644
---- a/libswscale/rgb2rgb.c
-+++ b/libswscale/rgb2rgb.c
-@@ -83,6 +83,11 @@ void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst,
-                        int width, int height,
-                        int lumStride, int chromStride, int srcStride,
-                        int32_t *rgb2yuv);
-+void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
-+                       uint8_t *udst, uint8_t *vdst,
-+                       int width, int height,
-+                       int lumStride, int chromStride, int srcStride,
-+                       int32_t *rgb2yuv);
- void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
-                  int srcStride, int dstStride);
- void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
-index f3951d523ef7..0028ab345fc9 100644
---- a/libswscale/rgb2rgb.h
-+++ b/libswscale/rgb2rgb.h
-@@ -79,6 +79,9 @@ void    rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
- void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                       uint8_t *vdst, int width, int height, int lumStride,
-                       int chromStride, int srcStride, int32_t *rgb2yuv);
-+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                      uint8_t *vdst, int width, int height, int lumStride,
-+                      int chromStride, int srcStride, int32_t *rgb2yuv);
- 
- /**
-  * Height should be a multiple of 2 and width should be a multiple of 16.
-@@ -128,6 +131,10 @@ extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                               int width, int height,
-                               int lumStride, int chromStride, int srcStride,
-                               int32_t *rgb2yuv);
-+extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-+                              int width, int height,
-+                              int lumStride, int chromStride, int srcStride,
-+                              int32_t *rgb2yuv);
- extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
-                         int srcStride, int dstStride);
- 
-diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
-index 42c69801ba40..e2437826dd41 100644
---- a/libswscale/rgb2rgb_template.c
-+++ b/libswscale/rgb2rgb_template.c
-@@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
-  * others are ignored in the C version.
-  * FIXME: Write HQ version.
-  */
--void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                    uint8_t *vdst, int width, int height, int lumStride,
--                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+                   int chromStride, int srcStride, int32_t *rgb2yuv,
-+                   const uint8_t x[9])
- {
--    int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
--    int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
--    int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
-+    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
-+    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
-+    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
-     int y;
-     const int chromWidth = width >> 1;
- 
-@@ -707,6 +708,30 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-     }
- }
- 
-+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+{
-+    static const uint8_t x[9] = {
-+        RY_IDX, GY_IDX, BY_IDX,
-+        RU_IDX, GU_IDX, BU_IDX,
-+        RV_IDX, GV_IDX, BV_IDX,
-+    };
-+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x);
-+}
-+
-+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+{
-+    static const uint8_t x[9] = {
-+         BY_IDX, GY_IDX, RY_IDX,
-+         BU_IDX, GU_IDX, RU_IDX,
-+         BV_IDX, GV_IDX, RV_IDX,
-+    };
-+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x);
-+}
-+
- static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
-                               uint8_t *dest, int width, int height,
-                               int src1Stride, int src2Stride, int dstStride)
-@@ -980,6 +1005,7 @@ static av_cold void rgb2rgb_init_c(void)
-     yuy2toyv12         = yuy2toyv12_c;
-     planar2x           = planar2x_c;
-     ff_rgb24toyv12     = ff_rgb24toyv12_c;
-+    ff_bgr24toyv12     = ff_bgr24toyv12_c;
-     interleaveBytes    = interleaveBytes_c;
-     deinterleaveBytes  = deinterleaveBytes_c;
-     vu9_to_vu12        = vu9_to_vu12_c;
-diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
-index 9af2e7ecc30d..9047030ae426 100644
---- a/libswscale/swscale_unscaled.c
-+++ b/libswscale/swscale_unscaled.c
-@@ -1654,6 +1654,23 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-     return srcSliceH;
- }
- 
-+static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-+                              int srcStride[], int srcSliceY, int srcSliceH,
-+                              uint8_t *dst[], int dstStride[])
-+{
-+    ff_bgr24toyv12(
-+        src[0],
-+        dst[0] +  srcSliceY       * dstStride[0],
-+        dst[1] + (srcSliceY >> 1) * dstStride[1],
-+        dst[2] + (srcSliceY >> 1) * dstStride[2],
-+        c->srcW, srcSliceH,
-+        dstStride[0], dstStride[1], srcStride[0],
-+        c->input_rgb2yuv_table);
-+    if (dst[3])
-+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-+    return srcSliceH;
-+}
-+
- static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-                              int srcStride[], int srcSliceY, int srcSliceH,
-                              uint8_t *dst[], int dstStride[])
-@@ -2037,6 +2054,11 @@ void ff_get_unscaled_swscale(SwsContext *c)
-         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
-         !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-         c->convert_unscaled = bgr24ToYv12Wrapper;
-+    /* rgb24toYV12 */
-+    if (srcFormat == AV_PIX_FMT_RGB24 &&
-+        (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
-+        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        c->convert_unscaled = rgb24ToYv12Wrapper;
- 
-     /* RGB/BGR -> RGB/BGR (no dither needed forms) */
-     if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)
-
-From 9161e42a5f914181fa97d86f20498632e9827556 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 20 Apr 2023 11:26:10 +0000
-Subject: [PATCH 124/186] swscale: Add unscaled XRGB->YUV420P functions
-
-(cherry picked from commit 04cc32ee3f390de513ad8c6156c0c66b2c60abc8)
----
- libswscale/rgb2rgb.c          |  20 ++++++
- libswscale/rgb2rgb.h          |  16 +++++
- libswscale/rgb2rgb_template.c | 123 ++++++++++++++++++++++++++++++----
- libswscale/swscale_unscaled.c |  89 ++++++++++++++++++++++++
- 4 files changed, 236 insertions(+), 12 deletions(-)
-
-diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
-index 84bb56e60e94..c3b9079d2b3e 100644
---- a/libswscale/rgb2rgb.c
-+++ b/libswscale/rgb2rgb.c
-@@ -88,6 +88,26 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst,
-                        int width, int height,
-                        int lumStride, int chromStride, int srcStride,
-                        int32_t *rgb2yuv);
-+void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst,
-+					  uint8_t *udst, uint8_t *vdst,
-+					  int width, int height,
-+					  int lumStride, int chromStride, int srcStride,
-+					  int32_t *rgb2yuv);
-+void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst,
-+					  uint8_t *udst, uint8_t *vdst,
-+					  int width, int height,
-+					  int lumStride, int chromStride, int srcStride,
-+					  int32_t *rgb2yuv);
-+void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst,
-+					  uint8_t *udst, uint8_t *vdst,
-+					  int width, int height,
-+					  int lumStride, int chromStride, int srcStride,
-+					  int32_t *rgb2yuv);
-+void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst,
-+					  uint8_t *udst, uint8_t *vdst,
-+					  int width, int height,
-+					  int lumStride, int chromStride, int srcStride,
-+					  int32_t *rgb2yuv);
- void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
-                  int srcStride, int dstStride);
- void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst,
-diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
-index 0028ab345fc9..a0dd3ffb79ab 100644
---- a/libswscale/rgb2rgb.h
-+++ b/libswscale/rgb2rgb.h
-@@ -135,6 +135,22 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                               int width, int height,
-                               int lumStride, int chromStride, int srcStride,
-                               int32_t *rgb2yuv);
-+extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-+                             int width, int height,
-+                             int lumStride, int chromStride, int srcStride,
-+                             int32_t *rgb2yuv);
-+extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-+                             int width, int height,
-+                             int lumStride, int chromStride, int srcStride,
-+                             int32_t *rgb2yuv);
-+extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-+                             int width, int height,
-+                             int lumStride, int chromStride, int srcStride,
-+                             int32_t *rgb2yuv);
-+extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
-+                             int width, int height,
-+                             int lumStride, int chromStride, int srcStride,
-+                             int32_t *rgb2yuv);
- extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height,
-                         int srcStride, int dstStride);
- 
-diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
-index e2437826dd41..703de90690d3 100644
---- a/libswscale/rgb2rgb_template.c
-+++ b/libswscale/rgb2rgb_template.c
-@@ -708,30 +708,125 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-     }
- }
- 
-+static const uint8_t x_rgb[9] = {
-+    RY_IDX, GY_IDX, BY_IDX,
-+    RU_IDX, GU_IDX, BU_IDX,
-+    RV_IDX, GV_IDX, BV_IDX,
-+};
-+
-+static const uint8_t x_bgr[9] = {
-+     BY_IDX, GY_IDX, RY_IDX,
-+     BU_IDX, GU_IDX, RU_IDX,
-+     BV_IDX, GV_IDX, RV_IDX,
-+};
-+
- void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                    uint8_t *vdst, int width, int height, int lumStride,
-                    int chromStride, int srcStride, int32_t *rgb2yuv)
- {
--    static const uint8_t x[9] = {
--        RY_IDX, GY_IDX, BY_IDX,
--        RU_IDX, GU_IDX, BU_IDX,
--        RV_IDX, GV_IDX, BV_IDX,
--    };
--    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x);
-+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
- }
- 
- void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                    uint8_t *vdst, int width, int height, int lumStride,
-                    int chromStride, int srcStride, int32_t *rgb2yuv)
- {
--    static const uint8_t x[9] = {
--         BY_IDX, GY_IDX, RY_IDX,
--         BU_IDX, GU_IDX, RU_IDX,
--         BV_IDX, GV_IDX, RV_IDX,
--    };
--    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x);
-+    rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
- }
- 
-+static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv,
-+                   const uint8_t x[9])
-+{
-+    int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]];
-+    int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]];
-+    int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]];
-+    int y;
-+    const int chromWidth = width >> 1;
-+
-+    for (y = 0; y < height; y += 2) {
-+        int i;
-+        for (i = 0; i < chromWidth; i++) {
-+            unsigned int b = src[8 * i + 2];
-+            unsigned int g = src[8 * i + 1];
-+            unsigned int r = src[8 * i + 0];
-+
-+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
-+            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
-+            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
-+
-+            udst[i]     = U;
-+            vdst[i]     = V;
-+            ydst[2 * i] = Y;
-+
-+            b = src[8 * i + 6];
-+            g = src[8 * i + 5];
-+            r = src[8 * i + 4];
-+
-+            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-+            ydst[2 * i + 1] = Y;
-+        }
-+        ydst += lumStride;
-+        src  += srcStride;
-+
-+        if (y+1 == height)
-+            break;
-+
-+        for (i = 0; i < chromWidth; i++) {
-+            unsigned int b = src[8 * i + 2];
-+            unsigned int g = src[8 * i + 1];
-+            unsigned int r = src[8 * i + 0];
-+
-+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-+
-+            ydst[2 * i] = Y;
-+
-+            b = src[8 * i + 6];
-+            g = src[8 * i + 5];
-+            r = src[8 * i + 4];
-+
-+            Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-+            ydst[2 * i + 1] = Y;
-+        }
-+        udst += chromStride;
-+        vdst += chromStride;
-+        ydst += lumStride;
-+        src  += srcStride;
-+    }
-+}
-+
-+static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+{
-+    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
-+}
-+
-+static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+{
-+    rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
-+}
-+
-+// As the general code does no SIMD-like ops simply adding 1 to the src address
-+// will fix the ignored alpha position
-+static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+{
-+    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb);
-+}
-+
-+static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+{
-+    rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr);
-+}
-+
-+
- static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
-                               uint8_t *dest, int width, int height,
-                               int src1Stride, int src2Stride, int dstStride)
-@@ -1006,6 +1101,10 @@ static av_cold void rgb2rgb_init_c(void)
-     planar2x           = planar2x_c;
-     ff_rgb24toyv12     = ff_rgb24toyv12_c;
-     ff_bgr24toyv12     = ff_bgr24toyv12_c;
-+    ff_rgbxtoyv12      = ff_rgbxtoyv12_c;
-+    ff_bgrxtoyv12      = ff_bgrxtoyv12_c;
-+    ff_xrgbtoyv12      = ff_xrgbtoyv12_c;
-+    ff_xbgrtoyv12      = ff_xbgrtoyv12_c;
-     interleaveBytes    = interleaveBytes_c;
-     deinterleaveBytes  = deinterleaveBytes_c;
-     vu9_to_vu12        = vu9_to_vu12_c;
-diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
-index 9047030ae426..053c06adf5d1 100644
---- a/libswscale/swscale_unscaled.c
-+++ b/libswscale/swscale_unscaled.c
-@@ -1671,6 +1671,74 @@ static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-     return srcSliceH;
- }
- 
-+static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-+                             int srcStride[], int srcSliceY, int srcSliceH,
-+                             uint8_t *dst[], int dstStride[])
-+{
-+    ff_bgrxtoyv12(
-+        src[0],
-+        dst[0] +  srcSliceY       * dstStride[0],
-+        dst[1] + (srcSliceY >> 1) * dstStride[1],
-+        dst[2] + (srcSliceY >> 1) * dstStride[2],
-+        c->srcW, srcSliceH,
-+        dstStride[0], dstStride[1], srcStride[0],
-+        c->input_rgb2yuv_table);
-+    if (dst[3])
-+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-+    return srcSliceH;
-+}
-+
-+static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-+                             int srcStride[], int srcSliceY, int srcSliceH,
-+                             uint8_t *dst[], int dstStride[])
-+{
-+    ff_rgbxtoyv12(
-+        src[0],
-+        dst[0] +  srcSliceY       * dstStride[0],
-+        dst[1] + (srcSliceY >> 1) * dstStride[1],
-+        dst[2] + (srcSliceY >> 1) * dstStride[2],
-+        c->srcW, srcSliceH,
-+        dstStride[0], dstStride[1], srcStride[0],
-+        c->input_rgb2yuv_table);
-+    if (dst[3])
-+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-+    return srcSliceH;
-+}
-+
-+static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-+                             int srcStride[], int srcSliceY, int srcSliceH,
-+                             uint8_t *dst[], int dstStride[])
-+{
-+    ff_xbgrtoyv12(
-+        src[0],
-+        dst[0] +  srcSliceY       * dstStride[0],
-+        dst[1] + (srcSliceY >> 1) * dstStride[1],
-+        dst[2] + (srcSliceY >> 1) * dstStride[2],
-+        c->srcW, srcSliceH,
-+        dstStride[0], dstStride[1], srcStride[0],
-+        c->input_rgb2yuv_table);
-+    if (dst[3])
-+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-+    return srcSliceH;
-+}
-+
-+static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-+                             int srcStride[], int srcSliceY, int srcSliceH,
-+                             uint8_t *dst[], int dstStride[])
-+{
-+    ff_xrgbtoyv12(
-+        src[0],
-+        dst[0] +  srcSliceY       * dstStride[0],
-+        dst[1] + (srcSliceY >> 1) * dstStride[1],
-+        dst[2] + (srcSliceY >> 1) * dstStride[2],
-+        c->srcW, srcSliceH,
-+        dstStride[0], dstStride[1], srcStride[0],
-+        c->input_rgb2yuv_table);
-+    if (dst[3])
-+        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
-+    return srcSliceH;
-+}
-+
- static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[],
-                              int srcStride[], int srcSliceY, int srcSliceH,
-                              uint8_t *dst[], int dstStride[])
-@@ -2060,6 +2128,27 @@ void ff_get_unscaled_swscale(SwsContext *c)
-         !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-         c->convert_unscaled = rgb24ToYv12Wrapper;
- 
-+    /* bgrxtoYV12 */
-+    if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) ||
-+         (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
-+        !(flags & SWS_ACCURATE_RND))
-+        c->convert_unscaled = bgrxToYv12Wrapper;
-+    /* rgbx24toYV12 */
-+    if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) ||
-+         (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
-+        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        c->convert_unscaled = rgbxToYv12Wrapper;
-+    /* xbgrtoYV12 */
-+    if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) ||
-+         (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
-+        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        c->convert_unscaled = xbgrToYv12Wrapper;
-+    /* xrgb24toYV12 */
-+    if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) ||
-+         (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
-+        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        c->convert_unscaled = xrgbToYv12Wrapper;
-+
-     /* RGB/BGR -> RGB/BGR (no dither needed forms) */
-     if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c)
-         && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
-
-From 2452146f774e85a1f0fe1c8c2551811cd98adc87 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 20 Apr 2023 11:35:44 +0000
-Subject: [PATCH 125/186] swscale: Add aarch64 unscaled RGB24->YUV420P
-
-(cherry picked from commit 0cf416312095ce5bea3d2f7e9b14736d4b3ed160)
----
- libswscale/aarch64/rgb2rgb.c      |  40 +++++++
- libswscale/aarch64/rgb2rgb_neon.S | 181 ++++++++++++++++++++++++++++++
- 2 files changed, 221 insertions(+)
-
-diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c
-index a9bf6ff9e0a8..6d3e0000dc9a 100644
---- a/libswscale/aarch64/rgb2rgb.c
-+++ b/libswscale/aarch64/rgb2rgb.c
-@@ -30,6 +30,44 @@
- void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2,
-                               uint8_t *dest, int width, int height,
-                               int src1Stride, int src2Stride, int dstStride);
-+void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv);
-+void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv);
-+
-+// RGB to YUV asm fns process 16 pixels at once so ensure that the output
-+// will fit into the stride. ARM64 should cope with unaligned SIMD r/w so
-+// don't test for that
-+// Fall back to C if we cannot use asm
-+
-+static inline int chkw(const int width, const int lumStride, const int chromStride)
-+{
-+    const int aw = FFALIGN(width, 16);
-+    return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2;
-+}
-+
-+static void rgb24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *rgb2yuv)
-+{
-+    if (chkw(width, lumStride, chromStride))
-+        ff_rgb24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv);
-+    else
-+        ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv);
-+}
-+
-+static void bgr24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-+                   uint8_t *vdst, int width, int height, int lumStride,
-+                   int chromStride, int srcStride, int32_t *bgr2yuv)
-+{
-+    if (chkw(width, lumStride, chromStride))
-+        ff_bgr24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv);
-+    else
-+        ff_bgr24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv);
-+}
-+
- 
- av_cold void rgb2rgb_init_aarch64(void)
- {
-@@ -37,5 +75,7 @@ av_cold void rgb2rgb_init_aarch64(void)
- 
-     if (have_neon(cpu_flags)) {
-         interleaveBytes = ff_interleave_bytes_neon;
-+        ff_rgb24toyv12 = rgb24toyv12_check;
-+        ff_bgr24toyv12 = bgr24toyv12_check;
-     }
- }
-diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S
-index d81110ec5714..8cf40b65f520 100644
---- a/libswscale/aarch64/rgb2rgb_neon.S
-+++ b/libswscale/aarch64/rgb2rgb_neon.S
-@@ -77,3 +77,184 @@ function ff_interleave_bytes_neon, export=1
- 0:
-         ret
- endfunc
-+
-+// void ff_rgb24toyv12_aarch64(
-+//              const uint8_t *src,             // x0
-+//              uint8_t *ydst,                  // x1
-+//              uint8_t *udst,                  // x2
-+//              uint8_t *vdst,                  // x3
-+//              int width,                      // w4
-+//              int height,                     // w5
-+//              int lumStride,                  // w6
-+//              int chromStride,                // w7
-+//              int srcStr,                     // [sp, #0]
-+//              int32_t *rgb2yuv);              // [sp, #8]
-+
-+function ff_rgb24toyv12_aarch64, export=1
-+        ldr             x15, [sp, #8]
-+        ld1             {v3.s}[2], [x15], #4
-+        ld1             {v3.s}[1], [x15], #4
-+        ld1             {v3.s}[0], [x15], #4
-+        ld1             {v4.s}[2], [x15], #4
-+        ld1             {v4.s}[1], [x15], #4
-+        ld1             {v4.s}[0], [x15], #4
-+        ld1             {v5.s}[2], [x15], #4
-+        ld1             {v5.s}[1], [x15], #4
-+        ld1             {v5.s}[0], [x15]
-+        b               99f
-+endfunc
-+
-+// void ff_bgr24toyv12_aarch64(
-+//              const uint8_t *src,             // x0
-+//              uint8_t *ydst,                  // x1
-+//              uint8_t *udst,                  // x2
-+//              uint8_t *vdst,                  // x3
-+//              int width,                      // w4
-+//              int height,                     // w5
-+//              int lumStride,                  // w6
-+//              int chromStride,                // w7
-+//              int srcStr,                     // [sp, #0]
-+//              int32_t *rgb2yuv);              // [sp, #8]
-+
-+function ff_bgr24toyv12_aarch64, export=1
-+        ldr             x15, [sp, #8]
-+        ld3             {v3.s, v4.s, v5.s}[0], [x15], #12
-+        ld3             {v3.s, v4.s, v5.s}[1], [x15], #12
-+        ld3             {v3.s, v4.s, v5.s}[2], [x15]
-+99:
-+        ldr             w14, [sp, #0]
-+        movi            v18.8b, #128
-+        uxtl            v17.8h, v18.8b
-+
-+        // Even line - YUV
-+1:
-+        mov             x10, x0
-+        mov             x11, x1
-+        mov             x12, x2
-+        mov             x13, x3
-+        mov             w9,  w4
-+
-+0:
-+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
-+
-+        uxtl2           v20.8h, v0.16b
-+        uxtl2           v21.8h, v1.16b
-+        uxtl2           v22.8h, v2.16b
-+
-+        uxtl            v0.8h, v0.8b
-+        uxtl            v1.8h, v1.8b
-+        uxtl            v2.8h, v2.8b
-+        // Y0
-+        smull           v6.4s, v0.4h, v3.h[0]
-+        smull2          v7.4s, v0.8h, v3.h[0]
-+        smlal           v6.4s, v1.4h, v4.h[0]
-+        smlal2          v7.4s, v1.8h, v4.h[0]
-+        smlal           v6.4s, v2.4h, v5.h[0]
-+        smlal2          v7.4s, v2.8h, v5.h[0]
-+        shrn            v6.4h, v6.4s, #12
-+        shrn2           v6.8h, v7.4s, #12
-+        add             v6.8h, v6.8h, v17.8h     // +128 (>> 3 = 16)
-+        uqrshrn         v16.8b, v6.8h, #3
-+        // Y1
-+        smull           v6.4s, v20.4h, v3.h[0]
-+        smull2          v7.4s, v20.8h, v3.h[0]
-+        smlal           v6.4s, v21.4h, v4.h[0]
-+        smlal2          v7.4s, v21.8h, v4.h[0]
-+        smlal           v6.4s, v22.4h, v5.h[0]
-+        smlal2          v7.4s, v22.8h, v5.h[0]
-+        shrn            v6.4h, v6.4s, #12
-+        shrn2           v6.8h, v7.4s, #12
-+        add             v6.8h, v6.8h, v17.8h
-+        uqrshrn2        v16.16b, v6.8h, #3
-+        // Y0/Y1
-+        st1             {v16.16b}, [x11], #16
-+
-+        uzp1            v0.8h, v0.8h, v20.8h
-+        uzp1            v1.8h, v1.8h, v21.8h
-+        uzp1            v2.8h, v2.8h, v22.8h
-+
-+        // U
-+        // Vector subscript *2 as we loaded into S but are only using H
-+        smull           v6.4s, v0.4h, v3.h[2]
-+        smull2          v7.4s, v0.8h, v3.h[2]
-+        smlal           v6.4s, v1.4h, v4.h[2]
-+        smlal2          v7.4s, v1.8h, v4.h[2]
-+        smlal           v6.4s, v2.4h, v5.h[2]
-+        smlal2          v7.4s, v2.8h, v5.h[2]
-+        shrn            v6.4h, v6.4s, #14
-+        shrn2           v6.8h, v7.4s, #14
-+        sqrshrn         v6.8b, v6.8h, #1
-+        add             v6.8b, v6.8b, v18.8b     // +128
-+        st1             {v6.8b}, [x12], #8
-+
-+        // V
-+        smull           v6.4s, v0.4h, v3.h[4]
-+        smull2          v7.4s, v0.8h, v3.h[4]
-+        smlal           v6.4s, v1.4h, v4.h[4]
-+        smlal2          v7.4s, v1.8h, v4.h[4]
-+        smlal           v6.4s, v2.4h, v5.h[4]
-+        smlal2          v7.4s, v2.8h, v5.h[4]
-+        shrn            v6.4h, v6.4s, #14
-+        shrn2           v6.8h, v7.4s, #14
-+        sqrshrn         v6.8b, v6.8h, #1
-+        add             v6.8b, v6.8b, v18.8b     // +128
-+        st1             {v6.8b}, [x13], #8
-+
-+        subs            w9, w9, #16
-+        b.gt            0b
-+
-+        // Odd line - Y only
-+
-+        add             x0, x0, w14, SXTX
-+        add             x1, x1, w6, SXTX
-+        mov             x10, x0
-+        mov             x11, x1
-+        mov             w9,  w4
-+
-+0:
-+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
-+
-+        uxtl2           v20.8h, v0.16b
-+        uxtl2           v21.8h, v1.16b
-+        uxtl2           v22.8h, v2.16b
-+
-+        uxtl            v0.8h, v0.8b
-+        uxtl            v1.8h, v1.8b
-+        uxtl            v2.8h, v2.8b
-+        // Y0
-+        smull           v6.4s, v0.4h, v3.h[0]
-+        smull2          v7.4s, v0.8h, v3.h[0]
-+        smlal           v6.4s, v1.4h, v4.h[0]
-+        smlal2          v7.4s, v1.8h, v4.h[0]
-+        smlal           v6.4s, v2.4h, v5.h[0]
-+        smlal2          v7.4s, v2.8h, v5.h[0]
-+        shrn            v6.4h, v6.4s, #12
-+        shrn2           v6.8h, v7.4s, #12
-+        add             v6.8h, v6.8h, v17.8h
-+        uqrshrn         v16.8b, v6.8h, #3
-+        // Y1
-+        smull           v6.4s, v20.4h, v3.h[0]
-+        smull2          v7.4s, v20.8h, v3.h[0]
-+        smlal           v6.4s, v21.4h, v4.h[0]
-+        smlal2          v7.4s, v21.8h, v4.h[0]
-+        smlal           v6.4s, v22.4h, v5.h[0]
-+        smlal2          v7.4s, v22.8h, v5.h[0]
-+        shrn            v6.4h, v6.4s, #12
-+        shrn2           v6.8h, v7.4s, #12
-+        add             v6.8h, v6.8h, v17.8h
-+        uqrshrn2        v16.16b, v6.8h, #3
-+        // Y0/Y1
-+        st1             {v16.16b}, [x11], #16
-+
-+        subs            w9, w9, #16
-+        b.gt            0b
-+
-+        add             x0, x0, w14, SXTX
-+        add             x1, x1, w6, SXTX
-+        add             x2, x2, w7, SXTX
-+        add             x3, x3, w7, SXTX
-+        subs            w5, w5, #2
-+        b.gt            1b
-+
-+        ret
-+endfunc
-
-From 95900ef928a5254db60ce7182f4903ad6d27a181 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 27 Apr 2023 13:03:52 +0000
-Subject: [PATCH 126/186] rgb2rgb: Fix rgb24->yuv420p with arbitrary wxh
-
-(cherry picked from commit 58771fdf0218dc670d8a343824f540e2f6e8785d)
----
- libswscale/aarch64/rgb2rgb.c      |   5 +-
- libswscale/aarch64/rgb2rgb_neon.S | 440 ++++++++++++++++++++++++------
- 2 files changed, 355 insertions(+), 90 deletions(-)
-
-diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c
-index 6d3e0000dc9a..f10c4ef2ded9 100644
---- a/libswscale/aarch64/rgb2rgb.c
-+++ b/libswscale/aarch64/rgb2rgb.c
-@@ -44,8 +44,9 @@ void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
- 
- static inline int chkw(const int width, const int lumStride, const int chromStride)
- {
--    const int aw = FFALIGN(width, 16);
--    return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2;
-+//    const int aw = FFALIGN(width, 16);
-+//    return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2;
-+    return 1;
- }
- 
- static void rgb24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S
-index 8cf40b65f520..978ab443ea52 100644
---- a/libswscale/aarch64/rgb2rgb_neon.S
-+++ b/libswscale/aarch64/rgb2rgb_neon.S
-@@ -116,6 +116,25 @@ endfunc
- //              int srcStr,                     // [sp, #0]
- //              int32_t *rgb2yuv);              // [sp, #8]
- 
-+// regs
-+// v0-2         Src bytes - reused as chroma src
-+// v3-5         Coeffs (packed very inefficiently - could be squashed)
-+// v6           128b
-+// v7           128h
-+// v8-15        Reserved
-+// v16-18       Lo Src expanded as H
-+// v19          -
-+// v20-22       Hi Src expanded as H
-+// v23          -
-+// v24          U out
-+// v25          U tmp
-+// v26          Y out
-+// v27-29       Y tmp
-+// v30          V out
-+// v31          V tmp
-+
-+// Assumes Little Endian in tail stores & conversion matrix
-+
- function ff_bgr24toyv12_aarch64, export=1
-         ldr             x15, [sp, #8]
-         ld3             {v3.s, v4.s, v5.s}[0], [x15], #12
-@@ -123,138 +142,383 @@ function ff_bgr24toyv12_aarch64, export=1
-         ld3             {v3.s, v4.s, v5.s}[2], [x15]
- 99:
-         ldr             w14, [sp, #0]
--        movi            v18.8b, #128
--        uxtl            v17.8h, v18.8b
--
--        // Even line - YUV
-+        movi            v7.8b, #128
-+        uxtl            v6.8h, v7.8b
-+        // Ensure if nothing to do then we do nothing
-+        cmp             w4, #0
-+        b.le            90f
-+        cmp             w5, #0
-+        b.le            90f
-+        // If w % 16 != 0 then -16 so we do main loop 1 fewer times with
-+        // the remainder done in the tail
-+        tst             w4, #15
-+        b.eq            1f
-+        sub             w4, w4, #16
- 1:
-+
-+// -------------------- Even line body - YUV
-+11:
-+        subs            w9,  w4, #0
-         mov             x10, x0
-         mov             x11, x1
-         mov             x12, x2
-         mov             x13, x3
--        mov             w9,  w4
-+        b.lt            12f
- 
--0:
-         ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
-+        subs            w9, w9, #16
-+        b.le            13f
-+
-+10:
-+        uxtl            v16.8h, v0.8b
-+        uxtl            v17.8h, v1.8b
-+        uxtl            v18.8h, v2.8b
- 
-         uxtl2           v20.8h, v0.16b
-         uxtl2           v21.8h, v1.16b
-         uxtl2           v22.8h, v2.16b
- 
--        uxtl            v0.8h, v0.8b
--        uxtl            v1.8h, v1.8b
--        uxtl            v2.8h, v2.8b
-+        bic             v0.8h, #0xff, LSL #8
-+        bic             v1.8h, #0xff, LSL #8
-+        bic             v2.8h, #0xff, LSL #8
-+
-+        // Testing shows it is faster to stack the smull/smlal ops together
-+        // rather than interleave them between channels and indeed even the
-+        // shift/add sections seem happier not interleaved
-+
-         // Y0
--        smull           v6.4s, v0.4h, v3.h[0]
--        smull2          v7.4s, v0.8h, v3.h[0]
--        smlal           v6.4s, v1.4h, v4.h[0]
--        smlal2          v7.4s, v1.8h, v4.h[0]
--        smlal           v6.4s, v2.4h, v5.h[0]
--        smlal2          v7.4s, v2.8h, v5.h[0]
--        shrn            v6.4h, v6.4s, #12
--        shrn2           v6.8h, v7.4s, #12
--        add             v6.8h, v6.8h, v17.8h     // +128 (>> 3 = 16)
--        uqrshrn         v16.8b, v6.8h, #3
-+        smull           v26.4s, v16.4h, v3.h[0]
-+        smlal           v26.4s, v17.4h, v4.h[0]
-+        smlal           v26.4s, v18.4h, v5.h[0]
-+        smull2          v27.4s, v16.8h, v3.h[0]
-+        smlal2          v27.4s, v17.8h, v4.h[0]
-+        smlal2          v27.4s, v18.8h, v5.h[0]
-         // Y1
--        smull           v6.4s, v20.4h, v3.h[0]
--        smull2          v7.4s, v20.8h, v3.h[0]
--        smlal           v6.4s, v21.4h, v4.h[0]
--        smlal2          v7.4s, v21.8h, v4.h[0]
--        smlal           v6.4s, v22.4h, v5.h[0]
--        smlal2          v7.4s, v22.8h, v5.h[0]
--        shrn            v6.4h, v6.4s, #12
--        shrn2           v6.8h, v7.4s, #12
--        add             v6.8h, v6.8h, v17.8h
--        uqrshrn2        v16.16b, v6.8h, #3
-+        smull           v28.4s, v20.4h, v3.h[0]
-+        smlal           v28.4s, v21.4h, v4.h[0]
-+        smlal           v28.4s, v22.4h, v5.h[0]
-+        smull2          v29.4s, v20.8h, v3.h[0]
-+        smlal2          v29.4s, v21.8h, v4.h[0]
-+        smlal2          v29.4s, v22.8h, v5.h[0]
-+        shrn            v26.4h, v26.4s, #12
-+        shrn2           v26.8h, v27.4s, #12
-+        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
-+        uqrshrn         v26.8b, v26.8h, #3
-+        shrn            v28.4h, v28.4s, #12
-+        shrn2           v28.8h, v29.4s, #12
-+        add             v28.8h, v28.8h, v6.8h
-+        uqrshrn2        v26.16b, v28.8h, #3
-         // Y0/Y1
--        st1             {v16.16b}, [x11], #16
--
--        uzp1            v0.8h, v0.8h, v20.8h
--        uzp1            v1.8h, v1.8h, v21.8h
--        uzp1            v2.8h, v2.8h, v22.8h
- 
-         // U
-         // Vector subscript *2 as we loaded into S but are only using H
--        smull           v6.4s, v0.4h, v3.h[2]
--        smull2          v7.4s, v0.8h, v3.h[2]
--        smlal           v6.4s, v1.4h, v4.h[2]
--        smlal2          v7.4s, v1.8h, v4.h[2]
--        smlal           v6.4s, v2.4h, v5.h[2]
--        smlal2          v7.4s, v2.8h, v5.h[2]
--        shrn            v6.4h, v6.4s, #14
--        shrn2           v6.8h, v7.4s, #14
--        sqrshrn         v6.8b, v6.8h, #1
--        add             v6.8b, v6.8b, v18.8b     // +128
--        st1             {v6.8b}, [x12], #8
-+        smull           v24.4s, v0.4h, v3.h[2]
-+        smlal           v24.4s, v1.4h, v4.h[2]
-+        smlal           v24.4s, v2.4h, v5.h[2]
-+        smull2          v25.4s, v0.8h, v3.h[2]
-+        smlal2          v25.4s, v1.8h, v4.h[2]
-+        smlal2          v25.4s, v2.8h, v5.h[2]
- 
-         // V
--        smull           v6.4s, v0.4h, v3.h[4]
--        smull2          v7.4s, v0.8h, v3.h[4]
--        smlal           v6.4s, v1.4h, v4.h[4]
--        smlal2          v7.4s, v1.8h, v4.h[4]
--        smlal           v6.4s, v2.4h, v5.h[4]
--        smlal2          v7.4s, v2.8h, v5.h[4]
--        shrn            v6.4h, v6.4s, #14
--        shrn2           v6.8h, v7.4s, #14
--        sqrshrn         v6.8b, v6.8h, #1
--        add             v6.8b, v6.8b, v18.8b     // +128
--        st1             {v6.8b}, [x13], #8
-+        smull           v30.4s, v0.4h, v3.h[4]
-+        smlal           v30.4s, v1.4h, v4.h[4]
-+        smlal           v30.4s, v2.4h, v5.h[4]
-+        smull2          v31.4s, v0.8h, v3.h[4]
-+        smlal2          v31.4s, v1.8h, v4.h[4]
-+        smlal2          v31.4s, v2.8h, v5.h[4]
-+
-+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
-+
-+        shrn            v24.4h, v24.4s, #14
-+        shrn2           v24.8h, v25.4s, #14
-+        sqrshrn         v24.8b, v24.8h, #1
-+        add             v24.8b, v24.8b, v7.8b     // +128
-+        shrn            v30.4h, v30.4s, #14
-+        shrn2           v30.8h, v31.4s, #14
-+        sqrshrn         v30.8b, v30.8h, #1
-+        add             v30.8b, v30.8b, v7.8b     // +128
- 
-         subs            w9, w9, #16
--        b.gt            0b
- 
--        // Odd line - Y only
-+        st1             {v26.16b}, [x11], #16
-+        st1             {v24.8b}, [x12], #8
-+        st1             {v30.8b}, [x13], #8
-+
-+        b.gt            10b
-+
-+// -------------------- Even line tail - YUV
-+// If width % 16 == 0 then simply runs once with preloaded RGB
-+// If other then deals with preload & then does remaining tail
-+
-+13:
-+        // Body is simple copy of main loop body minus preload
-+
-+        uxtl            v16.8h, v0.8b
-+        uxtl            v17.8h, v1.8b
-+        uxtl            v18.8h, v2.8b
-+
-+        uxtl2           v20.8h, v0.16b
-+        uxtl2           v21.8h, v1.16b
-+        uxtl2           v22.8h, v2.16b
-+
-+        bic             v0.8h, #0xff, LSL #8
-+        bic             v1.8h, #0xff, LSL #8
-+        bic             v2.8h, #0xff, LSL #8
-+
-+        // Y0
-+        smull           v26.4s, v16.4h, v3.h[0]
-+        smlal           v26.4s, v17.4h, v4.h[0]
-+        smlal           v26.4s, v18.4h, v5.h[0]
-+        smull2          v27.4s, v16.8h, v3.h[0]
-+        smlal2          v27.4s, v17.8h, v4.h[0]
-+        smlal2          v27.4s, v18.8h, v5.h[0]
-+        // Y1
-+        smull           v28.4s, v20.4h, v3.h[0]
-+        smlal           v28.4s, v21.4h, v4.h[0]
-+        smlal           v28.4s, v22.4h, v5.h[0]
-+        smull2          v29.4s, v20.8h, v3.h[0]
-+        smlal2          v29.4s, v21.8h, v4.h[0]
-+        smlal2          v29.4s, v22.8h, v5.h[0]
-+        shrn            v26.4h, v26.4s, #12
-+        shrn2           v26.8h, v27.4s, #12
-+        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
-+        uqrshrn         v26.8b, v26.8h, #3
-+        shrn            v28.4h, v28.4s, #12
-+        shrn2           v28.8h, v29.4s, #12
-+        add             v28.8h, v28.8h, v6.8h
-+        uqrshrn2        v26.16b, v28.8h, #3
-+        // Y0/Y1
-+
-+        // U
-+        // Vector subscript *2 as we loaded into S but are only using H
-+        smull           v24.4s, v0.4h, v3.h[2]
-+        smlal           v24.4s, v1.4h, v4.h[2]
-+        smlal           v24.4s, v2.4h, v5.h[2]
-+        smull2          v25.4s, v0.8h, v3.h[2]
-+        smlal2          v25.4s, v1.8h, v4.h[2]
-+        smlal2          v25.4s, v2.8h, v5.h[2]
- 
-+        // V
-+        smull           v30.4s, v0.4h, v3.h[4]
-+        smlal           v30.4s, v1.4h, v4.h[4]
-+        smlal           v30.4s, v2.4h, v5.h[4]
-+        smull2          v31.4s, v0.8h, v3.h[4]
-+        smlal2          v31.4s, v1.8h, v4.h[4]
-+        smlal2          v31.4s, v2.8h, v5.h[4]
-+
-+        cmp             w9, #-16
-+
-+        shrn            v24.4h, v24.4s, #14
-+        shrn2           v24.8h, v25.4s, #14
-+        sqrshrn         v24.8b, v24.8h, #1
-+        add             v24.8b, v24.8b, v7.8b     // +128
-+        shrn            v30.4h, v30.4s, #14
-+        shrn2           v30.8h, v31.4s, #14
-+        sqrshrn         v30.8b, v30.8h, #1
-+        add             v30.8b, v30.8b, v7.8b     // +128
-+
-+        // Here:
-+        // w9 == 0      width % 16 == 0, tail done
-+        // w9 > -16     1st tail done (16 pels), remainder still to go
-+        // w9 == -16    shouldn't happen
-+        // w9 > -32     2nd tail done
-+        // w9 <= -32    shouldn't happen
-+
-+        b.lt            2f
-+        st1             {v26.16b}, [x11], #16
-+        st1             {v24.8b}, [x12], #8
-+        st1             {v30.8b}, [x13], #8
-+        cbz             w9, 3f
-+
-+12:
-+        sub             w9, w9, #16
-+
-+        tbz             w9, #3, 1f
-+        ld3             {v0.8b, v1.8b, v2.8b},  [x10], #24
-+1:      tbz             w9, #2, 1f
-+        ld3             {v0.b, v1.b, v2.b}[8],  [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[9],  [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[10], [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[11], [x10], #3
-+1:      tbz             w9, #1, 1f
-+        ld3             {v0.b, v1.b, v2.b}[12], [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[13], [x10], #3
-+1:      tbz             w9, #0, 13b
-+        ld3             {v0.b, v1.b, v2.b}[14], [x10], #3
-+        b               13b
-+
-+2:
-+        tbz             w9, #3, 1f
-+        st1             {v26.8b},    [x11], #8
-+        st1             {v24.s}[0],  [x12], #4
-+        st1             {v30.s}[0],  [x13], #4
-+1:      tbz             w9, #2, 1f
-+        st1             {v26.s}[2],  [x11], #4
-+        st1             {v24.h}[2],  [x12], #2
-+        st1             {v30.h}[2],  [x13], #2
-+1:      tbz             w9, #1, 1f
-+        st1             {v26.h}[6],  [x11], #2
-+        st1             {v24.b}[6],  [x12], #1
-+        st1             {v30.b}[6],  [x13], #1
-+1:      tbz             w9, #0, 1f
-+        st1             {v26.b}[14], [x11]
-+        st1             {v24.b}[7],  [x12]
-+        st1             {v30.b}[7],  [x13]
-+1:
-+3:
-+
-+// -------------------- Odd line body - Y only
-+
-+        subs            w5, w5, #1
-+        b.eq            90f
-+
-+        subs            w9,  w4, #0
-         add             x0, x0, w14, SXTX
-         add             x1, x1, w6, SXTX
-         mov             x10, x0
-         mov             x11, x1
--        mov             w9,  w4
-+        b.lt            12f
- 
--0:
-         ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
-+        subs            w9, w9, #16
-+        b.le            13f
-+
-+10:
-+        uxtl            v16.8h, v0.8b
-+        uxtl            v17.8h, v1.8b
-+        uxtl            v18.8h, v2.8b
- 
-         uxtl2           v20.8h, v0.16b
-         uxtl2           v21.8h, v1.16b
-         uxtl2           v22.8h, v2.16b
- 
--        uxtl            v0.8h, v0.8b
--        uxtl            v1.8h, v1.8b
--        uxtl            v2.8h, v2.8b
-+        // Testing shows it is faster to stack the smull/smlal ops together
-+        // rather than interleave them between channels and indeed even the
-+        // shift/add sections seem happier not interleaved
-+
-         // Y0
--        smull           v6.4s, v0.4h, v3.h[0]
--        smull2          v7.4s, v0.8h, v3.h[0]
--        smlal           v6.4s, v1.4h, v4.h[0]
--        smlal2          v7.4s, v1.8h, v4.h[0]
--        smlal           v6.4s, v2.4h, v5.h[0]
--        smlal2          v7.4s, v2.8h, v5.h[0]
--        shrn            v6.4h, v6.4s, #12
--        shrn2           v6.8h, v7.4s, #12
--        add             v6.8h, v6.8h, v17.8h
--        uqrshrn         v16.8b, v6.8h, #3
-+        smull           v26.4s, v16.4h, v3.h[0]
-+        smlal           v26.4s, v17.4h, v4.h[0]
-+        smlal           v26.4s, v18.4h, v5.h[0]
-+        smull2          v27.4s, v16.8h, v3.h[0]
-+        smlal2          v27.4s, v17.8h, v4.h[0]
-+        smlal2          v27.4s, v18.8h, v5.h[0]
-         // Y1
--        smull           v6.4s, v20.4h, v3.h[0]
--        smull2          v7.4s, v20.8h, v3.h[0]
--        smlal           v6.4s, v21.4h, v4.h[0]
--        smlal2          v7.4s, v21.8h, v4.h[0]
--        smlal           v6.4s, v22.4h, v5.h[0]
--        smlal2          v7.4s, v22.8h, v5.h[0]
--        shrn            v6.4h, v6.4s, #12
--        shrn2           v6.8h, v7.4s, #12
--        add             v6.8h, v6.8h, v17.8h
--        uqrshrn2        v16.16b, v6.8h, #3
-+        smull           v28.4s, v20.4h, v3.h[0]
-+        smlal           v28.4s, v21.4h, v4.h[0]
-+        smlal           v28.4s, v22.4h, v5.h[0]
-+        smull2          v29.4s, v20.8h, v3.h[0]
-+        smlal2          v29.4s, v21.8h, v4.h[0]
-+        smlal2          v29.4s, v22.8h, v5.h[0]
-+
-+        ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
-+
-+        shrn            v26.4h, v26.4s, #12
-+        shrn2           v26.8h, v27.4s, #12
-+        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
-+        uqrshrn         v26.8b, v26.8h, #3
-+        shrn            v28.4h, v28.4s, #12
-+        shrn2           v28.8h, v29.4s, #12
-+        add             v28.8h, v28.8h, v6.8h
-+        uqrshrn2        v26.16b, v28.8h, #3
-         // Y0/Y1
--        st1             {v16.16b}, [x11], #16
- 
-         subs            w9, w9, #16
--        b.gt            0b
-+
-+        st1             {v26.16b}, [x11], #16
-+
-+        b.gt            10b
-+
-+// -------------------- Odd line tail - Y
-+// If width % 16 == 0 then simply runs once with preloaded RGB
-+// If other then deals with preload & then does remaining tail
-+
-+13:
-+        // Body is simple copy of main loop body minus preload
-+
-+        uxtl            v16.8h, v0.8b
-+        uxtl            v17.8h, v1.8b
-+        uxtl            v18.8h, v2.8b
-+
-+        uxtl2           v20.8h, v0.16b
-+        uxtl2           v21.8h, v1.16b
-+        uxtl2           v22.8h, v2.16b
-+
-+        // Y0
-+        smull           v26.4s, v16.4h, v3.h[0]
-+        smlal           v26.4s, v17.4h, v4.h[0]
-+        smlal           v26.4s, v18.4h, v5.h[0]
-+        smull2          v27.4s, v16.8h, v3.h[0]
-+        smlal2          v27.4s, v17.8h, v4.h[0]
-+        smlal2          v27.4s, v18.8h, v5.h[0]
-+        // Y1
-+        smull           v28.4s, v20.4h, v3.h[0]
-+        smlal           v28.4s, v21.4h, v4.h[0]
-+        smlal           v28.4s, v22.4h, v5.h[0]
-+        smull2          v29.4s, v20.8h, v3.h[0]
-+        smlal2          v29.4s, v21.8h, v4.h[0]
-+        smlal2          v29.4s, v22.8h, v5.h[0]
-+
-+        cmp             w9, #-16
-+
-+        shrn            v26.4h, v26.4s, #12
-+        shrn2           v26.8h, v27.4s, #12
-+        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
-+        uqrshrn         v26.8b, v26.8h, #3
-+        shrn            v28.4h, v28.4s, #12
-+        shrn2           v28.8h, v29.4s, #12
-+        add             v28.8h, v28.8h, v6.8h
-+        uqrshrn2        v26.16b, v28.8h, #3
-+        // Y0/Y1
-+
-+        // Here:
-+        // w9 == 0      width % 16 == 0, tail done
-+        // w9 > -16     1st tail done (16 pels), remainder still to go
-+        // w9 == -16    shouldn't happen
-+        // w9 > -32     2nd tail done
-+        // w9 <= -32    shouldn't happen
-+
-+        b.lt            2f
-+        st1             {v26.16b}, [x11], #16
-+        cbz             w9, 3f
-+
-+12:
-+        sub             w9, w9, #16
-+
-+        tbz             w9, #3, 1f
-+        ld3             {v0.8b, v1.8b, v2.8b},  [x10], #24
-+1:      tbz             w9, #2, 1f
-+        ld3             {v0.b, v1.b, v2.b}[8],  [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[9],  [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[10], [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[11], [x10], #3
-+1:      tbz             w9, #1, 1f
-+        ld3             {v0.b, v1.b, v2.b}[12], [x10], #3
-+        ld3             {v0.b, v1.b, v2.b}[13], [x10], #3
-+1:      tbz             w9, #0, 13b
-+        ld3             {v0.b, v1.b, v2.b}[14], [x10], #3
-+        b               13b
-+
-+2:
-+        tbz             w9, #3, 1f
-+        st1             {v26.8b},    [x11], #8
-+1:      tbz             w9, #2, 1f
-+        st1             {v26.s}[2],  [x11], #4
-+1:      tbz             w9, #1, 1f
-+        st1             {v26.h}[6],  [x11], #2
-+1:      tbz             w9, #0, 1f
-+        st1             {v26.b}[14], [x11]
-+1:
-+3:
-+
-+// ------------------- Loop to start
- 
-         add             x0, x0, w14, SXTX
-         add             x1, x1, w6, SXTX
-         add             x2, x2, w7, SXTX
-         add             x3, x3, w7, SXTX
--        subs            w5, w5, #2
--        b.gt            1b
--
-+        subs            w5, w5, #1
-+        b.gt            11b
-+90:
-         ret
- endfunc
-
-From d33e534ad9a45463b0433767a28256d737827b8c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 26 Apr 2023 15:36:07 +0000
-Subject: [PATCH 127/186] rgb2rgb: Use asm unconditionally
-
-(cherry picked from commit 7c216c0804836b31c0ea093bb1dde5ab387724b1)
----
- libswscale/aarch64/rgb2rgb.c | 37 ++----------------------------------
- 1 file changed, 2 insertions(+), 35 deletions(-)
-
-diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c
-index f10c4ef2ded9..6a0e2dcc09f8 100644
---- a/libswscale/aarch64/rgb2rgb.c
-+++ b/libswscale/aarch64/rgb2rgb.c
-@@ -37,46 +37,13 @@ void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-                    uint8_t *vdst, int width, int height, int lumStride,
-                    int chromStride, int srcStride, int32_t *rgb2yuv);
- 
--// RGB to YUV asm fns process 16 pixels at once so ensure that the output
--// will fit into the stride. ARM64 should cope with unaligned SIMD r/w so
--// don't test for that
--// Fall back to C if we cannot use asm
--
--static inline int chkw(const int width, const int lumStride, const int chromStride)
--{
--//    const int aw = FFALIGN(width, 16);
--//    return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2;
--    return 1;
--}
--
--static void rgb24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
--                   uint8_t *vdst, int width, int height, int lumStride,
--                   int chromStride, int srcStride, int32_t *rgb2yuv)
--{
--    if (chkw(width, lumStride, chromStride))
--        ff_rgb24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv);
--    else
--        ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv);
--}
--
--static void bgr24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
--                   uint8_t *vdst, int width, int height, int lumStride,
--                   int chromStride, int srcStride, int32_t *bgr2yuv)
--{
--    if (chkw(width, lumStride, chromStride))
--        ff_bgr24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv);
--    else
--        ff_bgr24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv);
--}
--
--
- av_cold void rgb2rgb_init_aarch64(void)
- {
-     int cpu_flags = av_get_cpu_flags();
- 
-     if (have_neon(cpu_flags)) {
-         interleaveBytes = ff_interleave_bytes_neon;
--        ff_rgb24toyv12 = rgb24toyv12_check;
--        ff_bgr24toyv12 = bgr24toyv12_check;
-+        ff_rgb24toyv12 = ff_rgb24toyv12_aarch64;
-+        ff_bgr24toyv12 = ff_bgr24toyv12_aarch64;
-     }
- }
-
-From 79640085d62275d96b2c53f18776cfd773d9fde4 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 27 Apr 2023 13:01:43 +0000
-Subject: [PATCH 128/186] tests/swscale: Add options for width and height on
- the command line
-
-(cherry picked from commit eb8a09779688fc05bf204fdfcd063b04cda07271)
----
- libswscale/tests/swscale.c | 84 ++++++++++++++++++++++++++------------
- 1 file changed, 59 insertions(+), 25 deletions(-)
-
-diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c
-index 6c38041ddb81..4cf41d9f64a4 100644
---- a/libswscale/tests/swscale.c
-+++ b/libswscale/tests/swscale.c
-@@ -355,56 +355,71 @@ static int fileTest(const uint8_t * const ref[4], int refStride[4],
-     return 0;
- }
- 
--#define W 96
--#define H 96
--
- int main(int argc, char **argv)
- {
-+    unsigned int W = 96;
-+    unsigned int H = 96;
-+    unsigned int W2;
-+    unsigned int H2;
-+    unsigned int S;
-     enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE;
-     enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE;
--    uint8_t *rgb_data   = av_malloc(W * H * 4);
--    const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL };
--    int rgb_stride[4]   = { 4 * W, 0, 0, 0 };
--    uint8_t *data       = av_malloc(4 * W * H);
--    const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 };
--    int stride[4]       = { W, W, W, W };
-     int x, y;
-     struct SwsContext *sws;
-     AVLFG rand;
-     int res = -1;
-     int i;
-     FILE *fp = NULL;
--
--    if (!rgb_data || !data)
--        return -1;
-+    uint8_t *rgb_data;
-+    uint8_t * rgb_src[4] = { NULL };
-+    int rgb_stride[4]   = { 0 };
-+    uint8_t *data;
-+    uint8_t * src[4] = { NULL };
-+    int stride[4]       = { 0 };
- 
-     for (i = 1; i < argc; i += 2) {
-+        const char * const arg2 = argv[i+1];
-+
-         if (argv[i][0] != '-' || i + 1 == argc)
-             goto bad_option;
-         if (!strcmp(argv[i], "-ref")) {
--            fp = fopen(argv[i + 1], "r");
-+            fp = fopen(arg2, "r");
-             if (!fp) {
--                fprintf(stderr, "could not open '%s'\n", argv[i + 1]);
-+                fprintf(stderr, "could not open '%s'\n", arg2);
-                 goto error;
-             }
-         } else if (!strcmp(argv[i], "-cpuflags")) {
-             unsigned flags = av_get_cpu_flags();
--            int ret = av_parse_cpu_caps(&flags, argv[i + 1]);
-+            int ret = av_parse_cpu_caps(&flags, arg2);
-             if (ret < 0) {
--                fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]);
-+                fprintf(stderr, "invalid cpu flags %s\n", arg2);
-                 return ret;
-             }
-             av_force_cpu_flags(flags);
-         } else if (!strcmp(argv[i], "-src")) {
--            srcFormat = av_get_pix_fmt(argv[i + 1]);
-+            srcFormat = av_get_pix_fmt(arg2);
-             if (srcFormat == AV_PIX_FMT_NONE) {
--                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
-+                fprintf(stderr, "invalid pixel format %s\n", arg2);
-                 return -1;
-             }
-         } else if (!strcmp(argv[i], "-dst")) {
--            dstFormat = av_get_pix_fmt(argv[i + 1]);
-+            dstFormat = av_get_pix_fmt(arg2);
-             if (dstFormat == AV_PIX_FMT_NONE) {
--                fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]);
-+                fprintf(stderr, "invalid pixel format %s\n", arg2);
-+                return -1;
-+            }
-+        } else if (!strcmp(argv[i], "-w")) {
-+            char * p = NULL;
-+            W = strtoul(arg2, &p, 0);
-+            if (!W || *p) {
-+                fprintf(stderr, "bad width %s\n", arg2);
-+                return -1;
-+            }
-+        } else if (!strcmp(argv[i], "-h")) {
-+            char * p = NULL;
-+            H = strtoul(arg2, &p, 0);
-+            if (!H || *p) {
-+                fprintf(stderr, "bad height '%s' (H=%d, *p=%d)\n", arg2, H, *p);
-                 return -1;
-             }
-         } else {
-@@ -414,15 +429,34 @@ bad_option:
-         }
-     }
- 
--    sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H,
-+    S = (W + 15) & ~15;
-+    rgb_data   = av_mallocz(S * H * 4);
-+    rgb_src[0] = rgb_data;
-+    rgb_stride[0]   = 4 * S;
-+    data       = av_mallocz(4 * S * H);
-+    src[0] = data;
-+    src[1] = data + S * H;
-+    src[2] = data + S * H * 2;
-+    src[3] = data + S * H * 3;
-+    stride[0] = S;
-+    stride[1] = S;
-+    stride[2] = S;
-+    stride[3] = S;
-+    H2 = H < 96 ? 8 : H / 12;
-+    W2 = W < 96 ? 8 : W / 12;
-+
-+    if (!rgb_data || !data)
-+        return -1;
-+
-+    sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H,
-                          AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL);
- 
-     av_lfg_init(&rand, 1);
- 
-     for (y = 0; y < H; y++)
-         for (x = 0; x < W * 4; x++)
--            rgb_data[ x + y * 4 * W] = av_lfg_get(&rand);
--    res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride);
-+            rgb_data[ x + y * 4 * S] = av_lfg_get(&rand);
-+    res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride);
-     if (res < 0 || res != H) {
-         res = -1;
-         goto error;
-@@ -431,10 +465,10 @@ bad_option:
-     av_free(rgb_data);
- 
-     if(fp) {
--        res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat);
-+        res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat);
-         fclose(fp);
-     } else {
--        selfTest(src, stride, W, H, srcFormat, dstFormat);
-+        selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat);
-         res = 0;
-     }
- error:
-
-From 7fcd6aa72879fad5f0a4d2144b65c37c7a50ecfc Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 26 Apr 2023 16:31:23 +0000
-Subject: [PATCH 129/186] tests/swscale: Add a timing option
-
--t <n>   Where n is the number of time to loop the scale op.
-         Often useful to do it 10 times or so for better resolution
-
-(cherry picked from commit 50cd60a23a66254f911376602d07b30fcafbde96)
----
- libswscale/tests/swscale.c | 32 ++++++++++++++++++++++++++++++--
- 1 file changed, 30 insertions(+), 2 deletions(-)
-
-diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c
-index 4cf41d9f64a4..12776ffec7ae 100644
---- a/libswscale/tests/swscale.c
-+++ b/libswscale/tests/swscale.c
-@@ -23,6 +23,7 @@
- #include <string.h>
- #include <inttypes.h>
- #include <stdarg.h>
-+#include <time.h>
- 
- #undef HAVE_AV_CONFIG_H
- #include "libavutil/cpu.h"
-@@ -78,6 +79,15 @@ struct Results {
-     uint32_t crc;
- };
- 
-+static int time_rep = 0;
-+
-+static uint64_t utime(void)
-+{
-+    struct timespec ts;
-+    clock_gettime(CLOCK_MONOTONIC, &ts);
-+    return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000;
-+}
-+
- // test by ref -> src -> dst -> out & compare out against ref
- // ref & out are YV12
- static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
-@@ -174,7 +184,7 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
-         goto end;
-     }
- 
--    printf(" %s %dx%d -> %s %3dx%3d flags=%2d",
-+    printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d",
-            desc_src->name, srcW, srcH,
-            desc_dst->name, dstW, dstH,
-            flags);
-@@ -182,6 +192,17 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h,
- 
-     sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
- 
-+    if (time_rep != 0)
-+    {
-+        const uint64_t now = utime();
-+        uint64_t done;
-+        for (i = 1; i != time_rep; ++i) {
-+            sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride);
-+        }
-+        done = utime();
-+        printf(" T=%7"PRId64"us ", done-now);
-+    }
-+
-     for (i = 0; i < 4 && dstStride[i]; i++)
-         crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i],
-                      dstStride[i] * dstH);
-@@ -419,7 +440,14 @@ int main(int argc, char **argv)
-             char * p = NULL;
-             H = strtoul(arg2, &p, 0);
-             if (!H || *p) {
--                fprintf(stderr, "bad height '%s' (H=%d, *p=%d)\n", arg2, H, *p);
-+                fprintf(stderr, "bad height '%s'\n", arg2);
-+                return -1;
-+            }
-+        } else if (!strcmp(argv[i], "-t")) {
-+            char * p = NULL;
-+            time_rep = (int)strtol(arg2, &p, 0);
-+            if (*p) {
-+                fprintf(stderr, "bad time repetitions '%s'\n", arg2);
-                 return -1;
-             }
-         } else {
-
-From fabd4e3e197737f27684a17e817c186a350213d4 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 20 Apr 2023 13:40:36 +0000
-Subject: [PATCH 130/186] swscale: RGB->YUV420 fix C template to allow odd
- widths
-
-(cherry picked from commit 08b2023e7b5292df0adc6593e4d20087f9cef5c8)
----
- libswscale/rgb2rgb_template.c | 44 +++++++++++++++++++++++++++++++++++
- libswscale/swscale_unscaled.c | 11 ++++-----
- 2 files changed, 49 insertions(+), 6 deletions(-)
-
-diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
-index 703de90690d3..e711589e1e1a 100644
---- a/libswscale/rgb2rgb_template.c
-+++ b/libswscale/rgb2rgb_template.c
-@@ -679,6 +679,19 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-             ydst[2 * i + 1] = Y;
-         }
-+        if ((width & 1) != 0) {
-+            unsigned int b = src[6 * i + 0];
-+            unsigned int g = src[6 * i + 1];
-+            unsigned int r = src[6 * i + 2];
-+
-+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
-+            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
-+            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
-+
-+            udst[i]     = U;
-+            vdst[i]     = V;
-+            ydst[2 * i] = Y;
-+        }
-         ydst += lumStride;
-         src  += srcStride;
- 
-@@ -701,6 +714,15 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-             ydst[2 * i + 1] = Y;
-         }
-+        if ((width & 1) != 0) {
-+            unsigned int b = src[6 * i + 0];
-+            unsigned int g = src[6 * i + 1];
-+            unsigned int r = src[6 * i + 2];
-+
-+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-+
-+            ydst[2 * i] = Y;
-+        }
-         udst += chromStride;
-         vdst += chromStride;
-         ydst += lumStride;
-@@ -767,6 +789,19 @@ static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-             ydst[2 * i + 1] = Y;
-         }
-+        if ((width & 1) != 0) {
-+            unsigned int b = src[8 * i + 2];
-+            unsigned int g = src[8 * i + 1];
-+            unsigned int r = src[8 * i + 0];
-+
-+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) +  16;
-+            unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128;
-+            unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128;
-+
-+            udst[i]     = U;
-+            vdst[i]     = V;
-+            ydst[2 * i] = Y;
-+        }
-         ydst += lumStride;
-         src  += srcStride;
- 
-@@ -789,6 +824,15 @@ static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
-             Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-             ydst[2 * i + 1] = Y;
-         }
-+        if ((width & 1) != 0) {
-+            unsigned int b = src[8 * i + 2];
-+            unsigned int g = src[8 * i + 1];
-+            unsigned int r = src[8 * i + 0];
-+
-+            unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16;
-+
-+            ydst[2 * i] = Y;
-+        }
-         udst += chromStride;
-         vdst += chromStride;
-         ydst += lumStride;
-diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
-index 053c06adf5d1..52469b2e4a7b 100644
---- a/libswscale/swscale_unscaled.c
-+++ b/libswscale/swscale_unscaled.c
-@@ -2062,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext *c)
-     const enum AVPixelFormat dstFormat = c->dstFormat;
-     const int flags = c->flags;
-     const int dstH = c->dstH;
--    const int dstW = c->dstW;
-     int needsDither;
- 
-     needsDither = isAnyRGB(dstFormat) &&
-@@ -2120,12 +2119,12 @@ void ff_get_unscaled_swscale(SwsContext *c)
-     /* bgr24toYV12 */
-     if (srcFormat == AV_PIX_FMT_BGR24 &&
-         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
--        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        !(flags & SWS_ACCURATE_RND))
-         c->convert_unscaled = bgr24ToYv12Wrapper;
-     /* rgb24toYV12 */
-     if (srcFormat == AV_PIX_FMT_RGB24 &&
-         (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) &&
--        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        !(flags & SWS_ACCURATE_RND))
-         c->convert_unscaled = rgb24ToYv12Wrapper;
- 
-     /* bgrxtoYV12 */
-@@ -2136,17 +2135,17 @@ void ff_get_unscaled_swscale(SwsContext *c)
-     /* rgbx24toYV12 */
-     if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) ||
-          (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
--        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        !(flags & SWS_ACCURATE_RND))
-         c->convert_unscaled = rgbxToYv12Wrapper;
-     /* xbgrtoYV12 */
-     if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) ||
-          (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
--        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        !(flags & SWS_ACCURATE_RND))
-         c->convert_unscaled = xbgrToYv12Wrapper;
-     /* xrgb24toYV12 */
-     if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) ||
-          (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) &&
--        !(flags & SWS_ACCURATE_RND) && !(dstW&1))
-+        !(flags & SWS_ACCURATE_RND))
-         c->convert_unscaled = xrgbToYv12Wrapper;
- 
-     /* RGB/BGR -> RGB/BGR (no dither needed forms) */
-
-From 7d6f3a7ede0f4bf03a410bc2a8a8f38a47ac15a9 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 4 May 2023 14:26:14 +0000
-Subject: [PATCH 131/186] rtpenc: Add code to send H264 new extradata in
- sidedata
-
-Fixes issue with pi V4L2 H264 encode which cannot create extradata
-at init time.
-
-(cherry picked from commit 4f852b4b093f841b64b4934a6f1720e98e4e0f2c)
----
- libavformat/rtpenc.c | 18 ++++++++++++++++++
- 1 file changed, 18 insertions(+)
-
-diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
-index a8d296a1542f..f67dc2a15ae1 100644
---- a/libavformat/rtpenc.c
-+++ b/libavformat/rtpenc.c
-@@ -19,6 +19,7 @@
-  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-  */
- 
-+#include "avc.h"
- #include "avformat.h"
- #include "mpegts.h"
- #include "internal.h"
-@@ -585,8 +586,25 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt)
-         ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0);
-         break;
-     case AV_CODEC_ID_H264:
-+    {
-+        uint8_t *side_data;
-+        int side_data_size = 0;
-+
-+        side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
-+                                            &side_data_size);
-+
-+        if (side_data_size != 0) {
-+            int ps_size = side_data_size;
-+            uint8_t * ps_buf = NULL;
-+
-+            ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size);
-+            av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size);
-+            ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size);
-+            av_free(ps_buf);
-+        }
-         ff_rtp_send_h264_hevc(s1, pkt->data, size);
-         break;
-+    }
-     case AV_CODEC_ID_H261:
-         ff_rtp_send_h261(s1, pkt->data, size);
-         break;
-
-From 7ba7eb37305530b4eef1637c87088da8d93911aa Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 5 Jun 2023 08:34:38 +0000
-Subject: [PATCH 132/186] rgb2rgb: Fix luma narrow+saturation instruction
-
-(cherry picked from commit 9cdac1c08ad5c0aea28907d1d3fd0bdda387955a)
----
- libswscale/aarch64/rgb2rgb_neon.S | 16 ++++++++--------
- 1 file changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S
-index 978ab443ea52..476ca723a0ef 100644
---- a/libswscale/aarch64/rgb2rgb_neon.S
-+++ b/libswscale/aarch64/rgb2rgb_neon.S
-@@ -203,11 +203,11 @@ function ff_bgr24toyv12_aarch64, export=1
-         shrn            v26.4h, v26.4s, #12
-         shrn2           v26.8h, v27.4s, #12
-         add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        uqrshrn         v26.8b, v26.8h, #3
-+        sqrshrun        v26.8b, v26.8h, #3
-         shrn            v28.4h, v28.4s, #12
-         shrn2           v28.8h, v29.4s, #12
-         add             v28.8h, v28.8h, v6.8h
--        uqrshrn2        v26.16b, v28.8h, #3
-+        sqrshrun2       v26.16b, v28.8h, #3
-         // Y0/Y1
- 
-         // U
-@@ -282,11 +282,11 @@ function ff_bgr24toyv12_aarch64, export=1
-         shrn            v26.4h, v26.4s, #12
-         shrn2           v26.8h, v27.4s, #12
-         add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        uqrshrn         v26.8b, v26.8h, #3
-+        sqrshrun        v26.8b, v26.8h, #3
-         shrn            v28.4h, v28.4s, #12
-         shrn2           v28.8h, v29.4s, #12
-         add             v28.8h, v28.8h, v6.8h
--        uqrshrn2        v26.16b, v28.8h, #3
-+        sqrshrun2       v26.16b, v28.8h, #3
-         // Y0/Y1
- 
-         // U
-@@ -416,11 +416,11 @@ function ff_bgr24toyv12_aarch64, export=1
-         shrn            v26.4h, v26.4s, #12
-         shrn2           v26.8h, v27.4s, #12
-         add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        uqrshrn         v26.8b, v26.8h, #3
-+        sqrshrun        v26.8b, v26.8h, #3
-         shrn            v28.4h, v28.4s, #12
-         shrn2           v28.8h, v29.4s, #12
-         add             v28.8h, v28.8h, v6.8h
--        uqrshrn2        v26.16b, v28.8h, #3
-+        sqrshrun2       v26.16b, v28.8h, #3
-         // Y0/Y1
- 
-         subs            w9, w9, #16
-@@ -464,11 +464,11 @@ function ff_bgr24toyv12_aarch64, export=1
-         shrn            v26.4h, v26.4s, #12
-         shrn2           v26.8h, v27.4s, #12
-         add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        uqrshrn         v26.8b, v26.8h, #3
-+        sqrshrun        v26.8b, v26.8h, #3
-         shrn            v28.4h, v28.4s, #12
-         shrn2           v28.8h, v29.4s, #12
-         add             v28.8h, v28.8h, v6.8h
--        uqrshrn2        v26.16b, v28.8h, #3
-+        sqrshrun2       v26.16b, v28.8h, #3
-         // Y0/Y1
- 
-         // Here:
-
-From 0d553f498626e936ef1f48505ee260dbe2478d0c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sun, 4 Jun 2023 13:37:59 +0000
-Subject: [PATCH 133/186] v4l2_m2m_dec: Tweak pending count to use dts &
- reorder size
-
-(cherry picked from commit ca438b382c90f9a5f58f4708205e6ac25395db2a)
----
- libavcodec/v4l2_m2m.h     |  1 +
- libavcodec/v4l2_m2m_dec.c | 53 +++++++++++++++++++++++++++++++--------
- 2 files changed, 43 insertions(+), 11 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h
-index ded1478a49da..a506e69d674b 100644
---- a/libavcodec/v4l2_m2m.h
-+++ b/libavcodec/v4l2_m2m.h
-@@ -115,6 +115,7 @@ typedef struct V4L2m2mContext {
- 
-     /* req pkt */
-     int req_pkt;
-+    int reorder_size;
- 
-     /* Ext data sent */
-     int extdata_sent;
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index d124c7b1fc43..13af62e819bc 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -121,13 +121,18 @@ log_dump(void * logctx, int lvl, const void * const data, const size_t len)
- }
- #endif
- 
--static int64_t pts_stats_guess(const pts_stats_t * const stats)
-+static unsigned int pts_stats_interval(const pts_stats_t * const stats)
-+{
-+    return stats->last_interval;
-+}
-+
-+static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess)
- {
-     if (stats->last_count <= 1)
-         return stats->last_pts;
-     if (stats->last_pts == AV_NOPTS_VALUE ||
--            stats->last_interval == 0 ||
--            stats->last_count >= STATS_LAST_COUNT_MAX)
-+            fail_bad_guess && (stats->last_interval == 0 ||
-+                               stats->last_count >= STATS_LAST_COUNT_MAX))
-         return AV_NOPTS_VALUE;
-     return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval;
- }
-@@ -345,7 +350,7 @@ set_best_effort_pts(AVCodecContext *const avctx,
- {
-     pts_stats_add(ps, frame->pts);
- 
--    frame->best_effort_timestamp = pts_stats_guess(ps);
-+    frame->best_effort_timestamp = pts_stats_guess(ps, 1);
-     // If we can't guess from just PTS - try DTS
-     if (frame->best_effort_timestamp == AV_NOPTS_VALUE)
-         frame->best_effort_timestamp = frame->pkt_dts;
-@@ -380,15 +385,25 @@ xlat_init(xlat_track_t * const x)
- }
- 
- static int
--xlat_pending(const xlat_track_t * const x)
-+xlat_pending(const V4L2m2mContext * const s)
- {
-+    const xlat_track_t *const x = &s->xlat;
-     unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE;
-     int i;
--    const int64_t now = x->last_pts;
-+    const int64_t now = pts_stats_guess(&s->pts_stat, 0);
-+    int64_t first_dts = AV_NOPTS_VALUE;
-+    int no_dts_count = 0;
-+    unsigned int interval = pts_stats_interval(&s->pts_stat);
- 
-     for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) {
-         const V4L2m2mTrackEl * const t = x->track_els + n;
- 
-+        if (first_dts == AV_NOPTS_VALUE)
-+            if (t->dts == AV_NOPTS_VALUE)
-+                ++no_dts_count;
-+            else
-+                first_dts = t->dts;
-+
-         // Discard only set on never-set or flushed entries
-         // So if we get here we've never successfully decoded a frame so allow
-         // more frames into the buffer before stalling
-@@ -408,6 +423,18 @@ xlat_pending(const xlat_track_t * const x)
-             break;
-     }
- 
-+    if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) {
-+        const int iframes = (first_dts - now) / (int)interval;
-+        const int t = iframes - s->reorder_size + no_dts_count;
-+
-+//        av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n",
-+//               x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count);
-+
-+        if (iframes > 0 && iframes < 64 && t < i) {
-+            return t;
-+        }
-+    }
-+
-     return i;
- }
- 
-@@ -585,12 +612,12 @@ static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx)
- static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
- {
-     V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
--    int src_rv = NQ_OK;
-+    int src_rv = -1;
-     int dst_rv = 1;  // Non-zero (done), non-negative (error) number
-     unsigned int i = 0;
- 
-     do {
--        const int pending = xlat_pending(&s->xlat);
-+        const int pending = xlat_pending(s);
-         const int prefer_dq = (pending > 4);
-         const int last_src_rv = src_rv;
- 
-@@ -966,8 +993,10 @@ static uint32_t max_coded_size(const AVCodecContext * const avctx)
- }
- 
- static void
--parse_extradata(AVCodecContext *avctx)
-+parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)
- {
-+    s->reorder_size = 0;
-+
-     if (!avctx->extradata || !avctx->extradata_size)
-         return;
- 
-@@ -996,6 +1025,7 @@ parse_extradata(AVCodecContext *avctx)
-                     avctx->profile = ff_h264_get_profile(sps);
-                     avctx->level = sps->level_idc;
-                 }
-+                s->reorder_size = sps->num_reorder_frames;
-             }
-             ff_h264_ps_uninit(&ps);
-             break;
-@@ -1025,6 +1055,7 @@ parse_extradata(AVCodecContext *avctx)
-                 if (sps) {
-                     avctx->profile = sps->ptl.general_ptl.profile_idc;
-                     avctx->level   = sps->ptl.general_ptl.level_idc;
-+                    s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering;
-                 }
-             }
-             ff_hevc_ps_uninit(&ps);
-@@ -1057,12 +1088,12 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-         avctx->ticks_per_frame = 2;
-     }
- 
--    parse_extradata(avctx);
--
-     ret = ff_v4l2_m2m_create_context(priv, &s);
-     if (ret < 0)
-         return ret;
- 
-+    parse_extradata(avctx, s);
-+
-     xlat_init(&s->xlat);
-     pts_stats_init(&s->pts_stat, avctx, "decoder");
- 
-
-From 244b56393e0f6f1d63b894d942d148c6dd9a3862 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 7 Jun 2023 11:14:52 +0000
-Subject: [PATCH 134/186] v4l2_m2m: Add encode size check
-
-Previously an out of bounds size would fail whilst trying to copy the
-buffer with an unhelpful message. This produces a better error at init
-time.
-
-(cherry picked from commit 0b61c4617e26f043d28d44c8767f7b9fd4882f97)
----
- libavcodec/v4l2_m2m.c | 43 +++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 43 insertions(+)
-
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index f802687b1bb2..28d9ed49887e 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -109,6 +109,44 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe)
-     return AVERROR(EINVAL);
- }
- 
-+static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
-+{
-+    struct v4l2_format fmt = {.type = s->output.type};
-+    int rv;
-+    uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt);
-+    unsigned int w;
-+    unsigned int h;
-+
-+    if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) {
-+        fmt.fmt.pix_mp.pixelformat = pixfmt;
-+        fmt.fmt.pix_mp.width = avctx->width;
-+        fmt.fmt.pix_mp.height = avctx->height;
-+    }
-+    else {
-+        fmt.fmt.pix.pixelformat = pixfmt;
-+        fmt.fmt.pix.width = avctx->width;
-+        fmt.fmt.pix.height = avctx->height;
-+    }
-+
-+    rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt);
-+
-+    if (rv != 0) {
-+        rv = AVERROR(errno);
-+        av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv));
-+        return rv;
-+    }
-+
-+    w = ff_v4l2_get_format_width(&fmt);
-+    h = ff_v4l2_get_format_height(&fmt);
-+
-+    if (w < avctx->width || h < avctx->height) {
-+        av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h);
-+        return AVERROR(EINVAL);
-+    }
-+
-+    return 0;
-+}
-+
- static int v4l2_probe_driver(V4L2m2mContext *s)
- {
-     void *log_ctx = s->avctx;
-@@ -128,6 +166,11 @@ static int v4l2_probe_driver(V4L2m2mContext *s)
-         goto done;
-     }
- 
-+    // If being given frames (encode) check that V4L2 can cope with the size
-+    if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO &&
-+        (ret = check_size(s->avctx, s)) != 0)
-+        goto done;
-+
-     ret = ff_v4l2_context_get_format(&s->capture, 1);
-     if (ret) {
-         av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n");
-
-From 834a78de7eda652d34ad72ff6d63d4ba86d22fa3 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 9 Jun 2023 10:28:12 +0000
-Subject: [PATCH 135/186] vf_bwdif: Add attributes to ask for vectorization
-
-(cherry picked from commit 281250290ba5c2dcd8676e9a261050e65c10bcb7)
----
- libavfilter/vf_bwdif.c | 29 +++++++++++++++--------------
- 1 file changed, 15 insertions(+), 14 deletions(-)
-
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 65c617ebb335..09e68523bbfa 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -74,10 +74,10 @@ typedef struct ThreadData {
-         int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] - e)) >> 1; \
-         int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e)) >> 1; \
-         int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \
-- \
-+ {/*\
-         if (!diff) { \
-             dst[0] = d; \
--        } else {
-+        } else {*/
- 
- #define SPAT_CHECK() \
-             int b = ((prev2[mrefs2] + next2[mrefs2]) >> 1) - c; \
-@@ -89,15 +89,16 @@ typedef struct ThreadData {
-             diff = FFMAX3(diff, min, -max);
- 
- #define FILTER_LINE() \
-+            int i1, i2; \
-             SPAT_CHECK() \
--            if (FFABS(c - e) > temporal_diff0) { \
--                interpol = (((coef_hf[0] * (prev2[0] + next2[0]) \
-+            /*if (FFABS(c - e) > temporal_diff0)*/ { \
-+                i1 = (((coef_hf[0] * (prev2[0] + next2[0]) \
-                     - coef_hf[1] * (prev2[mrefs2] + next2[mrefs2] + prev2[prefs2] + next2[prefs2]) \
-                     + coef_hf[2] * (prev2[mrefs4] + next2[mrefs4] + prev2[prefs4] + next2[prefs4])) >> 2) \
-                     + coef_lf[0] * (c + e) - coef_lf[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \
--            } else { \
--                interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \
--            }
-+            } /*else*/ { \
-+                i2 = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \
-+            }interpol = FFABS(c - e) > temporal_diff0 ? i1:i2;\
- 
- #define FILTER_EDGE() \
-             if (spat) { \
-@@ -111,7 +112,7 @@ typedef struct ThreadData {
-             else if (interpol < d - diff) \
-                 interpol = d - diff; \
-  \
--            dst[0] = av_clip(interpol, 0, clip_max); \
-+            dst[0] = !diff ? d : av_clip(interpol, 0, clip_max); \
-         } \
-  \
-         dst++; \
-@@ -122,7 +123,7 @@ typedef struct ThreadData {
-         next2++; \
-     }
- 
--static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs,
-+static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs,
-                          int prefs3, int mrefs3, int parity, int clip_max)
- {
-     uint8_t *dst = dst1;
-@@ -132,7 +133,7 @@ static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs,
-     FILTER_INTRA()
- }
- 
--static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-+static void __attribute__((optimize("tree-vectorize"))) filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-                           int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                           int prefs3, int mrefs3, int prefs4, int mrefs4,
-                           int parity, int clip_max)
-@@ -150,7 +151,7 @@ static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-     FILTER2()
- }
- 
--static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1,
-+static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-                         int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                         int parity, int clip_max, int spat)
- {
-@@ -167,7 +168,7 @@ static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1,
-     FILTER2()
- }
- 
--static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mrefs,
-+static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs,
-                                int prefs3, int mrefs3, int parity, int clip_max)
- {
-     uint16_t *dst = dst1;
-@@ -177,7 +178,7 @@ static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mre
-     FILTER_INTRA()
- }
- 
--static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1,
-+static void __attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-                                 int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                                 int prefs3, int mrefs3, int prefs4, int mrefs4,
-                                 int parity, int clip_max)
-@@ -195,7 +196,7 @@ static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1
-     FILTER2()
- }
- 
--static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1,
-+static void __attribute__((optimize("tree-vectorize"))) filter_edge_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                               int parity, int clip_max, int spat)
- {
-
-From b4f4f90c9bda485a07a894f2700aecad25ff1781 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 13 Jun 2023 13:07:55 +0000
-Subject: [PATCH 136/186] v4l2m2m_dec: Fix h264 reorder size if no sps
- initially
-
-(cherry picked from commit 8832f7924bf47cbca0de251d7b406917f958ebf4)
----
- libavcodec/v4l2_m2m_dec.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 13af62e819bc..11c83b2d6643 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -1024,8 +1024,8 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)
-                 if (sps) {
-                     avctx->profile = ff_h264_get_profile(sps);
-                     avctx->level = sps->level_idc;
-+                    s->reorder_size = sps->num_reorder_frames;
-                 }
--                s->reorder_size = sps->num_reorder_frames;
-             }
-             ff_h264_ps_uninit(&ps);
-             break;
-
-From f9124edee3874fc5ac9633f59248a9122f22e9a1 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 30 Jun 2023 18:03:29 +0000
-Subject: [PATCH 137/186] sand_fns: Add missing uxtw for neon stride
-
----
- libavutil/aarch64/rpi_sand_neon.S | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
-index 2f07d9674c9f..19411cf3f19a 100644
---- a/libavutil/aarch64/rpi_sand_neon.S
-+++ b/libavutil/aarch64/rpi_sand_neon.S
-@@ -469,6 +469,7 @@ endfunc
- function ff_rpi_sand30_lines_to_planar_y16, export=1
-                 lsl             w4,  w4,  #7
-                 sub             w4,  w4,  #64
-+                uxtw            x4,  w4
-                 sub             w1,  w1,  w7, lsl #1
-                 uxtw            x6,  w6
-                 add             x8,  x2,  x6, lsl #7
-@@ -634,6 +635,7 @@ endfunc
- function ff_rpi_sand30_lines_to_planar_y8, export=1
-                 lsl             w4,  w4,  #7
-                 sub             w4,  w4,  #64
-+                uxtw            x4,  w4
-                 sub             w1,  w1,  w7
-                 uxtw            x6,  w6
-                 add             x8,  x2,  x6, lsl #7
-
-From feed60f18216ea49b2ec7d54c71d342cb7c16c6e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 30 Jun 2023 18:12:16 +0000
-Subject: [PATCH 138/186] sand_fns: Rework aarch64 neon
- sand30_lines_to_planar_c16
-
-Previous version could overflow its write buffer on small buffers
-which sometimes crashed WPP_F_ericsson_MAIN10_2.
-
-This version is probably faster too
----
- libavutil/aarch64/rpi_sand_neon.S | 329 ++++++++++++++----------------
- 1 file changed, 151 insertions(+), 178 deletions(-)
-
-diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
-index 19411cf3f19a..af7e2a88c44b 100644
---- a/libavutil/aarch64/rpi_sand_neon.S
-+++ b/libavutil/aarch64/rpi_sand_neon.S
-@@ -248,199 +248,172 @@ incomplete_block_loop_end_c8:
-     ret
- endfunc
- 
--//void ff_rpi_sand30_lines_to_planar_c16(
--//  uint8_t * dst_u,            // [x0]
--//  unsigned int dst_stride_u,  // [w1] == _w*2
--//  uint8_t * dst_v,            // [x2]
--//  unsigned int dst_stride_v,  // [w3] == _w*2
--//  const uint8_t * src,        // [x4]
--//  unsigned int stride1,       // [w5] == 128
--//  unsigned int stride2,       // [w6] 
--//  unsigned int _x,            // [w7] == 0
--//  unsigned int y,             // [sp, #0] == 0
--//  unsigned int _w,            // [sp, #8] -> w3
--//  unsigned int h);            // [sp, #16] -> w7
--
--.macro rpi_sand30_lines_to_planar_c16_block_half
--    ld1 { v0.4s,  v1.4s, v2.4s, v3.4s }, [x13], #64
--
--    xtn v4.4h, v0.4s
--    ushr v0.4s, v0.4s, #10
--    xtn v5.4h, v0.4s
--    ushr v0.4s, v0.4s, #10
--    xtn v6.4h, v0.4s
--    xtn2 v4.8h, v1.4s
--    ushr v1.4s, v1.4s, #10
--    xtn2 v5.8h, v1.4s
--    ushr v1.4s, v1.4s, #10
--    xtn2 v6.8h, v1.4s
--    and v4.16b, v4.16b, v16.16b
--    and v5.16b, v5.16b, v16.16b
--    and v6.16b, v6.16b, v16.16b
--    st3 { v4.8h, v5.8h, v6.8h }, [sp], #48
--    
--    xtn v4.4h, v2.4s
--    ushr v2.4s, v2.4s, #10
--    xtn v5.4h, v2.4s
--    ushr v2.4s, v2.4s, #10
--    xtn v6.4h, v2.4s
--    xtn2 v4.8h, v3.4s
--    ushr v3.4s, v3.4s, #10
--    xtn2 v5.8h, v3.4s
--    ushr v3.4s, v3.4s, #10
--    xtn2 v6.8h, v3.4s
--    and v4.16b, v4.16b, v16.16b
--    and v5.16b, v5.16b, v16.16b
--    and v6.16b, v6.16b, v16.16b
--    st3 { v4.8h, v5.8h, v6.8h }, [sp]
--    sub sp, sp, #48
--.endm
--
--function ff_rpi_sand30_lines_to_planar_c16, export=1
--    stp x19, x20, [sp, #-48]!
--    stp x21, x22, [sp, #16]
--    stp x23, x24, [sp, #32]
--
--    ldr w3, [sp, #48+8]    // w3 = width
--    ldr w7, [sp, #48+16]   // w7 = height
--
--    // reserve space on the stack for intermediate results
--    sub sp, sp, #256
-+// Unzip chroma
-+//
-+// On entry:
-+// a0 = V0, U2,  ...
-+// a1 = U0, V1,  ...
-+// a2 = U1, V2,  ...
-+// b0 = V8, U10, ...
-+// b1 = U8, V9,  ...
-+// b2 = U9, V10, ...
-+//
-+// On exit:
-+// d0 = U0, U3, ...
-+// ...
-+// a0 = V0, V3, ..
-+// ...
-+//
-+// Reg order for USAND is a1, a0, a2 (i.e. swap natural order of 1st 2 dest regs)
- 
--    // number of 128byte blocks per row, w8 = width / 48
--    mov w9, #48
--    udiv w8, w3, w9
-+.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2
-+                uzp1            \d0\().8h, \a1\().8h, \b1\().8h
-+                uzp1            \d1\().8h, \a2\().8h, \b2\().8h
-+                uzp2            \d2\().8h, \a0\().8h, \b0\().8h
- 
--    // remaining pixels (rem_pix) per row, w9 = width - w8 * 48
--    mul w9, w8, w9
--    sub w9, w3, w9
-+                uzp1            \a0\().8h, \a0\().8h, \b0\().8h
-+                uzp2            \a1\().8h, \a1\().8h, \b1\().8h
-+                uzp2            \a2\().8h, \a2\().8h, \b2\().8h
-+.endm
- 
--    // row offset, the beginning of the next row to process
--    eor w10, w10, w10
-+// SAND30 -> 10bit
-+.macro USAND10 d0, d1, d2, a0, a1
-+                shrn            \d2\().4h, \a0\().4s, #14
-+                xtn             \d0\().4h, \a0\().4s
-+                shrn            \d1\().4h, \a0\().4s, #10
- 
--    // offset to the beginning of the next block, w11 = stride2 * 128 - 128
--    lsl w11, w6, #7
--    sub w11, w11, #128
-+                shrn2           \d2\().8h, \a1\().4s, #14
-+                xtn2            \d0\().8h, \a1\().4s
-+                shrn2           \d1\().8h, \a1\().4s, #10
- 
--    // decrease the height by one and in case of remaining pixels increase the block count by one
--    sub w7, w7, #1
--    cmp w9, #0
--    cset w19, ne    // w19 == 1 iff reamining pixels != 0
--    add w8, w8, w19
-+                ushr            \d2\().8h, \d2\().8h, #6
-+                bic             \d0\().8h, #0xfc,     lsl #8
-+                bic             \d1\().8h, #0xfc,     lsl #8
-+.endm
- 
--    // bytes we have to move dst back by at the end of every row
--    mov w21, #48*2
--    mul w21, w21, w8
--    sub w21, w1, w21
-+// void ff_rpi_sand30_lines_to_planar_c16(
-+//   uint8_t * dst_u,            // [x0]
-+//   unsigned int dst_stride_u,  // [w1]
-+//   uint8_t * dst_v,            // [x2]
-+//   unsigned int dst_stride_v,  // [w3]
-+//   const uint8_t * src,        // [x4]
-+//   unsigned int stride1,       // [w5]      128
-+//   unsigned int stride2,       // [w6]
-+//   unsigned int _x,            // [w7]      0
-+//   unsigned int y,             // [sp, #0]
-+//   unsigned int _w,            // [sp, #8]  w9
-+//   unsigned int h);            // [sp, #16] w10
- 
--    mov w20, #0     // w20 = flag, last row processed
-+function ff_rpi_sand30_lines_to_planar_c16, export=1
-+                ldr             w7,  [sp, #0]                   // y
-+                ldr             w8,  [sp, #8]                   // _w
-+                ldr             w10, [sp, #16]                  // h
-+                lsl             w6,  w6,  #7                    // Fixup stride2
-+                sub             w6,  w6,  #64
-+                uxtw            x6,  w6
-+                sub             w1,  w1,  w8,  LSL #1           // Fixup chroma strides
-+                sub             w3,  w3,  w8,  LSL #1
-+                lsl             w7,  w7,  #7                    // Add y to src
-+                add             x4,  x4,  w7,  UXTW
-+10:
-+                mov             w13, #0
-+                mov             x5,  x4
-+                mov             w9,  w8
-+1:
-+                ld1             {v0.4s-v3.4s}, [x5], #64
-+                ld1             {v4.4s-v7.4s}, [x5], x6
- 
--    mov x12, #0x03ff03ff03ff03ff
--    dup v16.2d, x12
-+                USAND10         v17, v16, v18, v0, v1
-+                USAND10         v20, v19, v21, v2, v3
-+                UZPH_C          v0, v1, v2, v16, v17, v18, v19, v20, v21
-+                USAND10         v23, v22, v24, v4, v5
-+                USAND10         v26, v25, v27, v6, v7
-+                UZPH_C          v4, v5, v6, v22, v23, v24, v25, v26, v27
- 
--    // iterate through rows, row counter = w12 = 0
--    eor w12, w12, w12
--row_loop_c16:
--    cmp w12, w7
--    bge row_loop_c16_fin
-+                subs            w9,  w9,  #48
-+                blt             2f
- 
--    // address of row data = src + row_offset
--    mov x13, x4
--    add x13, x13, x10
-+                st3             {v0.8h-v2.8h},   [x0], #48
-+                st3             {v4.8h-v6.8h},   [x0], #48
-+                st3             {v16.8h-v18.8h}, [x2], #48
-+                st3             {v22.8h-v24.8h}, [x2], #48
- 
--    eor w14, w14, w14
--block_loop_c16:
--    cmp w14, w8
--    bge block_loop_c16_fin
--
--    rpi_sand30_lines_to_planar_c16_block_half
--
--    ld2 { v0.8h, v1.8h }, [sp], #32
--    ld2 { v2.8h, v3.8h }, [sp], #32
--    ld2 { v4.8h, v5.8h }, [sp]
--    sub sp, sp, #64
--
--    st1 { v0.8h }, [x0], #16
--    st1 { v2.8h }, [x0], #16
--    st1 { v4.8h }, [x0], #16
--    st1 { v1.8h }, [x2], #16
--    st1 { v3.8h }, [x2], #16
--    st1 { v5.8h }, [x2], #16
--
--    rpi_sand30_lines_to_planar_c16_block_half
--
--    ld2 { v0.8h, v1.8h }, [sp], #32
--    ld2 { v2.8h, v3.8h }, [sp], #32
--    ld2 { v4.8h, v5.8h }, [sp]
--    sub sp, sp, #64
--
--    st1 { v0.8h }, [x0], #16
--    st1 { v2.8h }, [x0], #16
--    st1 { v4.8h }, [x0], #16
--    st1 { v1.8h }, [x2], #16
--    st1 { v3.8h }, [x2], #16
--    st1 { v5.8h }, [x2], #16
--
--    add x13, x13, x11 // offset to next block
--    add w14, w14, #1
--    b block_loop_c16
--block_loop_c16_fin:
-+                bne             1b
-+11:
-+                subs            w10, w10, #1
-+                add             x4,  x4,  #128
-+                add             x0,  x0,  w1,  UXTW
-+                add             x2,  x2,  w3,  UXTW
-+                bne             10b
-+99:
-+                ret
- 
--    add w10, w10, #128
--    add w12, w12, #1
--    add x0, x0, w21, sxtw  // move dst pointers back by x21
--    add x2, x2, w21, sxtw
--    b row_loop_c16
--row_loop_c16_fin:
--
--    cmp w20, #1
--    beq row_loop_c16_fin2
--    mov w20, #1
--    sub w8, w8, w19 // decrease block count by w19
--    add w7, w7, #1 // increase height
--    b row_loop_c16
--
--row_loop_c16_fin2:
--    sub x0, x0, w21, sxtw // readd x21 in case of the last row
--    sub x2, x2, w21, sxtw // so that we can write out the few remaining pixels
--
--    // last incomplete block to be finished
--    // read operations are fine, stride2 is more than large enough even if rem_pix is 0
--    rpi_sand30_lines_to_planar_c16_block_half
--    ld2 { v0.8h, v1.8h }, [sp], #32
--    ld2 { v2.8h, v3.8h }, [sp], #32
--    ld2 { v4.8h, v5.8h }, [sp], #32
--    rpi_sand30_lines_to_planar_c16_block_half
--    ld2 { v0.8h, v1.8h }, [sp], #32
--    ld2 { v2.8h, v3.8h }, [sp], #32
--    ld2 { v4.8h, v5.8h }, [sp]
--    sub sp, sp, #160
--
--    mov x4, sp
--    eor w20, w20, w20
--rem_pix_c16_loop:
--    cmp w20, w9
--    bge rem_pix_c16_fin
--
--    ldr w22, [x4], #4
--    str w22, [x0], #2
--    lsr w22, w22, #16
--    str w22, [x2], #2 
--
--    add w20, w20, #1
--    b rem_pix_c16_loop
--rem_pix_c16_fin:
--
--    add sp, sp, #256
--
--    ldp x23, x24, [sp, #32]
--    ldp x21, x22, [sp, #16]
--    ldp x19, x20, [sp], #48
--    ret
-+// Partial final write
-+2:
-+                cmp             w9,  #24-48
-+                blt             1f
-+                st3             {v0.8h  - v2.8h},  [x0], #48
-+                st3             {v16.8h - v18.8h}, [x2], #48
-+                beq             11b
-+                mov             v0.16b,  v4.16b
-+                mov             v1.16b,  v5.16b
-+                sub             w9,  w9,  #24
-+                mov             v2.16b,  v6.16b
-+                mov             v16.16b, v22.16b
-+                mov             v17.16b, v23.16b
-+                mov             v18.16b, v24.16b
-+1:
-+                cmp             w9,  #12-48
-+                blt             1f
-+                st3             {v0.4h  - v2.4h},  [x0], #24
-+                st3             {v16.4h - v18.4h}, [x2], #24
-+                beq             11b
-+                mov             v0.2d[0],  v0.2d[1]
-+                sub             w9,  w9,  #12
-+                mov             v1.2d[0],  v1.2d[1]
-+                mov             v2.2d[0],  v2.2d[1]
-+                mov             v16.2d[0], v16.2d[1]
-+                mov             v17.2d[0], v17.2d[1]
-+                mov             v18.2d[0], v18.2d[1]
-+1:
-+                cmp             w9,  #6-48
-+                blt             1f
-+                st3             {v0.h  - v2.h}[0],  [x0], #6
-+                st3             {v0.h  - v2.h}[1],  [x0], #6
-+                st3             {v16.h - v18.h}[0], [x2], #6
-+                st3             {v16.h - v18.h}[1], [x2], #6
-+                beq             11b
-+                mov             v0.s[0],  v0.s[1]
-+                sub             w9,  w9,  #6
-+                mov             v1.s[0],  v1.s[1]
-+                mov             v2.s[0],  v2.s[1]
-+                mov             v16.s[0], v16.s[1]
-+                mov             v17.s[0], v17.s[1]
-+                mov             v18.s[0], v18.s[1]
-+1:
-+                cmp             w9,  #3-48
-+                blt             1f
-+                st3             {v0.h  - v2.h}[0],  [x0], #6
-+                st3             {v16.h - v18.h}[0], [x2], #6
-+                beq             11b
-+                mov             v0.h[0],  v0.h[1]
-+                sub             w9,  w9,  #3
-+                mov             v1.h[0],  v1.h[1]
-+                mov             v16.h[0], v16.h[1]
-+                mov             v17.h[0], v17.h[1]
-+1:
-+                cmp             w9,  #2-48
-+                blt             1f
-+                st2             {v0.h  - v1.h}[0],  [x0], #4
-+                st2             {v16.h - v17.h}[0], [x2], #4
-+                b               11b
-+1:
-+                st1             {v0.h}[0],  [x0], #2
-+                st1             {v16.h}[0], [x2], #2
-+                b               11b
- endfunc
- 
- 
--
- //void ff_rpi_sand30_lines_to_planar_p010(
- //  uint8_t * dest,
- //  unsigned int dst_stride,
-
-From 7f8e8bb693607117f5f7bff2ee7ac7f841f3f726 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 30 Jun 2023 19:41:06 +0000
-Subject: [PATCH 139/186] sand_fns: Minor optimisations to aarch64 neon
-
----
- libavutil/aarch64/rpi_sand_neon.S | 140 ++++++------------------------
- 1 file changed, 28 insertions(+), 112 deletions(-)
-
-diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
-index af7e2a88c44b..11658de0c8c2 100644
---- a/libavutil/aarch64/rpi_sand_neon.S
-+++ b/libavutil/aarch64/rpi_sand_neon.S
-@@ -279,18 +279,37 @@ endfunc
- // SAND30 -> 10bit
- .macro USAND10 d0, d1, d2, a0, a1
-                 shrn            \d2\().4h, \a0\().4s, #14
--                xtn             \d0\().4h, \a0\().4s
-                 shrn            \d1\().4h, \a0\().4s, #10
- 
-                 shrn2           \d2\().8h, \a1\().4s, #14
--                xtn2            \d0\().8h, \a1\().4s
-                 shrn2           \d1\().8h, \a1\().4s, #10
-+                uzp1            \d0\().8h, \a0\().8h, \a1\().8h
- 
-                 ushr            \d2\().8h, \d2\().8h, #6
-                 bic             \d0\().8h, #0xfc,     lsl #8
-                 bic             \d1\().8h, #0xfc,     lsl #8
- .endm
- 
-+// SAND30 -> 8bit
-+.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2
-+                shrn            \d1\().4h,  \a0\().4s,  #12
-+                shrn2           \d1\().8h,  \a1\().4s,  #12
-+                uzp1            \d0\().8h,  \a0\().8h,  \a1\().8h
-+                uzp2            \d2\().8h,  \a0\().8h,  \a1\().8h
-+
-+                shrn            \t1\().4h,  \a2\().4s,  #12
-+                shrn2           \t1\().8h,  \a3\().4s,  #12
-+                uzp1            \t0\().8h,  \a2\().8h,  \a3\().8h
-+                uzp2            \t2\().8h,  \a2\().8h,  \a3\().8h
-+
-+                shrn            \d0\().8b,  \d0\().8h,  #2
-+                shrn2           \d0\().16b, \t0\().8h,  #2
-+                shrn            \d2\().8b,  \d2\().8h,  #6
-+                shrn2           \d2\().16b, \t2\().8h,  #6
-+                uzp1            \d1\().16b, \d1\().16b, \t1\().16b
-+.endm
-+
-+
- // void ff_rpi_sand30_lines_to_planar_c16(
- //   uint8_t * dst_u,            // [x0]
- //   unsigned int dst_stride_u,  // [w1]
-@@ -322,6 +341,7 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1
- 1:
-                 ld1             {v0.4s-v3.4s}, [x5], #64
-                 ld1             {v4.4s-v7.4s}, [x5], x6
-+                subs            w9,  w9,  #48
- 
-                 USAND10         v17, v16, v18, v0, v1
-                 USAND10         v20, v19, v21, v2, v3
-@@ -330,7 +350,6 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1
-                 USAND10         v26, v25, v27, v6, v7
-                 UZPH_C          v4, v5, v6, v22, v23, v24, v25, v26, v27
- 
--                subs            w9,  w9,  #48
-                 blt             2f
- 
-                 st3             {v0.8h-v2.8h},   [x0], #48
-@@ -457,61 +476,10 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1
- 
-                 subs            w5,  w5,  #96
- 
--                // v0, v1
--
--                shrn            v18.4h,  v0.4s,   #14
--                xtn             v16.4h,  v0.4s
--                shrn            v17.4h,  v0.4s,   #10
--
--                shrn2           v18.8h,  v1.4s,   #14
--                xtn2            v16.8h,  v1.4s
--                shrn2           v17.8h,  v1.4s,   #10
--
--                ushr            v18.8h,  v18.8h,  #6
--                bic             v16.8h,  #0xfc,   lsl #8
--                bic             v17.8h,  #0xfc,   lsl #8
--
--                // v2, v3
--
--                shrn            v21.4h,  v2.4s,   #14
--                xtn             v19.4h,  v2.4s
--                shrn            v20.4h,  v2.4s,   #10
--
--                shrn2           v21.8h,  v3.4s,   #14
--                xtn2            v19.8h,  v3.4s
--                shrn2           v20.8h,  v3.4s,   #10
--
--                ushr            v21.8h,  v21.8h,  #6
--                bic             v19.8h,  #0xfc,   lsl #8
--                bic             v20.8h,  #0xfc,   lsl #8
--
--                // v4, v5
--
--                shrn            v24.4h,  v4.4s,   #14
--                xtn             v22.4h,  v4.4s
--                shrn            v23.4h,  v4.4s,   #10
--
--                shrn2           v24.8h,  v5.4s,   #14
--                xtn2            v22.8h,  v5.4s
--                shrn2           v23.8h,  v5.4s,   #10
--
--                ushr            v24.8h,  v24.8h,  #6
--                bic             v22.8h,  #0xfc,   lsl #8
--                bic             v23.8h,  #0xfc,   lsl #8
--
--                // v6, v7
--
--                shrn            v27.4h,  v6.4s,   #14
--                xtn             v25.4h,  v6.4s
--                shrn            v26.4h,  v6.4s,   #10
--
--                shrn2           v27.8h,  v7.4s,   #14
--                xtn2            v25.8h,  v7.4s
--                shrn2           v26.8h,  v7.4s,   #10
--
--                ushr            v27.8h,  v27.8h,  #6
--                bic             v25.8h,  #0xfc,   lsl #8
--                bic             v26.8h,  #0xfc,   lsl #8
-+                USAND10         v16, v17, v18, v0, v1
-+                USAND10         v19, v20, v21, v2, v3
-+                USAND10         v22, v23, v24, v4, v5
-+                USAND10         v25, v26, v27, v6, v7
- 
-                 blt             2f
- 
-@@ -624,60 +592,8 @@ function ff_rpi_sand30_lines_to_planar_y8, export=1
-                 subs            w5,  w5,  #96
- 
-                 // v0, v1
--
--                shrn            v18.4h,  v0.4s,   #16
--                xtn             v16.4h,  v0.4s
--                shrn            v17.4h,  v0.4s,   #12
--
--                shrn2           v18.8h,  v1.4s,   #16
--                xtn2            v16.8h,  v1.4s
--                shrn2           v17.8h,  v1.4s,   #12
--
--                shrn            v18.8b,  v18.8h,  #6
--                shrn            v16.8b,  v16.8h,  #2
--                xtn             v17.8b,  v17.8h
--
--                // v2, v3
--
--                shrn            v21.4h,  v2.4s,   #16
--                xtn             v19.4h,  v2.4s
--                shrn            v20.4h,  v2.4s,   #12
--
--                shrn2           v21.8h,  v3.4s,   #16
--                xtn2            v19.8h,  v3.4s
--                shrn2           v20.8h,  v3.4s,   #12
--
--                shrn2           v18.16b, v21.8h,  #6
--                shrn2           v16.16b, v19.8h,  #2
--                xtn2            v17.16b, v20.8h
--
--                // v4, v5
--
--                shrn            v24.4h,  v4.4s,   #16
--                xtn             v22.4h,  v4.4s
--                shrn            v23.4h,  v4.4s,   #12
--
--                shrn2           v24.8h,  v5.4s,   #16
--                xtn2            v22.8h,  v5.4s
--                shrn2           v23.8h,  v5.4s,   #12
--
--                shrn            v21.8b,  v24.8h,  #6
--                shrn            v19.8b,  v22.8h,  #2
--                xtn             v20.8b,  v23.8h
--
--                // v6, v7
--
--                shrn            v27.4h,  v6.4s,   #16
--                xtn             v25.4h,  v6.4s
--                shrn            v26.4h,  v6.4s,   #12
--
--                shrn2           v27.8h,  v7.4s,   #16
--                xtn2            v25.8h,  v7.4s
--                shrn2           v26.8h,  v7.4s,   #12
--
--                shrn2           v21.16b, v27.8h,  #6
--                shrn2           v19.16b, v25.8h,  #2
--                xtn2            v20.16b, v26.8h
-+                USAND8          v16, v17, v18, v0, v1, v2, v3, v22, v23, v24
-+                USAND8          v19, v20, v21, v4, v5, v6, v7, v22, v23, v24
- 
-                 blt             2f
- 
-
-From 700b43043a725509ef9cb6e1d51b28d1b96a6914 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sat, 1 Jul 2023 18:43:32 +0000
-Subject: [PATCH 140/186] sand_fns: Add test for neon to sand30 fns so they can
- be tested by checkasm
-
----
- libavutil/rpi_sand_fns.c | 10 ++++++----
- 1 file changed, 6 insertions(+), 4 deletions(-)
-
-diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c
-index b6071e2928f7..0626bb06cb1b 100644
---- a/libavutil/rpi_sand_fns.c
-+++ b/libavutil/rpi_sand_fns.c
-@@ -35,10 +35,12 @@ Authors: John Cox
- #include "frame.h"
- 
- #if ARCH_ARM && HAVE_NEON
--#include "arm/rpi_sand_neon.h"
-+#include "libavutil/arm/cpu.h"
-+#include "libavutil/arm/rpi_sand_neon.h"
- #define HAVE_SAND_ASM 1
- #elif ARCH_AARCH64 && HAVE_NEON
--#include "aarch64/rpi_sand_neon.h"
-+#include "libavutil/aarch64/cpu.h"
-+#include "libavutil/aarch64/rpi_sand_neon.h"
- #define HAVE_SAND_ASM 1
- #else
- #define HAVE_SAND_ASM 0
-@@ -97,7 +99,7 @@ void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
-     const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
- 
- #if HAVE_SAND_ASM
--    if (_x == 0) {
-+    if (_x == 0 && have_neon(av_get_cpu_flags())) {
-         ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
-         return;
-     }
-@@ -163,7 +165,7 @@ void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_
-     const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
- 
- #if HAVE_SAND_ASM
--    if (_x == 0) {
-+    if (_x == 0 && have_neon(av_get_cpu_flags())) {
-         ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v,
-                                        src, stride1, stride2, _x, y, _w, h);
-         return;
-
-From a0a5898d3d19aaa5324e5c64e526c6bb7f39f62b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sat, 1 Jul 2023 18:43:57 +0000
-Subject: [PATCH 141/186] checkasm: Add tests for rpi_sand sand30 fns
-
-Something of a kludge for function selection as, at the moment, the
-rpi_sand fns don't have a jump table that we could use for selection.
----
- tests/checkasm/Makefile   |   3 +-
- tests/checkasm/checkasm.c |   3 +
- tests/checkasm/checkasm.h |   1 +
- tests/checkasm/rpi_sand.c | 118 ++++++++++++++++++++++++++++++++++++++
- tests/fate/checkasm.mak   |   1 +
- 5 files changed, 125 insertions(+), 1 deletion(-)
- create mode 100644 tests/checkasm/rpi_sand.c
-
 diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
-index a6f06c7007c2..66291baf3375 100644
+index ae324ced3f0c..3d1004f934f9 100644
 --- a/tests/checkasm/Makefile
 +++ b/tests/checkasm/Makefile
-@@ -59,8 +59,9 @@ CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
- AVUTILOBJS                              += av_tx.o
+@@ -73,8 +73,9 @@ AVUTILOBJS                              += av_tx.o
  AVUTILOBJS                              += fixed_dsp.o
  AVUTILOBJS                              += float_dsp.o
+ AVUTILOBJS                              += lls.o
 +AVUTILOBJS-$(CONFIG_SAND)               += rpi_sand.o
  
 -CHECKASMOBJS-$(CONFIG_AVUTIL)  += $(AVUTILOBJS)
@@ -35635,12 +23439,12 @@ index a6f06c7007c2..66291baf3375 100644
  CHECKASMOBJS-$(ARCH_AARCH64)            += aarch64/checkasm.o
  CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL)   += arm/checkasm.o
 diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
-index e96d84a7daef..57e0091b806a 100644
+index 73a998ae3a94..ed703c1956bd 100644
 --- a/tests/checkasm/checkasm.c
 +++ b/tests/checkasm/checkasm.c
-@@ -210,6 +210,9 @@ static const struct {
-         { "fixed_dsp", checkasm_check_fixed_dsp },
+@@ -290,6 +290,9 @@ static const struct {
          { "float_dsp", checkasm_check_float_dsp },
+         { "lls",       checkasm_check_lls },
          { "av_tx",     checkasm_check_av_tx },
 +    #if CONFIG_SAND
 +        { "rpi_sand",  checkasm_check_rpi_sand },
@@ -35649,23 +23453,23 @@ index e96d84a7daef..57e0091b806a 100644
      { NULL }
  };
 diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
-index 8744a81218a8..f4a0d20358c0 100644
+index 866eef01e98d..17584bf9c4e1 100644
 --- a/tests/checkasm/checkasm.h
 +++ b/tests/checkasm/checkasm.h
-@@ -73,6 +73,7 @@ void checkasm_check_motion(void);
+@@ -114,6 +114,7 @@ void checkasm_check_mpegvideoencdsp(void);
  void checkasm_check_nlmeans(void);
  void checkasm_check_opusdsp(void);
  void checkasm_check_pixblockdsp(void);
 +void checkasm_check_rpi_sand(void);
  void checkasm_check_sbrdsp(void);
- void checkasm_check_synth_filter(void);
- void checkasm_check_sw_gbrp(void);
+ void checkasm_check_rv34dsp(void);
+ void checkasm_check_rv40dsp(void);
 diff --git a/tests/checkasm/rpi_sand.c b/tests/checkasm/rpi_sand.c
 new file mode 100644
-index 000000000000..0888714c4c5c
+index 000000000000..dd9c1350bc2a
 --- /dev/null
 +++ b/tests/checkasm/rpi_sand.c
-@@ -0,0 +1,118 @@
+@@ -0,0 +1,122 @@
 +/*
 + * Copyright (c) 2023 John Cox
 + *
@@ -35697,6 +23501,10 @@ index 000000000000..0888714c4c5c
 +#elif ARCH_AARCH64
 +#include "libavutil/aarch64/cpu.h"
 +#include "libavutil/aarch64/rpi_sand_neon.h"
++#else
++#define have_neon(flags) 0
++#define ff_rpi_sand30_lines_to_planar_y16 NULL
++#define ff_rpi_sand30_lines_to_planar_c16 NULL
 +#endif
 +
 +static inline uint32_t pack30(unsigned int a, unsigned int b, unsigned int c)
@@ -35785,4721 +23593,118 @@ index 000000000000..0888714c4c5c
 +}
 +
 diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
-index a4e95541f56c..6fda6d227e13 100644
+index d1396cb64161..7121d5461ef9 100644
 --- a/tests/fate/checkasm.mak
 +++ b/tests/fate/checkasm.mak
-@@ -27,6 +27,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
-                 fate-checkasm-motion                                    \
+@@ -36,6 +36,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp                                 \
+                 fate-checkasm-mpegvideoencdsp                           \
                  fate-checkasm-opusdsp                                   \
                  fate-checkasm-pixblockdsp                               \
 +                fate-checkasm-rpi_sand                                  \
                  fate-checkasm-sbrdsp                                    \
-                 fate-checkasm-synth_filter                              \
-                 fate-checkasm-sw_gbrp                                   \
-
-From 3d9471725138933bc137fff7ed85d2fcc277f2af Mon Sep 17 00:00:00 2001
-From: James Darnley <jdarnley@obe.tv>
-Date: Mon, 20 Feb 2023 20:55:08 +0100
-Subject: [PATCH 142/186] avfilter/bwdif: move filter_line init to a dedicated
- function
-
-(cherry picked from commit b503b5a0cf80f38ecf4737c012b621b7e94f242a)
----
- libavfilter/bwdif.h             |  3 ++-
- libavfilter/vf_bwdif.c          | 13 +++++++++----
- libavfilter/x86/vf_bwdif_init.c |  4 +---
- 3 files changed, 12 insertions(+), 8 deletions(-)
-
-diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
-index 889ff772edd8..5749345f784e 100644
---- a/libavfilter/bwdif.h
-+++ b/libavfilter/bwdif.h
-@@ -37,6 +37,7 @@ typedef struct BWDIFContext {
-                         int parity, int clip_max, int spat);
- } BWDIFContext;
- 
--void ff_bwdif_init_x86(BWDIFContext *bwdif);
-+void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
-+void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
- 
- #endif /* AVFILTER_BWDIF_H */
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 09e68523bbfa..539fabbd4686 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -341,7 +341,14 @@ static int config_props(AVFilterLink *link)
- 
-     yadif->csp = av_pix_fmt_desc_get(link->format);
-     yadif->filter = filter;
--    if (yadif->csp->comp[0].depth > 8) {
-+    ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth);
-+
-+    return 0;
-+}
-+
-+av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
-+{
-+    if (bit_depth > 8) {
-         s->filter_intra = filter_intra_16bit;
-         s->filter_line  = filter_line_c_16bit;
-         s->filter_edge  = filter_edge_16bit;
-@@ -352,10 +359,8 @@ static int config_props(AVFilterLink *link)
-     }
- 
- #if ARCH_X86
--    ff_bwdif_init_x86(s);
-+    ff_bwdif_init_x86(s, bit_depth);
- #endif
--
--    return 0;
- }
- 
- 
-diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c
-index e24e5cd9b1c5..ba7bc40c3d30 100644
---- a/libavfilter/x86/vf_bwdif_init.c
-+++ b/libavfilter/x86/vf_bwdif_init.c
-@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne
-                                       int mrefs2, int prefs3, int mrefs3, int prefs4,
-                                       int mrefs4, int parity, int clip_max);
- 
--av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif)
-+av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth)
- {
--    YADIFContext *yadif = &bwdif->yadif;
-     int cpu_flags = av_get_cpu_flags();
--    int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth;
- 
-     if (bit_depth <= 8) {
-         if (EXTERNAL_SSE2(cpu_flags))
-
-From be14915902bc8d4a9aff0776dc976a17f0a27215 Mon Sep 17 00:00:00 2001
-From: James Darnley <jdarnley@obe.tv>
-Date: Mon, 20 Feb 2023 20:55:08 +0100
-Subject: [PATCH 143/186] checkasm: add test for bwdif
-
-(cherry picked from commit 087faf8cac51e5e20a5f41b36b8d4c2705a10039)
----
- tests/checkasm/Makefile   |  1 +
- tests/checkasm/checkasm.c |  3 ++
- tests/checkasm/checkasm.h |  1 +
- tests/checkasm/vf_bwdif.c | 84 +++++++++++++++++++++++++++++++++++++++
- tests/fate/checkasm.mak   |  1 +
- 5 files changed, 90 insertions(+)
- create mode 100644 tests/checkasm/vf_bwdif.c
-
-diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
-index 66291baf3375..2c80d8e66116 100644
---- a/tests/checkasm/Makefile
-+++ b/tests/checkasm/Makefile
-@@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC)          += $(AVCODECOBJS-yes)
- # libavfilter tests
- AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o
- AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o
-+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER)      += vf_bwdif.o
- AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o
- AVFILTEROBJS-$(CONFIG_EQ_FILTER)         += vf_eq.o
- AVFILTEROBJS-$(CONFIG_GBLUR_FILTER)      += vf_gblur.o
-diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
-index 57e0091b806a..4f983d7fbc9a 100644
---- a/tests/checkasm/checkasm.c
-+++ b/tests/checkasm/checkasm.c
-@@ -179,6 +179,9 @@ static const struct {
-     #if CONFIG_BLEND_FILTER
-         { "vf_blend", checkasm_check_blend },
-     #endif
-+    #if CONFIG_BWDIF_FILTER
-+        { "vf_bwdif", checkasm_check_vf_bwdif },
-+    #endif
-     #if CONFIG_COLORSPACE_FILTER
-         { "vf_colorspace", checkasm_check_colorspace },
-     #endif
-diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
-index f4a0d20358c0..d69bc43999f5 100644
---- a/tests/checkasm/checkasm.h
-+++ b/tests/checkasm/checkasm.h
-@@ -83,6 +83,7 @@ void checkasm_check_utvideodsp(void);
- void checkasm_check_v210dec(void);
- void checkasm_check_v210enc(void);
- void checkasm_check_vc1dsp(void);
-+void checkasm_check_vf_bwdif(void);
- void checkasm_check_vf_eq(void);
- void checkasm_check_vf_gblur(void);
- void checkasm_check_vf_hflip(void);
-diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
-new file mode 100644
-index 000000000000..46224bb57572
---- /dev/null
-+++ b/tests/checkasm/vf_bwdif.c
-@@ -0,0 +1,84 @@
-+/*
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License along
-+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
-+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-+ */
-+
-+#include <string.h>
-+#include "checkasm.h"
-+#include "libavcodec/internal.h"
-+#include "libavfilter/bwdif.h"
-+
-+#define WIDTH 256
-+
-+#define randomize_buffers(buf0, buf1, mask, count) \
-+    for (size_t i = 0; i < count; i++) \
-+        buf0[i] = buf1[i] = rnd() & mask
-+
-+#define BODY(type, depth)                                                      \
-+    do {                                                                       \
-+        type prev0[9*WIDTH], prev1[9*WIDTH];                                   \
-+        type next0[9*WIDTH], next1[9*WIDTH];                                   \
-+        type cur0[9*WIDTH], cur1[9*WIDTH];                                     \
-+        type dst0[WIDTH], dst1[WIDTH];                                         \
-+        const int stride = WIDTH;                                              \
-+        const int mask = (1<<depth)-1;                                         \
-+                                                                               \
-+        declare_func(void, void *dst, void *prev, void *cur, void *next,       \
-+                        int w, int prefs, int mrefs, int prefs2, int mrefs2,   \
-+                        int prefs3, int mrefs3, int prefs4, int mrefs4,        \
-+                        int parity, int clip_max);                             \
-+                                                                               \
-+        randomize_buffers(prev0, prev1, mask, 9*WIDTH);                        \
-+        randomize_buffers(next0, next1, mask, 9*WIDTH);                        \
-+        randomize_buffers( cur0,  cur1, mask, 9*WIDTH);                        \
-+                                                                               \
-+        call_ref(dst0, prev0 + 4*WIDTH, cur0 + 4*WIDTH, next0 + 4*WIDTH,       \
-+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
-+                3*stride, -3*stride, 4*stride, -4*stride,                      \
-+                0, mask);                                                      \
-+        call_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,       \
-+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
-+                3*stride, -3*stride, 4*stride, -4*stride,                      \
-+                0, mask);                                                      \
-+                                                                               \
-+        if (memcmp(dst0, dst1, sizeof dst0)                                    \
-+                || memcmp(prev0, prev1, sizeof prev0)                          \
-+                || memcmp(next0, next1, sizeof next0)                          \
-+                || memcmp( cur0,  cur1, sizeof cur0))                          \
-+            fail();                                                            \
-+        bench_new(dst1, prev1 + 4*WIDTH, cur1 + 4*WIDTH, next1 + 4*WIDTH,      \
-+                WIDTH, stride, -stride, 2*stride, -2*stride,                   \
-+                3*stride, -3*stride, 4*stride, -4*stride,                      \
-+                0, mask);                                                      \
-+    } while (0)
-+
-+void checkasm_check_vf_bwdif(void)
-+{
-+    BWDIFContext ctx_8, ctx_10;
-+
-+    ff_bwdif_init_filter_line(&ctx_8, 8);
-+    ff_bwdif_init_filter_line(&ctx_10, 10);
-+
-+    if (check_func(ctx_8.filter_line, "bwdif8")) {
-+        BODY(uint8_t, 8);
-+        report("bwdif8");
-+    }
-+
-+    if (check_func(ctx_10.filter_line, "bwdif10")) {
-+        BODY(uint16_t, 10);
-+        report("bwdif10");
-+    }
-+}
-diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
-index 6fda6d227e13..1620ab0be0fc 100644
---- a/tests/fate/checkasm.mak
-+++ b/tests/fate/checkasm.mak
-@@ -38,6 +38,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp                                  \
-                 fate-checkasm-v210enc                                   \
-                 fate-checkasm-vc1dsp                                    \
-                 fate-checkasm-vf_blend                                  \
-+                fate-checkasm-vf_bwdif                                  \
-                 fate-checkasm-vf_colorspace                             \
-                 fate-checkasm-vf_eq                                     \
-                 fate-checkasm-vf_gblur                                  \
-
-From 278fc0455e3c68e136022cf836777790d7c88c16 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 6 Jul 2023 13:56:18 +0000
-Subject: [PATCH 144/186] Revert "vf_bwdif: Add attributes to ask for
- vectorization"
-
-This reverts commit 281250290ba5c2dcd8676e9a261050e65c10bcb7.
-Will be replaced by hand coded asm as on upstream
----
- libavfilter/vf_bwdif.c | 29 ++++++++++++++---------------
- 1 file changed, 14 insertions(+), 15 deletions(-)
-
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 539fabbd4686..34e8c5e234ee 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -74,10 +74,10 @@ typedef struct ThreadData {
-         int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] - e)) >> 1; \
-         int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e)) >> 1; \
-         int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \
-- {/*\
-+ \
-         if (!diff) { \
-             dst[0] = d; \
--        } else {*/
-+        } else {
- 
- #define SPAT_CHECK() \
-             int b = ((prev2[mrefs2] + next2[mrefs2]) >> 1) - c; \
-@@ -89,16 +89,15 @@ typedef struct ThreadData {
-             diff = FFMAX3(diff, min, -max);
- 
- #define FILTER_LINE() \
--            int i1, i2; \
-             SPAT_CHECK() \
--            /*if (FFABS(c - e) > temporal_diff0)*/ { \
--                i1 = (((coef_hf[0] * (prev2[0] + next2[0]) \
-+            if (FFABS(c - e) > temporal_diff0) { \
-+                interpol = (((coef_hf[0] * (prev2[0] + next2[0]) \
-                     - coef_hf[1] * (prev2[mrefs2] + next2[mrefs2] + prev2[prefs2] + next2[prefs2]) \
-                     + coef_hf[2] * (prev2[mrefs4] + next2[mrefs4] + prev2[prefs4] + next2[prefs4])) >> 2) \
-                     + coef_lf[0] * (c + e) - coef_lf[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \
--            } /*else*/ { \
--                i2 = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \
--            }interpol = FFABS(c - e) > temporal_diff0 ? i1:i2;\
-+            } else { \
-+                interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \
-+            }
- 
- #define FILTER_EDGE() \
-             if (spat) { \
-@@ -112,7 +111,7 @@ typedef struct ThreadData {
-             else if (interpol < d - diff) \
-                 interpol = d - diff; \
-  \
--            dst[0] = !diff ? d : av_clip(interpol, 0, clip_max); \
-+            dst[0] = av_clip(interpol, 0, clip_max); \
-         } \
-  \
-         dst++; \
-@@ -123,7 +122,7 @@ typedef struct ThreadData {
-         next2++; \
-     }
- 
--static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs,
-+static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs,
-                          int prefs3, int mrefs3, int parity, int clip_max)
- {
-     uint8_t *dst = dst1;
-@@ -133,7 +132,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restr
-     FILTER_INTRA()
- }
- 
--static void __attribute__((optimize("tree-vectorize"))) filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-+static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-                           int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                           int prefs3, int mrefs3, int prefs4, int mrefs4,
-                           int parity, int clip_max)
-@@ -151,7 +150,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_line_c(void *rest
-     FILTER2()
- }
- 
--static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-+static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1,
-                         int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                         int parity, int clip_max, int spat)
- {
-@@ -168,7 +167,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restri
-     FILTER2()
- }
- 
--static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs,
-+static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mrefs,
-                                int prefs3, int mrefs3, int parity, int clip_max)
- {
-     uint16_t *dst = dst1;
-@@ -178,7 +177,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void
-     FILTER_INTRA()
- }
- 
--static void __attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-+static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1,
-                                 int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                                 int prefs3, int mrefs3, int prefs4, int mrefs4,
-                                 int parity, int clip_max)
-@@ -196,7 +195,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void
-     FILTER2()
- }
- 
--static void __attribute__((optimize("tree-vectorize"))) filter_edge_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1,
-+static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1,
-                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                               int parity, int clip_max, int spat)
- {
-
-From 6c3566cf92cba9e2ccd87b53ac7f00f0ea431fb2 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jul 2023 14:04:39 +0000
-Subject: [PATCH 145/186] tests/checkasm: Add test for vf_bwdif filter_intra
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-Signed-off-by: Martin Storsjö <martin@martin.st>
-(cherry picked from commit 7caa8d6b91e738ad2c1ea61746b6c062c470f7d3)
----
- tests/checkasm/vf_bwdif.c | 37 +++++++++++++++++++++++++++++++++++++
- 1 file changed, 37 insertions(+)
-
-diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
-index 46224bb57572..034bbabb4c5c 100644
---- a/tests/checkasm/vf_bwdif.c
-+++ b/tests/checkasm/vf_bwdif.c
-@@ -20,6 +20,7 @@
- #include "checkasm.h"
- #include "libavcodec/internal.h"
- #include "libavfilter/bwdif.h"
-+#include "libavutil/mem_internal.h"
- 
- #define WIDTH 256
- 
-@@ -81,4 +82,40 @@ void checkasm_check_vf_bwdif(void)
-         BODY(uint16_t, 10);
-         report("bwdif10");
-     }
-+
-+    if (check_func(ctx_8.filter_intra, "bwdif8.intra")) {
-+        LOCAL_ALIGNED_16(uint8_t, cur0,  [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, cur1,  [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, dst0,  [WIDTH*3]);
-+        LOCAL_ALIGNED_16(uint8_t, dst1,  [WIDTH*3]);
-+        const int stride = WIDTH;
-+        const int mask = (1<<8)-1;
-+
-+        declare_func(void, void *dst1, void *cur1, int w, int prefs, int mrefs,
-+                     int prefs3, int mrefs3, int parity, int clip_max);
-+
-+        randomize_buffers( cur0,  cur1, mask, 11*WIDTH);
-+        memset(dst0, 0xba, WIDTH * 3);
-+        memset(dst1, 0xba, WIDTH * 3);
-+
-+        call_ref(dst0 + stride,
-+                 cur0 + stride * 4, WIDTH,
-+                 stride, -stride, stride * 3, -stride * 3,
-+                 0, mask);
-+        call_new(dst1 + stride,
-+                 cur0 + stride * 4, WIDTH,
-+                 stride, -stride, stride * 3, -stride * 3,
-+                 0, mask);
-+
-+        if (memcmp(dst0, dst1, WIDTH*3)
-+                || memcmp( cur0,  cur1, WIDTH*11))
-+            fail();
-+
-+        bench_new(dst1 + stride,
-+                  cur0 + stride * 4, WIDTH,
-+                  stride, -stride, stride * 3, -stride * 3,
-+                  0, mask);
-+
-+        report("bwdif8.intra");
-+    }
- }
-
-From 652e80de533059d24e136242324d4129a545a158 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jul 2023 14:04:40 +0000
-Subject: [PATCH 146/186] avfilter/vf_bwdif: Add neon for filter_intra
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Adds an outline for aarch neon functions
-Adds common macros and consts for aarch64 neon
-Exports C filter_intra needed for tail fixup of neon code
-Adds neon for filter_intra
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-Signed-off-by: Martin Storsjö <martin@martin.st>
-(cherry picked from commit 5075cfb4e6a21f6b4da9e62bdb0bad4cb32a4673)
----
- libavfilter/aarch64/Makefile                |   2 +
- libavfilter/aarch64/vf_bwdif_init_aarch64.c |  56 ++++++++
- libavfilter/aarch64/vf_bwdif_neon.S         | 136 ++++++++++++++++++++
- libavfilter/bwdif.h                         |   4 +
- libavfilter/vf_bwdif.c                      |   8 +-
- 5 files changed, 203 insertions(+), 3 deletions(-)
- create mode 100644 libavfilter/aarch64/vf_bwdif_init_aarch64.c
- create mode 100644 libavfilter/aarch64/vf_bwdif_neon.S
-
-diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile
-index b58daa3a3fea..b68209bc94a6 100644
---- a/libavfilter/aarch64/Makefile
-+++ b/libavfilter/aarch64/Makefile
-@@ -1,3 +1,5 @@
-+OBJS-$(CONFIG_BWDIF_FILTER)                  += aarch64/vf_bwdif_init_aarch64.o
- OBJS-$(CONFIG_NLMEANS_FILTER)                += aarch64/vf_nlmeans_init.o
- 
-+NEON-OBJS-$(CONFIG_BWDIF_FILTER)             += aarch64/vf_bwdif_neon.o
- NEON-OBJS-$(CONFIG_NLMEANS_FILTER)           += aarch64/vf_nlmeans_neon.o
-diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-new file mode 100644
-index 000000000000..3ffaa07ab369
---- /dev/null
-+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-@@ -0,0 +1,56 @@
-+/*
-+ * bwdif aarch64 NEON optimisations
-+ *
-+ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+#include "libavutil/common.h"
-+#include "libavfilter/bwdif.h"
-+#include "libavutil/aarch64/cpu.h"
-+
-+void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs,
-+                                int prefs3, int mrefs3, int parity, int clip_max);
-+
-+
-+static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs,
-+                                int prefs3, int mrefs3, int parity, int clip_max)
-+{
-+    const int w0 = clip_max != 255 ? 0 : w & ~15;
-+
-+    ff_bwdif_filter_intra_neon(dst1, cur1, w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max);
-+
-+    if (w0 < w)
-+        ff_bwdif_filter_intra_c((char *)dst1 + w0, (char *)cur1 + w0,
-+                                w - w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max);
-+}
-+
-+void
-+ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
-+{
-+    const int cpu_flags = av_get_cpu_flags();
-+
-+    if (bit_depth != 8)
-+        return;
-+
-+    if (!have_neon(cpu_flags))
-+        return;
-+
-+    s->filter_intra = filter_intra_helper;
-+}
-+
-diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S
-new file mode 100644
-index 000000000000..e288efbe6c33
---- /dev/null
-+++ b/libavfilter/aarch64/vf_bwdif_neon.S
-@@ -0,0 +1,136 @@
-+/*
-+ * bwdif aarch64 NEON optimisations
-+ *
-+ * Copyright (c) 2023 John Cox <jc@kynesim.co.uk>
-+ *
-+ * This file is part of FFmpeg.
-+ *
-+ * FFmpeg is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU Lesser General Public
-+ * License as published by the Free Software Foundation; either
-+ * version 2.1 of the License, or (at your option) any later version.
-+ *
-+ * FFmpeg is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * Lesser General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU Lesser General Public
-+ * License along with FFmpeg; if not, write to the Free Software
-+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-+ */
-+
-+
-+#include "libavutil/aarch64/asm.S"
-+
-+// Space taken on the stack by an int (32-bit)
-+#ifdef __APPLE__
-+.set    SP_INT, 4
-+#else
-+.set    SP_INT, 8
-+#endif
-+
-+.macro SQSHRUNN b, s0, s1, s2, s3, n
-+        sqshrun         \s0\().4h, \s0\().4s, #\n - 8
-+        sqshrun2        \s0\().8h, \s1\().4s, #\n - 8
-+        sqshrun         \s1\().4h, \s2\().4s, #\n - 8
-+        sqshrun2        \s1\().8h, \s3\().4s, #\n - 8
-+        uzp2            \b\().16b, \s0\().16b, \s1\().16b
-+.endm
-+
-+.macro SMULL4K a0, a1, a2, a3, s0, s1, k
-+        smull           \a0\().4s, \s0\().4h, \k
-+        smull2          \a1\().4s, \s0\().8h, \k
-+        smull           \a2\().4s, \s1\().4h, \k
-+        smull2          \a3\().4s, \s1\().8h, \k
-+.endm
-+
-+.macro UMULL4K a0, a1, a2, a3, s0, s1, k
-+        umull           \a0\().4s, \s0\().4h, \k
-+        umull2          \a1\().4s, \s0\().8h, \k
-+        umull           \a2\().4s, \s1\().4h, \k
-+        umull2          \a3\().4s, \s1\().8h, \k
-+.endm
-+
-+.macro UMLAL4K a0, a1, a2, a3, s0, s1, k
-+        umlal           \a0\().4s, \s0\().4h, \k
-+        umlal2          \a1\().4s, \s0\().8h, \k
-+        umlal           \a2\().4s, \s1\().4h, \k
-+        umlal2          \a3\().4s, \s1\().8h, \k
-+.endm
-+
-+.macro UMLSL4K a0, a1, a2, a3, s0, s1, k
-+        umlsl           \a0\().4s, \s0\().4h, \k
-+        umlsl2          \a1\().4s, \s0\().8h, \k
-+        umlsl           \a2\().4s, \s1\().4h, \k
-+        umlsl2          \a3\().4s, \s1\().8h, \k
-+.endm
-+
-+.macro LDR_COEFFS d, t0
-+        movrel          \t0, coeffs, 0
-+        ld1             {\d\().8h}, [\t0]
-+.endm
-+
-+// static const uint16_t coef_lf[2] = { 4309, 213 };
-+// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 };
-+// static const uint16_t coef_sp[2] = { 5077, 981 };
-+
-+const coeffs, align=4   // align 4 means align on 2^4 boundry
-+        .hword          4309 * 4, 213 * 4               // lf[0]*4 = v0.h[0]
-+        .hword          5570, 3801, 1016, -3801         // hf[0] = v0.h[2], -hf[1] = v0.h[5]
-+        .hword          5077, 981                       // sp[0] = v0.h[6]
-+endconst
-+
-+// ============================================================================
-+//
-+// void ff_bwdif_filter_intra_neon(
-+//      void *dst1,     // x0
-+//      void *cur1,     // x1
-+//      int w,          // w2
-+//      int prefs,      // w3
-+//      int mrefs,      // w4
-+//      int prefs3,     // w5
-+//      int mrefs3,     // w6
-+//      int parity,     // w7       unused
-+//      int clip_max)   // [sp, #0] unused
-+
-+function ff_bwdif_filter_intra_neon, export=1
-+        cmp             w2, #0
-+        ble             99f
-+
-+        LDR_COEFFS      v0, x17
-+
-+//    for (x = 0; x < w; x++) {
-+10:
-+
-+//        interpol = (coef_sp[0] * (cur[mrefs] + cur[prefs]) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13;
-+        ldr             q31, [x1, w4, sxtw]
-+        ldr             q30, [x1, w3, sxtw]
-+        ldr             q29, [x1, w6, sxtw]
-+        ldr             q28, [x1, w5, sxtw]
-+
-+        uaddl           v20.8h,  v31.8b,  v30.8b
-+        uaddl2          v21.8h,  v31.16b, v30.16b
-+
-+        UMULL4K         v2, v3, v4, v5, v20, v21, v0.h[6]
-+
-+        uaddl           v20.8h,  v29.8b,  v28.8b
-+        uaddl2          v21.8h,  v29.16b, v28.16b
-+
-+        UMLSL4K         v2, v3, v4, v5, v20, v21, v0.h[7]
-+
-+//        dst[0] = av_clip(interpol, 0, clip_max);
-+        SQSHRUNN        v2, v2, v3, v4, v5, 13
-+        str             q2, [x0], #16
-+
-+//        dst++;
-+//        cur++;
-+//    }
-+
-+        subs            w2,  w2,  #16
-+        add             x1,  x1,  #16
-+        bgt             10b
-+
-+99:
-+        ret
-+endfunc
-diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
-index 5749345f784e..ae6f6ce2233a 100644
---- a/libavfilter/bwdif.h
-+++ b/libavfilter/bwdif.h
-@@ -39,5 +39,9 @@ typedef struct BWDIFContext {
- 
- void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
- void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
-+void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth);
-+
-+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
-+                             int prefs3, int mrefs3, int parity, int clip_max);
- 
- #endif /* AVFILTER_BWDIF_H */
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 34e8c5e234ee..6ec8bbab5d72 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -122,8 +122,8 @@ typedef struct ThreadData {
-         next2++; \
-     }
- 
--static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs,
--                         int prefs3, int mrefs3, int parity, int clip_max)
-+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
-+                             int prefs3, int mrefs3, int parity, int clip_max)
- {
-     uint8_t *dst = dst1;
-     uint8_t *cur = cur1;
-@@ -352,13 +352,15 @@ av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
-         s->filter_line  = filter_line_c_16bit;
-         s->filter_edge  = filter_edge_16bit;
-     } else {
--        s->filter_intra = filter_intra;
-+        s->filter_intra = ff_bwdif_filter_intra_c;
-         s->filter_line  = filter_line_c;
-         s->filter_edge  = filter_edge;
-     }
- 
- #if ARCH_X86
-     ff_bwdif_init_x86(s, bit_depth);
-+#elif ARCH_AARCH64
-+    ff_bwdif_init_aarch64(s, bit_depth);
- #endif
- }
- 
-
-From 981053a48f80de6c0be0c3975f7aa877aacc842e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jul 2023 14:04:41 +0000
-Subject: [PATCH 147/186] tests/checkasm: Add test for vf_bwdif filter_edge
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-Signed-off-by: Martin Storsjö <martin@martin.st>
-(cherry picked from commit 7ed7c00f55a50ac88589f9e17c172d4a4fce0581)
----
- tests/checkasm/vf_bwdif.c | 54 +++++++++++++++++++++++++++++++++++++++
- 1 file changed, 54 insertions(+)
-
-diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
-index 034bbabb4c5c..5fdba09fdc73 100644
---- a/tests/checkasm/vf_bwdif.c
-+++ b/tests/checkasm/vf_bwdif.c
-@@ -83,6 +83,60 @@ void checkasm_check_vf_bwdif(void)
-         report("bwdif10");
-     }
- 
-+    {
-+        LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, cur0,  [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, cur1,  [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, dst0,  [WIDTH*3]);
-+        LOCAL_ALIGNED_16(uint8_t, dst1,  [WIDTH*3]);
-+        const int stride = WIDTH;
-+        const int mask = (1<<8)-1;
-+        int spat;
-+        int parity;
-+
-+        for (spat = 0; spat != 2; ++spat) {
-+            for (parity = 0; parity != 2; ++parity) {
-+                if (check_func(ctx_8.filter_edge, "bwdif8.edge.s%d.p%d", spat, parity)) {
-+
-+                    declare_func(void, void *dst1, void *prev1, void *cur1, void *next1,
-+                                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                                            int parity, int clip_max, int spat);
-+
-+                    randomize_buffers(prev0, prev1, mask, 11*WIDTH);
-+                    randomize_buffers(next0, next1, mask, 11*WIDTH);
-+                    randomize_buffers( cur0,  cur1, mask, 11*WIDTH);
-+                    memset(dst0, 0xba, WIDTH * 3);
-+                    memset(dst1, 0xba, WIDTH * 3);
-+
-+                    call_ref(dst0 + stride,
-+                             prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, WIDTH,
-+                             stride, -stride, stride * 2, -stride * 2,
-+                             parity, mask, spat);
-+                    call_new(dst1 + stride,
-+                             prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH,
-+                             stride, -stride, stride * 2, -stride * 2,
-+                             parity, mask, spat);
-+
-+                    if (memcmp(dst0, dst1, WIDTH*3)
-+                            || memcmp(prev0, prev1, WIDTH*11)
-+                            || memcmp(next0, next1, WIDTH*11)
-+                            || memcmp( cur0,  cur1, WIDTH*11))
-+                        fail();
-+
-+                    bench_new(dst1 + stride,
-+                             prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH,
-+                             stride, -stride, stride * 2, -stride * 2,
-+                             parity, mask, spat);
-+                }
-+            }
-+        }
-+
-+        report("bwdif8.edge");
-+    }
-+
-     if (check_func(ctx_8.filter_intra, "bwdif8.intra")) {
-         LOCAL_ALIGNED_16(uint8_t, cur0,  [11*WIDTH]);
-         LOCAL_ALIGNED_16(uint8_t, cur1,  [11*WIDTH]);
-
-From c48fed1cb66414179201cbac1ef6fb689cb4426a Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jul 2023 14:04:42 +0000
-Subject: [PATCH 148/186] avfilter/vf_bwdif: Add neon for filter_edge
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Adds clip and spatial macros for aarch64 neon
-Exports C filter_edge needed for tail fixup of neon code
-Adds neon for filter_edge
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-Signed-off-by: Martin Storsjö <martin@martin.st>
-(cherry picked from commit 8130df83e0fbd3264fe990fb4e084ecbd452d0b1)
----
- libavfilter/aarch64/vf_bwdif_init_aarch64.c |  20 +++
- libavfilter/aarch64/vf_bwdif_neon.S         | 177 ++++++++++++++++++++
- libavfilter/bwdif.h                         |   4 +
- libavfilter/vf_bwdif.c                      |   8 +-
- 4 files changed, 205 insertions(+), 4 deletions(-)
-
-diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-index 3ffaa07ab369..e75cf2f20459 100644
---- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-@@ -24,10 +24,29 @@
- #include "libavfilter/bwdif.h"
- #include "libavutil/aarch64/cpu.h"
- 
-+void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1,
-+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                               int parity, int clip_max, int spat);
-+
- void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs,
-                                 int prefs3, int mrefs3, int parity, int clip_max);
- 
- 
-+static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1,
-+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                               int parity, int clip_max, int spat)
-+{
-+    const int w0 = clip_max != 255 ? 0 : w & ~15;
-+
-+    ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, w0, prefs, mrefs, prefs2, mrefs2,
-+                              parity, clip_max, spat);
-+
-+    if (w0 < w)
-+        ff_bwdif_filter_edge_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0,
-+                               w - w0, prefs, mrefs, prefs2, mrefs2,
-+                               parity, clip_max, spat);
-+}
-+
- static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs,
-                                 int prefs3, int mrefs3, int parity, int clip_max)
- {
-@@ -52,5 +71,6 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
-         return;
- 
-     s->filter_intra = filter_intra_helper;
-+    s->filter_edge  = filter_edge_helper;
- }
- 
-diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S
-index e288efbe6c33..389302b813aa 100644
---- a/libavfilter/aarch64/vf_bwdif_neon.S
-+++ b/libavfilter/aarch64/vf_bwdif_neon.S
-@@ -66,6 +66,79 @@
-         umlsl2          \a3\().4s, \s1\().8h, \k
- .endm
- 
-+//      int b = m2s1 - m1;
-+//      int f = p2s1 - p1;
-+//      int dc = c0s1 - m1;
-+//      int de = c0s1 - p1;
-+//      int sp_max = FFMIN(p1 - c0s1, m1 - c0s1);
-+//      sp_max = FFMIN(sp_max, FFMAX(-b,-f));
-+//      int sp_min = FFMIN(c0s1 - p1, c0s1 - m1);
-+//      sp_min = FFMIN(sp_min, FFMAX(b,f));
-+//      diff = diff == 0 ? 0 : FFMAX3(diff, sp_min, sp_max);
-+.macro SPAT_CHECK diff, m2s1, m1, c0s1, p1, p2s1, t0, t1, t2, t3
-+        uqsub           \t0\().16b, \p1\().16b, \c0s1\().16b
-+        uqsub           \t2\().16b, \m1\().16b, \c0s1\().16b
-+        umin            \t2\().16b, \t0\().16b, \t2\().16b
-+
-+        uqsub           \t1\().16b, \m1\().16b, \m2s1\().16b
-+        uqsub           \t3\().16b, \p1\().16b, \p2s1\().16b
-+        umax            \t3\().16b, \t3\().16b, \t1\().16b
-+        umin            \t3\().16b, \t3\().16b, \t2\().16b
-+
-+        uqsub           \t0\().16b, \c0s1\().16b, \p1\().16b
-+        uqsub           \t2\().16b, \c0s1\().16b, \m1\().16b
-+        umin            \t2\().16b, \t0\().16b, \t2\().16b
-+
-+        uqsub           \t1\().16b, \m2s1\().16b, \m1\().16b
-+        uqsub           \t0\().16b, \p2s1\().16b, \p1\().16b
-+        umax            \t0\().16b, \t0\().16b, \t1\().16b
-+        umin            \t2\().16b, \t2\().16b, \t0\().16b
-+
-+        cmeq            \t1\().16b, \diff\().16b, #0
-+        umax            \diff\().16b, \diff\().16b, \t3\().16b
-+        umax            \diff\().16b, \diff\().16b, \t2\().16b
-+        bic             \diff\().16b, \diff\().16b, \t1\().16b
-+.endm
-+
-+//      i0 = s0;
-+//      if (i0 > d0 + diff0)
-+//          i0 = d0 + diff0;
-+//      else if (i0 < d0 - diff0)
-+//          i0 = d0 - diff0;
-+//
-+// i0 = s0 is safe
-+.macro DIFF_CLIP i0, s0, d0, diff, t0, t1
-+        uqadd           \t0\().16b, \d0\().16b, \diff\().16b
-+        uqsub           \t1\().16b, \d0\().16b, \diff\().16b
-+        umin            \i0\().16b, \s0\().16b, \t0\().16b
-+        umax            \i0\().16b, \i0\().16b, \t1\().16b
-+.endm
-+
-+//      i0 = FFABS(m1 - p1) > td0 ? i1 : i2;
-+//      DIFF_CLIP
-+//
-+// i0 = i1 is safe
-+.macro INTERPOL i0, i1, i2, m1, d0, p1, td0, diff, t0, t1, t2
-+        uabd            \t0\().16b, \m1\().16b, \p1\().16b
-+        cmhi            \t0\().16b, \t0\().16b, \td0\().16b
-+        bsl             \t0\().16b, \i1\().16b, \i2\().16b
-+        DIFF_CLIP       \i0, \t0, \d0, \diff, \t1, \t2
-+.endm
-+
-+.macro PUSH_VREGS
-+        stp             d8,  d9,  [sp, #-64]!
-+        stp             d10, d11, [sp, #16]
-+        stp             d12, d13, [sp, #32]
-+        stp             d14, d15, [sp, #48]
-+.endm
-+
-+.macro POP_VREGS
-+        ldp             d14, d15, [sp, #48]
-+        ldp             d12, d13, [sp, #32]
-+        ldp             d10, d11, [sp, #16]
-+        ldp             d8,  d9,  [sp], #64
-+.endm
-+
- .macro LDR_COEFFS d, t0
-         movrel          \t0, coeffs, 0
-         ld1             {\d\().8h}, [\t0]
-@@ -81,6 +154,110 @@ const coeffs, align=4   // align 4 means align on 2^4 boundry
-         .hword          5077, 981                       // sp[0] = v0.h[6]
- endconst
- 
-+// ============================================================================
-+//
-+// void ff_bwdif_filter_edge_neon(
-+//      void *dst1,     // x0
-+//      void *prev1,    // x1
-+//      void *cur1,     // x2
-+//      void *next1,    // x3
-+//      int w,          // w4
-+//      int prefs,      // w5
-+//      int mrefs,      // w6
-+//      int prefs2,     // w7
-+//      int mrefs2,     // [sp, #0]
-+//      int parity,     // [sp, #SP_INT]
-+//      int clip_max,   // [sp, #SP_INT*2]  unused
-+//      int spat);      // [sp, #SP_INT*3]
-+
-+function ff_bwdif_filter_edge_neon, export=1
-+        // Sanity check w
-+        cmp             w4, #0
-+        ble             99f
-+
-+// #define prev2 cur
-+//     const uint8_t * restrict next2 = parity ? prev : next;
-+
-+        ldr             w8,  [sp, #0]                   // mrefs2
-+
-+        ldr             w17, [sp, #SP_INT]              // parity
-+        ldr             w16, [sp, #SP_INT*3]            // spat
-+        cmp             w17, #0
-+        csel            x17, x1, x3, ne
-+
-+//     for (x = 0; x < w; x++) {
-+
-+10:
-+//        int m1 = cur[mrefs];
-+//        int d = (prev2[0] + next2[0]) >> 1;
-+//        int p1 = cur[prefs];
-+//        int temporal_diff0 = FFABS(prev2[0] - next2[0]);
-+//        int temporal_diff1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
-+//        int temporal_diff2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
-+//        int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2);
-+        ldr             q31, [x2]
-+        ldr             q21, [x17]
-+        uhadd           v16.16b, v31.16b, v21.16b       // d0 = v16
-+        uabd            v17.16b, v31.16b, v21.16b       // td0 = v17
-+        ldr             q24, [x2, w6, sxtw]             // m1 = v24
-+        ldr             q22, [x2, w5, sxtw]             // p1 = v22
-+
-+        ldr             q0,  [x1, w6, sxtw]             // prev[mrefs]
-+        ldr             q2,  [x1, w5, sxtw]             // prev[prefs]
-+        ldr             q1,  [x3, w6, sxtw]             // next[mrefs]
-+        ldr             q3,  [x3, w5, sxtw]             // next[prefs]
-+
-+        ushr            v29.16b, v17.16b, #1
-+
-+        uabd            v31.16b, v0.16b,  v24.16b
-+        uabd            v30.16b, v2.16b,  v22.16b
-+        uhadd           v0.16b,  v31.16b, v30.16b       // td1 = q0
-+
-+        uabd            v31.16b, v1.16b,  v24.16b
-+        uabd            v30.16b, v3.16b,  v22.16b
-+        uhadd           v1.16b,  v31.16b, v30.16b       // td2 = q1
-+
-+        umax            v0.16b,  v0.16b,  v29.16b
-+        umax            v0.16b,  v0.16b,  v1.16b        // diff = v0
-+
-+//        if (spat) {
-+//            SPAT_CHECK()
-+//        }
-+//        i0 = (m1 + p1) >> 1;
-+        cbz             w16, 1f
-+
-+        ldr             q31, [x2,  w8, sxtw]
-+        ldr             q18, [x17, w8, sxtw]
-+        ldr             q30, [x2,  w7, sxtw]
-+        ldr             q19, [x17, w7, sxtw]
-+        uhadd           v18.16b, v18.16b, v31.16b
-+        uhadd           v19.16b, v19.16b, v30.16b
-+
-+        SPAT_CHECK      v0, v18, v24, v16, v22, v19, v31, v30, v29, v28
-+
-+1:
-+        uhadd           v2.16b,  v22.16b, v24.16b
-+
-+        // i0 = v2, s0 = v2, d0 = v16, diff = v0, t0 = v31, t1 = v30
-+        DIFF_CLIP       v2, v2, v16, v0, v31, v30
-+
-+//        dst[0] = av_clip(interpol, 0, clip_max);
-+        str             q2, [x0], #16
-+
-+//        dst++;
-+//        cur++;
-+//    }
-+        subs            w4,  w4,  #16
-+        add             x1,  x1,  #16
-+        add             x2,  x2,  #16
-+        add             x3,  x3,  #16
-+        add             x17, x17, #16
-+        bgt             10b
-+
-+99:
-+        ret
-+endfunc
-+
- // ============================================================================
- //
- // void ff_bwdif_filter_intra_neon(
-diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
-index ae6f6ce2233a..ae1616d366cb 100644
---- a/libavfilter/bwdif.h
-+++ b/libavfilter/bwdif.h
-@@ -41,6 +41,10 @@ void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
- void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
- void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth);
- 
-+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
-+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                            int parity, int clip_max, int spat);
-+
- void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
-                              int prefs3, int mrefs3, int parity, int clip_max);
- 
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 6ec8bbab5d72..688c2d257229 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -150,9 +150,9 @@ static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-     FILTER2()
- }
- 
--static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1,
--                        int w, int prefs, int mrefs, int prefs2, int mrefs2,
--                        int parity, int clip_max, int spat)
-+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
-+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                            int parity, int clip_max, int spat)
- {
-     uint8_t *dst   = dst1;
-     uint8_t *prev  = prev1;
-@@ -354,7 +354,7 @@ av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
-     } else {
-         s->filter_intra = ff_bwdif_filter_intra_c;
-         s->filter_line  = filter_line_c;
--        s->filter_edge  = filter_edge;
-+        s->filter_edge  = ff_bwdif_filter_edge_c;
-     }
- 
- #if ARCH_X86
-
-From eb72477591f765f10373e64dd57745ebe908d01a Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jul 2023 14:04:43 +0000
-Subject: [PATCH 149/186] avfilter/vf_bwdif: Add neon for filter_line
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Exports C filter_line needed for tail fixup of neon code
-Adds neon for filter_line
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-Signed-off-by: Martin Storsjö <martin@martin.st>
-(cherry picked from commit 94cb94a2c0910d364a7181fc5cc0e9556b777d0a)
----
- libavfilter/aarch64/vf_bwdif_init_aarch64.c |  21 ++
- libavfilter/aarch64/vf_bwdif_neon.S         | 203 ++++++++++++++++++++
- libavfilter/bwdif.h                         |   5 +
- libavfilter/vf_bwdif.c                      |  10 +-
- 4 files changed, 234 insertions(+), 5 deletions(-)
-
-diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-index e75cf2f20459..21e67884ab90 100644
---- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-@@ -31,6 +31,26 @@ void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1,
- void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs,
-                                 int prefs3, int mrefs3, int parity, int clip_max);
- 
-+void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1,
-+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                               int prefs3, int mrefs3, int prefs4, int mrefs4,
-+                               int parity, int clip_max);
-+
-+
-+static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1,
-+                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                               int prefs3, int mrefs3, int prefs4, int mrefs4,
-+                               int parity, int clip_max)
-+{
-+    const int w0 = clip_max != 255 ? 0 : w & ~15;
-+
-+    ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1,
-+                              w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
-+
-+    if (w0 < w)
-+        ff_bwdif_filter_line_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0,
-+                               w - w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max);
-+}
- 
- static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1,
-                                int w, int prefs, int mrefs, int prefs2, int mrefs2,
-@@ -71,6 +91,7 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
-         return;
- 
-     s->filter_intra = filter_intra_helper;
-+    s->filter_line  = filter_line_helper;
-     s->filter_edge  = filter_edge_helper;
- }
- 
-diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S
-index 389302b813aa..f185e94e3c6f 100644
---- a/libavfilter/aarch64/vf_bwdif_neon.S
-+++ b/libavfilter/aarch64/vf_bwdif_neon.S
-@@ -154,6 +154,209 @@ const coeffs, align=4   // align 4 means align on 2^4 boundry
-         .hword          5077, 981                       // sp[0] = v0.h[6]
- endconst
- 
-+// ===========================================================================
-+//
-+// void filter_line(
-+//      void *dst1,     // x0
-+//      void *prev1,    // x1
-+//      void *cur1,     // x2
-+//      void *next1,    // x3
-+//      int w,          // w4
-+//      int prefs,      // w5
-+//      int mrefs,      // w6
-+//      int prefs2,     // w7
-+//      int mrefs2,     // [sp, #0]
-+//      int prefs3,     // [sp, #SP_INT]
-+//      int mrefs3,     // [sp, #SP_INT*2]
-+//      int prefs4,     // [sp, #SP_INT*3]
-+//      int mrefs4,     // [sp, #SP_INT*4]
-+//      int parity,     // [sp, #SP_INT*5]
-+//      int clip_max)   // [sp, #SP_INT*6]
-+
-+function ff_bwdif_filter_line_neon, export=1
-+        // Sanity check w
-+        cmp             w4, #0
-+        ble             99f
-+
-+        // Rearrange regs to be the same as line3 for ease of debug!
-+        mov             w10, w4                         // w10 = loop count
-+        mov             w9,  w6                         // w9  = mref
-+        mov             w12, w7                         // w12 = pref2
-+        mov             w11, w5                         // w11 = pref
-+        ldr             w8,  [sp, #0]                   // w8 =  mref2
-+        ldr             w7,  [sp, #SP_INT*2]            // w7  = mref3
-+        ldr             w6,  [sp, #SP_INT*4]            // w6  = mref4
-+        ldr             w13, [sp, #SP_INT]              // w13 = pref3
-+        ldr             w14, [sp, #SP_INT*3]            // w14 = pref4
-+
-+        mov             x4,  x3
-+        mov             x3,  x2
-+        mov             x2,  x1
-+
-+        LDR_COEFFS      v0, x17
-+
-+// #define prev2 cur
-+//        const uint8_t * restrict next2 = parity ? prev : next;
-+        ldr             w17, [sp, #SP_INT*5]            // parity
-+        cmp             w17, #0
-+        csel            x17, x2, x4, ne
-+
-+        PUSH_VREGS
-+
-+//         for (x = 0; x < w; x++) {
-+//             int diff0, diff2;
-+//             int d0, d2;
-+//             int temporal_diff0, temporal_diff2;
-+//
-+//             int i1, i2;
-+//             int j1, j2;
-+//             int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4;
-+
-+10:
-+//             c0 = prev2[0] + next2[0];            // c0 = v20, v21
-+//             d0  = c0 >> 1;                       // d0 = v10
-+//             temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11
-+        ldr             q31, [x3]
-+        ldr             q21, [x17]
-+        uhadd           v10.16b, v31.16b, v21.16b
-+        uabd            v11.16b, v31.16b, v21.16b
-+        uaddl           v20.8h,  v21.8b,  v31.8b
-+        uaddl2          v21.8h,  v21.16b, v31.16b
-+
-+        ldr             q31, [x3, w6, sxtw]
-+        ldr             q23, [x17, w6, sxtw]
-+
-+//             i1 = coef_hf[0] * c0;                // i1 = v2-v5
-+        UMULL4K         v2, v3, v4, v5, v20, v21, v0.h[2]
-+
-+        ldr             q30, [x3, w14, sxtw]
-+        ldr             q25, [x17, w14, sxtw]
-+
-+//             m4 = prev2[mrefs4] + next2[mrefs4];  // m4 = v22,v23
-+        uaddl           v22.8h,  v23.8b,  v31.8b
-+        uaddl2          v23.8h,  v23.16b, v31.16b
-+
-+//             p4 = prev2[prefs4] + next2[prefs4];  // p4 = v24,v25, (p4 >> 1) = v12
-+        uhadd           v12.16b, v25.16b, v30.16b
-+        uaddl           v24.8h,  v25.8b,  v30.8b
-+        uaddl2          v25.8h,  v25.16b, v30.16b
-+
-+//             m3 = cur[mrefs3];                    // m3 = v20
-+        ldr             q20, [x3, w7, sxtw]
-+
-+//             p3 = cur[prefs3];                    // p3 = v21
-+        ldr             q21, [x3, w13, sxtw]
-+
-+//             i1 += coef_hf[2] * (m4 + p4);        // (-m4:v22,v23) (-p4:v24,v25)
-+        add             v22.8h,  v22.8h,  v24.8h
-+        add             v23.8h,  v23.8h,  v25.8h
-+        UMLAL4K         v2, v3, v4, v5, v22, v23, v0.h[4]
-+
-+        ldr             q29, [x3, w8, sxtw]
-+        ldr             q23, [x17, w8, sxtw]
-+
-+//             i1 -= coef_lf[1] * 4 * (m3 + p3);    // -
-+        uaddl           v30.8h,  v20.8b,  v21.8b
-+        uaddl2          v31.8h,  v20.16b, v21.16b
-+
-+        UMLSL4K         v2, v3, v4, v5, v30, v31, v0.h[1]
-+
-+        ldr             q31, [x3, w12, sxtw]
-+        ldr             q27, [x17, w12, sxtw]
-+
-+//             m2 = prev2[mrefs2] + next2[mrefs2];  // m2 = v22,v23, (m2 >> 1) = v13
-+        uhadd           v13.16b, v23.16b, v29.16b
-+        uaddl           v22.8h,  v23.8b,  v29.8b
-+        uaddl2          v23.8h,  v23.16b, v29.16b
-+
-+//             m1 = cur[mrefs];                     // m1 = v24
-+        ldr             q24, [x3, w9, sxtw]
-+
-+//             p2 = prev2[prefs2] + next2[prefs2];  // p2 = v26, v27
-+//             temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14
-+//             d2  = p2 >> 1;                       // d2 = v15
-+        uabd            v14.16b, v31.16b, v27.16b
-+        uhadd           v15.16b, v31.16b, v27.16b
-+        uaddl           v26.8h,  v27.8b,  v31.8b
-+        uaddl2          v27.8h,  v27.16b, v31.16b
-+
-+//             i1 -= coef_hf[1] * (m2 + p2);        // (-m2:v22,v23*) (-p2:v26*,v27*)
-+        add             v22.8h,  v22.8h,  v26.8h
-+        add             v23.8h,  v23.8h,  v27.8h
-+        UMLSL4K         v2, v3, v4, v5, v22, v23, v0.h[3]
-+
-+//             p1 = cur[prefs];                     // p1 = v22
-+        ldr             q22, [x3, w11, sxtw]
-+
-+//             i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17
-+        uaddl           v18.8h,  v22.8b,  v24.8b
-+        uaddl2          v19.8h,  v22.16b, v24.16b
-+        UMULL4K         v28, v29, v30, v31, v18, v19, v0.h[6]
-+
-+        uaddl           v18.8h,  v20.8b,  v21.8b
-+        uaddl2          v19.8h,  v20.16b, v21.16b
-+        UMLSL4K         v28, v29, v30, v31, v18, v19, v0.h[7]
-+
-+        SQSHRUNN        v17, v28, v29, v30, v31, 13
-+
-+//             i1 += coef_lf[0] * 4 * (m1 + p1);    // p1 = v22, m1 = v24
-+        uaddl           v26.8h,  v24.8b,  v22.8b
-+        uaddl2          v27.8h,  v24.16b, v22.16b
-+        UMLAL4K         v2, v3, v4, v5, v26, v27, v0.h[0]
-+
-+        ldr             q31, [x2, w9, sxtw]
-+        ldr             q29, [x4, w9, sxtw]
-+
-+        ldr             q30, [x2, w11, sxtw]
-+        ldr             q28, [x4, w11, sxtw]
-+
-+//             i1 >>= 15;                            // i1 = v2, -v3, -v4*, -v5*
-+        SQSHRUNN        v2, v2, v3, v4, v5, 15
-+
-+//             {
-+//                 int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
-+//                 int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
-+        uabd            v30.16b, v22.16b, v30.16b
-+        uabd            v31.16b, v24.16b, v31.16b
-+        uabd            v28.16b, v22.16b, v28.16b
-+        uabd            v29.16b, v24.16b, v29.16b
-+        uhadd           v31.16b, v31.16b, v30.16b
-+        uhadd           v29.16b, v29.16b, v28.16b
-+
-+//                 diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18
-+        ushr            v18.16b, v11.16b, #1
-+        umax            v18.16b, v18.16b, v31.16b
-+        umax            v18.16b, v18.16b, v29.16b
-+
-+        // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15
-+        SPAT_CHECK      v18, v13, v24, v10, v22, v15, v31, v30, v29, v28
-+
-+        // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td2 = v11, diff2 = v18
-+        INTERPOL        v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29
-+
-+//                 dst[0] = av_clip_uint8(interpol);
-+        str             q2,  [x0], #16
-+//             }
-+//
-+//             dst++;
-+//             cur++;
-+//             prev++;
-+//             prev2++;
-+//             next++;
-+//         }
-+
-+        subs            w10, w10, #16
-+        add             x2,  x2,  #16
-+        add             x3,  x3,  #16
-+        add             x4,  x4,  #16
-+        add             x17, x17, #16
-+        bgt             10b
-+
-+        POP_VREGS
-+99:
-+        ret
-+endfunc
-+
- // ============================================================================
- //
- // void ff_bwdif_filter_edge_neon(
-diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
-index ae1616d366cb..cce99953f363 100644
---- a/libavfilter/bwdif.h
-+++ b/libavfilter/bwdif.h
-@@ -48,4 +48,9 @@ void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
- void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
-                              int prefs3, int mrefs3, int parity, int clip_max);
- 
-+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                            int prefs3, int mrefs3, int prefs4, int mrefs4,
-+                            int parity, int clip_max);
-+
- #endif /* AVFILTER_BWDIF_H */
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 688c2d257229..2dc47f9614f8 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -132,10 +132,10 @@ void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs
-     FILTER_INTRA()
- }
- 
--static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
--                          int w, int prefs, int mrefs, int prefs2, int mrefs2,
--                          int prefs3, int mrefs3, int prefs4, int mrefs4,
--                          int parity, int clip_max)
-+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-+                            int w, int prefs, int mrefs, int prefs2, int mrefs2,
-+                            int prefs3, int mrefs3, int prefs4, int mrefs4,
-+                            int parity, int clip_max)
- {
-     uint8_t *dst   = dst1;
-     uint8_t *prev  = prev1;
-@@ -353,7 +353,7 @@ av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
-         s->filter_edge  = filter_edge_16bit;
-     } else {
-         s->filter_intra = ff_bwdif_filter_intra_c;
--        s->filter_line  = filter_line_c;
-+        s->filter_line  = ff_bwdif_filter_line_c;
-         s->filter_edge  = ff_bwdif_filter_edge_c;
-     }
- 
-
-From 88f7f8eb8b2fe1e006fdc05564259f6a2448fdbc Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jul 2023 14:04:44 +0000
-Subject: [PATCH 150/186] avfilter/vf_bwdif: Add a filter_line3 method for
- optimisation
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add an optional filter_line3 to the available optimisations.
-
-filter_line3 is equivalent to filter_line, memcpy, filter_line
-
-filter_line shares quite a number of loads and some calculations in
-common with its next iteration and testing shows that using aarch64
-neon filter_line3s performance is 30% better than two filter_lines
-and a memcpy.
-
-Adds a test for vf_bwdif filter_line3 to checkasm
-
-Rounds job start lines down to a multiple of 4. This means that if
-filter_line3 exists then filter_line will not sometimes be called
-once at the end of a slice depending on thread count. The final slice
-may do up to 3 extra lines but filter_edge is faster than filter_line
-so it is unlikely to create any noticable thread load variation.
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-Signed-off-by: Martin Storsjö <martin@martin.st>
-(cherry picked from commit 697533e76dbea8cc7fd6a0642bc60050cc05ead8)
----
- libavfilter/bwdif.h       |  7 ++++
- libavfilter/vf_bwdif.c    | 44 +++++++++++++++++++--
- tests/checkasm/vf_bwdif.c | 81 +++++++++++++++++++++++++++++++++++++++
- 3 files changed, 129 insertions(+), 3 deletions(-)
-
-diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h
-index cce99953f363..496cec72ef01 100644
---- a/libavfilter/bwdif.h
-+++ b/libavfilter/bwdif.h
-@@ -35,6 +35,9 @@ typedef struct BWDIFContext {
-     void (*filter_edge)(void *dst, void *prev, void *cur, void *next,
-                         int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                         int parity, int clip_max, int spat);
-+    void (*filter_line3)(void *dst, int dstride,
-+                         const void *prev, const void *cur, const void *next, int prefs,
-+                         int w, int parity, int clip_max);
- } BWDIFContext;
- 
- void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
-@@ -53,4 +56,8 @@ void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-                             int prefs3, int mrefs3, int prefs4, int mrefs4,
-                             int parity, int clip_max);
- 
-+void ff_bwdif_filter_line3_c(void * dst1, int d_stride,
-+                             const void * prev1, const void * cur1, const void * next1, int s_stride,
-+                             int w, int parity, int clip_max);
-+
- #endif /* AVFILTER_BWDIF_H */
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 2dc47f9614f8..9847d38b6a63 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -150,6 +150,31 @@ void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
-     FILTER2()
- }
- 
-+#define NEXT_LINE()\
-+    dst += d_stride; \
-+    prev += prefs; \
-+    cur  += prefs; \
-+    next += prefs;
-+
-+void ff_bwdif_filter_line3_c(void * dst1, int d_stride,
-+                             const void * prev1, const void * cur1, const void * next1, int s_stride,
-+                             int w, int parity, int clip_max)
-+{
-+    const int prefs = s_stride;
-+    uint8_t * dst  = dst1;
-+    const uint8_t * prev = prev1;
-+    const uint8_t * cur  = cur1;
-+    const uint8_t * next = next1;
-+
-+    ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w,
-+                           prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max);
-+    NEXT_LINE();
-+    memcpy(dst, cur, w);
-+    NEXT_LINE();
-+    ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w,
-+                           prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max);
-+}
-+
- void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
-                             int w, int prefs, int mrefs, int prefs2, int mrefs2,
-                             int parity, int clip_max, int spat)
-@@ -212,6 +237,13 @@ static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1,
-     FILTER2()
- }
- 
-+// Round job start line down to multiple of 4 so that if filter_line3 exists
-+// and the frame is a multiple of 4 high then filter_line will never be called
-+static inline int job_start(const int jobnr, const int nb_jobs, const int h)
-+{
-+    return jobnr >= nb_jobs ? h : ((h * jobnr) / nb_jobs) & ~3;
-+}
-+
- static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
- {
-     BWDIFContext *s = ctx->priv;
-@@ -221,8 +253,8 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-     int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1;
-     int df = (yadif->csp->comp[td->plane].depth + 7) / 8;
-     int refs = linesize / df;
--    int slice_start = (td->h *  jobnr   ) / nb_jobs;
--    int slice_end   = (td->h * (jobnr+1)) / nb_jobs;
-+    int slice_start = job_start(jobnr, nb_jobs, td->h);
-+    int slice_end   = job_start(jobnr + 1, nb_jobs, td->h);
-     int y;
- 
-     for (y = slice_start; y < slice_end; y++) {
-@@ -244,6 +276,11 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-                                refs << 1, -(refs << 1),
-                                td->parity ^ td->tff, clip_max,
-                                (y < 2) || ((y + 3) > td->h) ? 0 : 1);
-+            } else if (s->filter_line3 && y + 2 < slice_end && y + 6 < td->h) {
-+                s->filter_line3(dst, td->frame->linesize[td->plane],
-+                                prev, cur, next, linesize, td->w,
-+                                td->parity ^ td->tff, clip_max);
-+                y += 2;
-             } else {
-                 s->filter_line(dst, prev, cur, next, td->w,
-                                refs, -refs, refs << 1, -(refs << 1),
-@@ -280,7 +317,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
-         td.plane = i;
- 
-         ff_filter_execute(ctx, filter_slice, &td, NULL,
--                          FFMIN(h, ff_filter_get_nb_threads(ctx)));
-+                          FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx)));
-     }
-     if (yadif->current_field == YADIF_FIELD_END) {
-         yadif->current_field = YADIF_FIELD_NORMAL;
-@@ -347,6 +384,7 @@ static int config_props(AVFilterLink *link)
- 
- av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth)
- {
-+    s->filter_line3 = 0;
-     if (bit_depth > 8) {
-         s->filter_intra = filter_intra_16bit;
-         s->filter_line  = filter_line_c_16bit;
-diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c
-index 5fdba09fdc73..3399cacdf790 100644
---- a/tests/checkasm/vf_bwdif.c
-+++ b/tests/checkasm/vf_bwdif.c
-@@ -28,6 +28,10 @@
-     for (size_t i = 0; i < count; i++) \
-         buf0[i] = buf1[i] = rnd() & mask
- 
-+#define randomize_overflow_check(buf0, buf1, mask, count) \
-+    for (size_t i = 0; i < count; i++) \
-+        buf0[i] = buf1[i] = (rnd() & 1) != 0 ? mask : 0;
-+
- #define BODY(type, depth)                                                      \
-     do {                                                                       \
-         type prev0[9*WIDTH], prev1[9*WIDTH];                                   \
-@@ -83,6 +87,83 @@ void checkasm_check_vf_bwdif(void)
-         report("bwdif10");
-     }
- 
-+    if (!ctx_8.filter_line3)
-+        ctx_8.filter_line3 = ff_bwdif_filter_line3_c;
-+
-+    {
-+        LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, cur0,  [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, cur1,  [11*WIDTH]);
-+        LOCAL_ALIGNED_16(uint8_t, dst0,  [WIDTH*3]);
-+        LOCAL_ALIGNED_16(uint8_t, dst1,  [WIDTH*3]);
-+        const int stride = WIDTH;
-+        const int mask = (1<<8)-1;
-+        int parity;
-+
-+        for (parity = 0; parity != 2; ++parity) {
-+            if (check_func(ctx_8.filter_line3, "bwdif8.line3.rnd.p%d", parity)) {
-+
-+                declare_func(void, void * dst1, int d_stride,
-+                                          const void * prev1, const void * cur1, const void * next1, int prefs,
-+                                          int w, int parity, int clip_max);
-+
-+                randomize_buffers(prev0, prev1, mask, 11*WIDTH);
-+                randomize_buffers(next0, next1, mask, 11*WIDTH);
-+                randomize_buffers( cur0,  cur1, mask, 11*WIDTH);
-+
-+                call_ref(dst0, stride,
-+                         prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride,
-+                         WIDTH, parity, mask);
-+                call_new(dst1, stride,
-+                         prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
-+                         WIDTH, parity, mask);
-+
-+                if (memcmp(dst0, dst1, WIDTH*3)
-+                        || memcmp(prev0, prev1, WIDTH*11)
-+                        || memcmp(next0, next1, WIDTH*11)
-+                        || memcmp( cur0,  cur1, WIDTH*11))
-+                    fail();
-+
-+                bench_new(dst1, stride,
-+                         prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
-+                         WIDTH, parity, mask);
-+            }
-+        }
-+
-+        // Use just 0s and ~0s to try to provoke bad cropping or overflow
-+        // Parity makes no difference to this test so just test 0
-+        if (check_func(ctx_8.filter_line3, "bwdif8.line3.overflow")) {
-+
-+            declare_func(void, void * dst1, int d_stride,
-+                                      const void * prev1, const void * cur1, const void * next1, int prefs,
-+                                      int w, int parity, int clip_max);
-+
-+            randomize_overflow_check(prev0, prev1, mask, 11*WIDTH);
-+            randomize_overflow_check(next0, next1, mask, 11*WIDTH);
-+            randomize_overflow_check( cur0,  cur1, mask, 11*WIDTH);
-+
-+            call_ref(dst0, stride,
-+                     prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride,
-+                     WIDTH, 0, mask);
-+            call_new(dst1, stride,
-+                     prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride,
-+                     WIDTH, 0, mask);
-+
-+            if (memcmp(dst0, dst1, WIDTH*3)
-+                    || memcmp(prev0, prev1, WIDTH*11)
-+                    || memcmp(next0, next1, WIDTH*11)
-+                    || memcmp( cur0,  cur1, WIDTH*11))
-+                fail();
-+
-+            // No point to benching
-+        }
-+
-+        report("bwdif8.line3");
-+    }
-+
-     {
-         LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]);
-         LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]);
-
-From 27eb1719ee70a8ccaf2acac2161b84e283ac5b1f Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 4 Jul 2023 14:04:45 +0000
-Subject: [PATCH 151/186] avfilter/vf_bwdif: Add neon for filter_line3
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-Signed-off-by: Martin Storsjö <martin@martin.st>
-(cherry picked from commit f00222e81f7d6a59d977fbb280d67989818e0ad2)
----
- libavfilter/aarch64/vf_bwdif_init_aarch64.c |  28 ++
- libavfilter/aarch64/vf_bwdif_neon.S         | 272 ++++++++++++++++++++
- 2 files changed, 300 insertions(+)
-
-diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-index 21e67884ab90..f52bc4b9b431 100644
---- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c
-@@ -36,6 +36,33 @@ void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1,
-                                int prefs3, int mrefs3, int prefs4, int mrefs4,
-                                int parity, int clip_max);
- 
-+void ff_bwdif_filter_line3_neon(void * dst1, int d_stride,
-+                                const void * prev1, const void * cur1, const void * next1, int s_stride,
-+                                int w, int parity, int clip_max);
-+
-+
-+static void filter_line3_helper(void * dst1, int d_stride,
-+                                const void * prev1, const void * cur1, const void * next1, int s_stride,
-+                                int w, int parity, int clip_max)
-+{
-+    // Asm works on 16 byte chunks
-+    // If w is a multiple of 16 then all is good - if not then if width rounded
-+    // up to nearest 16 will fit in both src & dst strides then allow the asm
-+    // to write over the padding bytes as that is almost certainly faster than
-+    // having to invoke the C version to clean up the tail.
-+    const int w1 = FFALIGN(w, 16);
-+    const int w0 = clip_max != 255 ? 0 :
-+                   d_stride <= w1 && s_stride <= w1 ? w : w & ~15;
-+
-+    ff_bwdif_filter_line3_neon(dst1, d_stride,
-+                               prev1, cur1, next1, s_stride,
-+                               w0, parity, clip_max);
-+
-+    if (w0 < w)
-+        ff_bwdif_filter_line3_c((char *)dst1 + w0, d_stride,
-+                                (const char *)prev1 + w0, (const char *)cur1 + w0, (const char *)next1 + w0, s_stride,
-+                                w - w0, parity, clip_max);
-+}
- 
- static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1,
-                                int w, int prefs, int mrefs, int prefs2, int mrefs2,
-@@ -93,5 +120,6 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth)
-     s->filter_intra = filter_intra_helper;
-     s->filter_line  = filter_line_helper;
-     s->filter_edge  = filter_edge_helper;
-+    s->filter_line3 = filter_line3_helper;
- }
- 
-diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S
-index f185e94e3c6f..ae9aab20cdd9 100644
---- a/libavfilter/aarch64/vf_bwdif_neon.S
-+++ b/libavfilter/aarch64/vf_bwdif_neon.S
-@@ -154,6 +154,278 @@ const coeffs, align=4   // align 4 means align on 2^4 boundry
-         .hword          5077, 981                       // sp[0] = v0.h[6]
- endconst
- 
-+// ===========================================================================
-+//
-+// void ff_bwdif_filter_line3_neon(
-+//         void * dst1,         // x0
-+//         int d_stride,        // w1
-+//         const void * prev1,  // x2
-+//         const void * cur1,   // x3
-+//         const void * next1,  // x4
-+//         int s_stride,        // w5
-+//         int w,               // w6
-+//         int parity,          // w7
-+//         int clip_max);       // [sp, #0] (Ignored)
-+
-+function ff_bwdif_filter_line3_neon, export=1
-+        // Sanity check w
-+        cmp             w6, #0
-+        ble             99f
-+
-+        LDR_COEFFS      v0, x17
-+
-+// #define prev2 cur
-+//        const uint8_t * restrict next2 = parity ? prev : next;
-+        cmp             w7, #0
-+        csel            x17, x2, x4, ne
-+
-+        // We want all the V registers - save all the ones we must
-+        PUSH_VREGS
-+
-+        // Some rearrangement of initial values for nice layout of refs in regs
-+        mov             w10, w6                         // w10 = loop count
-+        neg             w9,  w5                         // w9  = mref
-+        lsl             w8,  w9,  #1                    // w8 =  mref2
-+        add             w7,  w9,  w9, LSL #1            // w7  = mref3
-+        lsl             w6,  w9,  #2                    // w6  = mref4
-+        mov             w11, w5                         // w11 = pref
-+        lsl             w12, w5,  #1                    // w12 = pref2
-+        add             w13, w5,  w5, LSL #1            // w13 = pref3
-+        lsl             w14, w5,  #2                    // w14 = pref4
-+        add             w15, w5,  w5, LSL #2            // w15 = pref5
-+        add             w16, w14, w12                   // w16 = pref6
-+
-+        lsl             w5,  w1,  #1                    // w5 = d_stride * 2
-+
-+//         for (x = 0; x < w; x++) {
-+//             int diff0, diff2;
-+//             int d0, d2;
-+//             int temporal_diff0, temporal_diff2;
-+//
-+//             int i1, i2;
-+//             int j1, j2;
-+//             int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4;
-+
-+10:
-+//             c0 = prev2[0] + next2[0];                // c0 = v20, v21
-+//             d0  = c0 >> 1;                           // d0 = v10
-+//             temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11
-+        ldr             q31, [x3]
-+        ldr             q21, [x17]
-+        uhadd           v10.16b, v31.16b, v21.16b
-+        uabd            v11.16b, v31.16b, v21.16b
-+        uaddl           v20.8h,  v21.8b,  v31.8b
-+        uaddl2          v21.8h,  v21.16b, v31.16b
-+
-+        ldr             q31, [x3, w6, sxtw]
-+        ldr             q23, [x17, w6, sxtw]
-+
-+//             i1 = coef_hf[0] * c0;                    // i1 = v2-v5
-+        UMULL4K         v2, v3, v4, v5, v20, v21, v0.h[2]
-+
-+        ldr             q30, [x3, w14, sxtw]
-+        ldr             q25, [x17, w14, sxtw]
-+
-+//             m4 = prev2[mrefs4] + next2[mrefs4];      // m4 = v22,v23
-+        uaddl           v22.8h,  v23.8b,  v31.8b
-+        uaddl2          v23.8h,  v23.16b, v31.16b
-+
-+//             p4 = prev2[prefs4] + next2[prefs4];      // p4 = v24,v25, (p4 >> 1) = v12
-+        uhadd           v12.16b, v25.16b, v30.16b
-+        uaddl           v24.8h,  v25.8b,  v30.8b
-+        uaddl2          v25.8h,  v25.16b, v30.16b
-+
-+//             j1 = -coef_hf[1] * (c0 + p4);            // j1 = v6-v9  (-c0:v20,v21)
-+        add             v20.8h,  v20.8h,  v24.8h
-+        add             v21.8h,  v21.8h,  v25.8h
-+        SMULL4K         v6, v7, v8, v9, v20, v21, v0.h[5]
-+
-+//             m3 = cur[mrefs3];                        // m3 = v20
-+        ldr             q20, [x3, w7, sxtw]
-+
-+//             p3 = cur[prefs3];                        // p3 = v21
-+        ldr             q21, [x3, w13, sxtw]
-+
-+//             i1 += coef_hf[2] * (m4 + p4);            // (-m4:v22,v23) (-p4:v24,v25)
-+        add             v22.8h,  v22.8h,  v24.8h
-+        add             v23.8h,  v23.8h,  v25.8h
-+        UMLAL4K         v2, v3, v4, v5, v22, v23, v0.h[4]
-+
-+        ldr             q29, [x3, w8, sxtw]
-+        ldr             q23, [x17, w8, sxtw]
-+
-+//             i1 -= coef_lf[1] * 4 * (m3 + p3);        // -
-+        uaddl           v30.8h,  v20.8b,  v21.8b
-+        uaddl2          v31.8h,  v20.16b, v21.16b
-+
-+        ldr             q28, [x3, w16, sxtw]
-+        ldr             q25, [x17, w16, sxtw]
-+
-+        UMLSL4K         v2, v3, v4, v5, v30, v31, v0.h[1]
-+
-+//             m2 = prev2[mrefs2] + next2[mrefs2];      // m2 = v22,v23, (m2 >> 1) = v13
-+        uhadd           v13.16b, v23.16b, v29.16b
-+        uaddl           v22.8h,  v23.8b,  v29.8b
-+        uaddl2          v23.8h,  v23.16b, v29.16b
-+
-+        ldr             q31, [x3, w12, sxtw]
-+        ldr             q27, [x17, w12, sxtw]
-+
-+//             p6 = prev2[prefs6] + next2[prefs6];      // p6 = v24,v25
-+        uaddl           v24.8h,  v25.8b,  v28.8b
-+        uaddl2          v25.8h,  v25.16b, v28.16b
-+
-+//             j1 += coef_hf[2] * (m2 + p6);            // (-p6:v24,v25)
-+        add             v24.8h,  v24.8h,  v22.8h
-+        add             v25.8h,  v25.8h,  v23.8h
-+        UMLAL4K         v6, v7, v8, v9, v24, v25, v0.h[4]
-+
-+//             m1 = cur[mrefs];                         // m1 = v24
-+        ldr             q24, [x3, w9, sxtw]
-+
-+//             p5 = cur[prefs5];                        // p5 = v25
-+        ldr             q25, [x3, w15, sxtw]
-+
-+//             p2 = prev2[prefs2] + next2[prefs2];      // p2 = v26, v27
-+//             temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14
-+//             d2  = p2 >> 1;                           // d2 = v15
-+        uabd            v14.16b, v31.16b, v27.16b
-+        uhadd           v15.16b, v31.16b, v27.16b
-+        uaddl           v26.8h,  v27.8b,  v31.8b
-+        uaddl2          v27.8h,  v27.16b, v31.16b
-+
-+//             j1 += coef_hf[0] * p2;                   // -
-+        UMLAL4K         v6, v7, v8, v9, v26, v27, v0.h[2]
-+
-+//             i1 -= coef_hf[1] * (m2 + p2);            // (-m2:v22,v23*) (-p2:v26*,v27*)
-+        add             v22.8h,  v22.8h,  v26.8h
-+        add             v23.8h,  v23.8h,  v27.8h
-+        UMLSL4K         v2, v3, v4, v5, v22, v23, v0.h[3]
-+
-+//             p1 = cur[prefs];                         // p1 = v22
-+        ldr             q22, [x3, w11, sxtw]
-+
-+//             j1 -= coef_lf[1] * 4 * (m1 + p5);        // -
-+        uaddl           v26.8h,  v24.8b,  v25.8b
-+        uaddl2          v27.8h,  v24.16b, v25.16b
-+        UMLSL4K         v6, v7, v8, v9, v26, v27, v0.h[1]
-+
-+//             j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1]  * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16
-+        uaddl           v18.8h,  v22.8b,  v21.8b
-+        uaddl2          v19.8h,  v22.16b, v21.16b
-+        UMULL4K         v28, v29, v30, v31, v18, v19, v0.h[6]
-+
-+        uaddl           v18.8h,  v24.8b,  v25.8b
-+        uaddl2          v19.8h,  v24.16b, v25.16b
-+        UMLSL4K         v28, v29, v30, v31, v18, v19, v0.h[7]
-+
-+        SQSHRUNN        v16, v28, v29, v30, v31, 13
-+
-+//             i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17
-+        uaddl           v18.8h,  v22.8b,  v24.8b
-+        uaddl2          v19.8h,  v22.16b, v24.16b
-+        UMULL4K         v28, v29, v30, v31, v18, v19, v0.h[6]
-+
-+        uaddl           v18.8h,  v20.8b,  v21.8b
-+        uaddl2          v19.8h,  v20.16b, v21.16b
-+        UMLSL4K         v28, v29, v30, v31, v18, v19, v0.h[7]
-+
-+        SQSHRUNN        v17, v28, v29, v30, v31, 13
-+
-+//             i1 += coef_lf[0] * 4 * (m1 + p1);        // p1 = v22, m1 = v24
-+        uaddl           v26.8h,  v24.8b,  v22.8b
-+        uaddl2          v27.8h,  v24.16b, v22.16b
-+        UMLAL4K         v2, v3, v4, v5, v26, v27, v0.h[0]
-+
-+        ldr             q31, [x2, w9, sxtw]
-+        ldr             q29, [x4, w9, sxtw]
-+
-+//             j1 += coef_lf[0] * 4 * (p1 + p3);        // p1 = v22, p3 = v21
-+        uaddl           v26.8h,  v21.8b,  v22.8b
-+        uaddl2          v27.8h,  v21.16b, v22.16b
-+        UMLAL4K         v6, v7, v8, v9, v26, v27, v0.h[0]
-+
-+        ldr             q30, [x2, w11, sxtw]
-+        ldr             q28, [x4, w11, sxtw]
-+
-+//             i1 >>= 15;                               // i1 = v2, -v3, -v4*, -v5*
-+        SQSHRUNN        v2, v2, v3, v4, v5, 15
-+
-+//             j1 >>= 15;                               // j1 = v3, -v6*, -v7*, -v8*, -v9*
-+        SQSHRUNN        v3, v6, v7, v8, v9, 15
-+
-+//             {
-+//                 int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1;
-+//                 int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1;
-+        uabd            v30.16b, v22.16b, v30.16b
-+        uabd            v31.16b, v24.16b, v31.16b
-+        uabd            v28.16b, v22.16b, v28.16b
-+        uabd            v29.16b, v24.16b, v29.16b
-+        uhadd           v31.16b, v31.16b, v30.16b
-+        uhadd           v29.16b, v29.16b, v28.16b
-+
-+        ldr             q27, [x2, w13, sxtw]
-+        ldr             q26, [x4, w13, sxtw]
-+
-+//                 diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18
-+        ushr            v18.16b, v11.16b, #1
-+        umax            v18.16b, v18.16b, v31.16b
-+        umax            v18.16b, v18.16b, v29.16b
-+//             }                                        // v28, v30 preserved for next block
-+//             {  // tdiff2 = v14
-+//                 int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1;
-+//                 int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1;
-+        uabd            v31.16b, v21.16b, v27.16b
-+        uabd            v29.16b, v21.16b, v26.16b
-+        uhadd           v31.16b, v31.16b, v30.16b
-+        uhadd           v29.16b, v29.16b, v28.16b
-+
-+//                 diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19
-+        ushr            v19.16b, v14.16b, #1
-+        umax            v19.16b, v19.16b, v31.16b
-+        umax            v19.16b, v19.16b, v29.16b
-+//             }
-+
-+        // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15
-+        SPAT_CHECK      v18, v13, v24, v10, v22, v15, v31, v30, v29, v28
-+
-+        //  diff2 = v19, d0 = v10, p1 = v22, d2 = v15, p3 = v21, (p4 >> 1) = v12
-+        SPAT_CHECK      v19, v10, v22, v15, v21, v12, v31, v30, v29, v28
-+
-+        // j1 = v3, j2 = v16, p1 = v22, d2 = v15, p3 = v21, td2 = v14, diff2 = v19
-+        INTERPOL        v3, v3, v16, v22, v15, v21, v14, v19, v31, v30, v29
-+
-+//                 dst[d_stride * 2] = av_clip_uint8(interpol);
-+        str             q3,  [x0, w5, sxtw]
-+
-+//             dst[d_stride] = p1;
-+        str             q22, [x0, w1, sxtw]
-+
-+        // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td2 = v11, diff2 = v18
-+        INTERPOL        v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29
-+
-+//                 dst[0] = av_clip_uint8(interpol);
-+        str             q2,  [x0], #16
-+//             }
-+//
-+//             dst++;
-+//             cur++;
-+//             prev++;
-+//             prev2++;
-+//             next++;
-+//         }
-+        subs            w10, w10, #16
-+        add             x2,  x2,  #16
-+        add             x3,  x3,  #16
-+        add             x4,  x4,  #16
-+        add             x17, x17, #16
-+        bgt             10b
-+
-+        POP_VREGS
-+99:
-+        ret
-+endfunc
-+
- // ===========================================================================
- //
- // void filter_line(
-
-From 953a9d1e34140084185e747ebf3f3877b2779267 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 24 Jul 2023 16:39:06 +0100
-Subject: [PATCH 152/186] weak_link: Fix ref count init
-
-(cherry picked from commit d6de45b15a0c96bfdc96bbc441963a60945e5eba)
----
- libavcodec/weak_link.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c
-index f234a985b9c1..5a79e89ed7c2 100644
---- a/libavcodec/weak_link.c
-+++ b/libavcodec/weak_link.c
-@@ -19,6 +19,7 @@ struct ff_weak_link_master * ff_weak_link_new(void * p)
-     struct ff_weak_link_master * w = malloc(sizeof(*w));
-     if (!w)
-         return NULL;
-+    atomic_init(&w->ref_count, 0);
-     w->ptr = p;
-     if (pthread_rwlock_init(&w->lock, NULL)) {
-         free(w);
-
-From 8ffc0497c128a4d5f60849fe56e7f985e6a7ef71 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 24 Jul 2023 17:28:06 +0100
-Subject: [PATCH 153/186] v4l2_m2m: Check fd before attempting to close (fix
- valgrind warn)
-
-(cherry picked from commit befa42878d054d1fba53d5da14406faaae224daf)
----
- libavcodec/v4l2_m2m.c | 7 +++++--
- 1 file changed, 5 insertions(+), 2 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index 28d9ed49887e..238ceea23546 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -340,8 +340,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
-     ff_v4l2_context_release(&s->output);
- 
-     dmabufs_ctl_unref(&s->db_ctl);
--    close(s->fd);
--    s->fd = -1;
-+
-+    if (s->fd != -1) {
-+        close(s->fd);
-+        s->fd = -1;
-+    }
- 
-     s->self_ref = NULL;
-     // This is only called on avctx close so after this point we don't have that
-
-From 567f293b764431685f83b368736b18cfdc42e29e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 26 Jul 2023 16:29:39 +0000
-Subject: [PATCH 154/186] v4l2_req_devscan: Fix udev leak
-
-(cherry picked from commit 53b17ffd8a8890ef483163f3c9b0f96b437303f1)
----
- libavcodec/v4l2_req_devscan.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c
-index cfa94d55c49b..ee8527ba1f35 100644
---- a/libavcodec/v4l2_req_devscan.c
-+++ b/libavcodec/v4l2_req_devscan.c
-@@ -437,12 +437,14 @@ int devscan_build(void * const dc, struct devscan **pscan)
-     }
- 
-     udev_enumerate_unref(enumerate);
-+    udev_unref(udev);
- 
-     *pscan = scan;
-     return 0;
- 
- fail:
--    udev_unref(udev);
-+    if (udev)
-+        udev_unref(udev);
-     devscan_delete(&scan);
-     return ret;
- }
-
-From 74a85ee6638b4fdedeaadd0d62c6f6c46345ecad Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 26 Jul 2023 16:42:27 +0000
-Subject: [PATCH 155/186] v4l2_m2m: Fix device_ref leak
-
-(cherry picked from commit bfea15c07b4301cd1208981c8f221e5e3a598b34)
----
- libavcodec/v4l2_m2m.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c
-index 238ceea23546..add64b8e63e6 100644
---- a/libavcodec/v4l2_m2m.c
-+++ b/libavcodec/v4l2_m2m.c
-@@ -338,6 +338,7 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv)
-     }
- 
-     ff_v4l2_context_release(&s->output);
-+    av_buffer_unref(&s->device_ref);
- 
-     dmabufs_ctl_unref(&s->db_ctl);
- 
-
-From d018ca7bb4428e622e0acb5f3fa6b3ae6a52873e Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 28 Jul 2023 16:10:01 +0000
-Subject: [PATCH 156/186] v4l2_m2m_dec: Avoid structure init warnings when
- struct changes
-
-(cherry picked from commit 8a836af420ed8c8dba90e2fd88691bcaa0668f8a)
----
- libavcodec/v4l2_m2m_dec.c | 11 ++++++++---
- 1 file changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 11c83b2d6643..584e0b8825f9 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -1004,11 +1004,13 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)
- #if CONFIG_H264_DECODER
-         case AV_CODEC_ID_H264:
-         {
--            H264ParamSets ps = {{NULL}};
-+            H264ParamSets ps;
-             int is_avc = 0;
-             int nal_length_size = 0;
-             int ret;
- 
-+            memset(&ps, 0, sizeof(ps));
-+
-             ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
-                                            &ps, &is_avc, &nal_length_size,
-                                            avctx->err_recognition, avctx);
-@@ -1034,12 +1036,15 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)
- #if CONFIG_HEVC_DECODER
-         case AV_CODEC_ID_HEVC:
-         {
--            HEVCParamSets ps = {{NULL}};
--            HEVCSEI sei = {{{{0}}}};
-+            HEVCParamSets ps;
-+            HEVCSEI sei;
-             int is_nalff = 0;
-             int nal_length_size = 0;
-             int ret;
- 
-+            memset(&ps, 0, sizeof(ps));
-+            memset(&sei, 0, sizeof(sei));
-+
-             ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size,
-                                            &ps, &sei, &is_nalff, &nal_length_size,
-                                            avctx->err_recognition, 0, avctx);
-
-From a466a89ac3c68a4bb41af63760caef0a624c86dd Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Sat, 22 Jul 2023 12:33:50 +0000
-Subject: [PATCH 157/186] v4l2_m2m_dec: Avoid calling get_format if no V4L2
- decoder device
-
-Move the get_format callback to after the decoder device has been found.
-This means that get_format will never be called if there is no chance
-that init will succeed which helps programs (such as VLC) that do
-significant processing in that callback to avoid it. It also means that
-the list of formats availible can actually represent reality.
-
-(cherry picked from commit 3b27cb41d7df73c054452fa49269988d4df32409)
----
- libavcodec/v4l2_context.c |  41 +++++++++++++
- libavcodec/v4l2_context.h |  13 ++++
- libavcodec/v4l2_m2m_dec.c | 122 ++++++++++++++++++++++++++++----------
- 3 files changed, 145 insertions(+), 31 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 79a31cf9300b..978a487ca98d 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -1064,6 +1064,47 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout)
-     return 0;
- }
- 
-+// Return 0 terminated list of drm fourcc video formats for this context
-+// NULL if none found or error
-+// Returned list is malloced so must be freed
-+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN)
-+{
-+    unsigned int i;
-+    unsigned int n = 0;
-+    unsigned int size = 0;
-+    uint32_t * e = NULL;
-+    *pN = 0;
-+
-+    for (i = 0; i < 1024; ++i) {
-+        struct v4l2_fmtdesc fdesc = {
-+            .index = i,
-+            .type = ctx->type
-+        };
-+
-+        if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc))
-+            return e;
-+
-+        if (n + 1 >= size) {
-+            unsigned int newsize = (size == 0) ? 16 : size * 2;
-+            uint32_t * t = av_realloc(e, newsize * sizeof(*t));
-+            if (!t)
-+                return e;
-+            e = t;
-+            size = newsize;
-+        }
-+
-+        e[n] = fdesc.pixelformat;
-+        e[++n] = 0;
-+        if (pN)
-+            *pN = n;
-+    }
-+
-+    // If we've looped 1024 times we are clearly confused
-+    *pN = 0;
-+    av_free(e);
-+    return NULL;
-+}
-+
- int ff_v4l2_context_get_format(V4L2Context* ctx, int probe)
- {
-     struct v4l2_format_update fmt = { 0 };
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index 5afed3e6ecb4..f4240f7dddb2 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -151,6 +151,19 @@ int ff_v4l2_context_set_format(V4L2Context* ctx);
-  */
- int ff_v4l2_context_get_format(V4L2Context* ctx, int probe);
- 
-+/**
-+ * Get the list of drm fourcc pixel formats for this context
-+ *
-+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context
-+ *       description for required variables.
-+ * @param[in] pN A pointer to receive the number of formats
-+ *       found. May be NULL if not wanted.
-+ * @return Pointer to malloced list of zero terminated formats,
-+ *         NULL if none or error. As list is malloced it must be
-+ *         freed.
-+ */
-+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN);
-+
- /**
-  * Releases a V4L2Context.
-  *
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index 584e0b8825f9..c4f38cc24e1f 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -873,10 +873,9 @@ check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s)
- };
- 
- static int
--check_size(AVCodecContext * const avctx, V4L2m2mContext * const s)
-+check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc)
- {
-     unsigned int i;
--    const uint32_t fcc = ff_v4l2_get_format_pixelformat(&s->capture.format);
-     const uint32_t w = avctx->coded_width;
-     const uint32_t h = avctx->coded_height;
- 
-@@ -1073,12 +1072,91 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s)
-     }
- }
- 
-+static int
-+choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
-+{
-+    const V4L2m2mPriv * const priv = avctx->priv_data;
-+    unsigned int fmts_n;
-+    uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n);
-+    enum AVPixelFormat *fmts2 = NULL;
-+    enum AVPixelFormat t;
-+    enum AVPixelFormat gf_pix_fmt;
-+    unsigned int i;
-+    unsigned int n = 0;
-+    unsigned int pref_n = 1;
-+    int rv = AVERROR(ENOENT);
-+
-+    if (!fmts)
-+        return AVERROR(ENOENT);
-+
-+    if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 2))) == NULL) {
-+        rv = AVERROR(ENOMEM);
-+        goto error;
-+    }
-+
-+    // Filter for formats that are supported by ffmpeg and
-+    // can accomodate the stream size
-+    fmts2[n++] = AV_PIX_FMT_DRM_PRIME;
-+    for (i = 0; i != fmts_n; ++i) {
-+        const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO);
-+        if (f == AV_PIX_FMT_NONE)
-+            continue;
-+
-+        if (check_size(avctx, s, fmts[i]) != 0)
-+            continue;
-+
-+        if (f == priv->pix_fmt)
-+            pref_n = n;
-+        fmts2[n++] = f;
-+    }
-+    fmts2[n] = AV_PIX_FMT_NONE;
-+
-+    if (n < 2) {
-+        av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__);
-+        goto error;
-+    }
-+
-+    // Put preferred s/w format at the end - ff_get_format will put it in sw_pix_fmt
-+    t = fmts2[n - 1];
-+    fmts2[n - 1] = fmts2[pref_n];
-+    fmts2[pref_n] = t;
-+
-+    gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
-+    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
-+           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt),
-+           avctx->coded_width, avctx->coded_height,
-+           gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
-+
-+    if (gf_pix_fmt == AV_PIX_FMT_NONE)
-+        goto error;
-+
-+    if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
-+        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
-+        s->capture.av_pix_fmt = avctx->sw_pix_fmt;
-+        s->output_drm = 1;
-+    }
-+    else {
-+        avctx->pix_fmt = gf_pix_fmt;
-+        s->capture.av_pix_fmt = gf_pix_fmt;
-+        s->output_drm = 0;
-+    }
-+
-+    // Get format converts capture.av_pix_fmt back into a V4L2 format in the context
-+    if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0)
-+        goto error;
-+    rv = ff_v4l2_context_set_format(&s->capture);
-+
-+error:
-+    av_free(fmts2);
-+    av_free(fmts);
-+    return rv;
-+}
-+
- static av_cold int v4l2_decode_init(AVCodecContext *avctx)
- {
-     V4L2Context *capture, *output;
-     V4L2m2mContext *s;
-     V4L2m2mPriv *priv = avctx->priv_data;
--    int gf_pix_fmt;
-     int ret;
- 
-     av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
-@@ -1122,28 +1200,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-     capture->av_pix_fmt = avctx->pix_fmt;
-     capture->min_buf_size = 0;
- 
--    /* the client requests the codec to generate DRM frames:
--     *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
--     *       check the ff_v4l2_buffer_to_avframe conversion function.
--     *   - the DRM frame format is passed in the DRM frame descriptor layer.
--     *       check the v4l2_get_drm_frame function.
--     */
--
--    avctx->sw_pix_fmt = avctx->pix_fmt;
--    gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
--    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
--           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt),
--           avctx->coded_width, avctx->coded_height,
--           gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
--
--    if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
--        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
--        s->output_drm = 1;
--    }
--    else {
--        capture->av_pix_fmt = gf_pix_fmt;
--        s->output_drm = 0;
--    }
-+    capture->av_pix_fmt = AV_PIX_FMT_NONE;
-+    s->output_drm = 0;
- 
-     s->db_ctl = NULL;
-     if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) {
-@@ -1185,19 +1243,21 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
-         return ret;
-     }
- 
--    if ((ret = v4l2_prepare_decoder(s)) < 0)
--        return ret;
--
-     if ((ret = get_quirks(avctx, s)) != 0)
-         return ret;
- 
--    if ((ret = check_size(avctx, s)) != 0)
--        return ret;
--
-     if ((ret = check_profile(avctx, s)) != 0) {
-         av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile);
-         return ret;
-     }
-+
-+    // Size check done as part of format filtering
-+    if ((ret = choose_capture_format(avctx, s)) != 0)
-+        return ret;
-+
-+    if ((ret = v4l2_prepare_decoder(s)) < 0)
-+        return ret;
-+
-     return 0;
- }
- 
-
-From ff30fb1c60c9753eb42d107f083bdadea7918ebe Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Aug 2023 06:26:35 +0000
-Subject: [PATCH 158/186] v4l2_req_dmabufs: Fix crash on free if dmabuf
- imported
-
-Thanks to Ratchanan Srirattanamet for finding this
----
- libavcodec/v4l2_req_dmabufs.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c
-index acc0366e7630..017c3892a593 100644
---- a/libavcodec/v4l2_req_dmabufs.c
-+++ b/libavcodec/v4l2_req_dmabufs.c
-@@ -232,7 +232,8 @@ void dmabuf_free(struct dmabuf_h * dh)
-     request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
- #endif
- 
--    dh->fns->buf_free(dh);
-+    if (dh->fns != NULL && dh->fns->buf_free)
-+        dh->fns->buf_free(dh);
- 
-     if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL)
-         munmap(dh->mapptr, dh->size);
-
-From 7342db8c6a46af0ebe38369cb1cce6eacaeed528 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Aug 2023 06:34:47 +0000
-Subject: [PATCH 159/186] aarch64/rgb2rgb_neon: Fix bgr24->yuv matrix read to
- flip correct way
-
----
- libswscale/aarch64/rgb2rgb_neon.S | 15 ++++++---------
- 1 file changed, 6 insertions(+), 9 deletions(-)
-
-diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S
-index 476ca723a0ef..077d1dd5938a 100644
---- a/libswscale/aarch64/rgb2rgb_neon.S
-+++ b/libswscale/aarch64/rgb2rgb_neon.S
-@@ -92,15 +92,12 @@ endfunc
- 
- function ff_rgb24toyv12_aarch64, export=1
-         ldr             x15, [sp, #8]
--        ld1             {v3.s}[2], [x15], #4
--        ld1             {v3.s}[1], [x15], #4
--        ld1             {v3.s}[0], [x15], #4
--        ld1             {v4.s}[2], [x15], #4
--        ld1             {v4.s}[1], [x15], #4
--        ld1             {v4.s}[0], [x15], #4
--        ld1             {v5.s}[2], [x15], #4
--        ld1             {v5.s}[1], [x15], #4
--        ld1             {v5.s}[0], [x15]
-+        ld3             {v3.s, v4.s, v5.s}[0], [x15], #12
-+        ld3             {v3.s, v4.s, v5.s}[1], [x15], #12
-+        ld3             {v3.s, v4.s, v5.s}[2], [x15]
-+        mov             v6.16b, v3.16b
-+        mov             v3.16b, v5.16b
-+        mov             v5.16b, v6.16b
-         b               99f
- endfunc
- 
-
-From 2de40aab0aa4e2ee9be96b60efda8f5e99a4e445 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Aug 2023 06:36:51 +0000
-Subject: [PATCH 160/186] aarch64/rgb2rgb_neon: Add macros to make common code
- explicit
-
----
- libswscale/aarch64/rgb2rgb_neon.S | 276 ++++++++++--------------------
- 1 file changed, 95 insertions(+), 181 deletions(-)
-
-diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S
-index 077d1dd5938a..0956800b4185 100644
---- a/libswscale/aarch64/rgb2rgb_neon.S
-+++ b/libswscale/aarch64/rgb2rgb_neon.S
-@@ -78,6 +78,67 @@ function ff_interleave_bytes_neon, export=1
-         ret
- endfunc
- 
-+// Expand rgb2 into r0+r1/g0+g1/b0+b1
-+.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2
-+        uxtl            \r0\().8h, \r2\().8b
-+        uxtl            \g0\().8h, \g2\().8b
-+        uxtl            \b0\().8h, \b2\().8b
-+
-+        uxtl2           \r1\().8h, \r2\().16b
-+        uxtl2           \g1\().8h, \g2\().16b
-+        uxtl2           \b1\().8h, \b2\().16b
-+.endm
-+
-+// Expand rgb2 into r0+r1/g0+g1/b0+b1
-+// and pick every other el to put back into rgb2 for chroma
-+.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2
-+        XRGB3Y          \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2
-+
-+        bic             \r2\().8h, #0xff, LSL #8
-+        bic             \g2\().8h, #0xff, LSL #8
-+        bic             \b2\().8h, #0xff, LSL #8
-+.endm
-+
-+.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2
-+        smull           \d0\().4s, \s0\().4h, \c0
-+        smlal           \d0\().4s, \s1\().4h, \c1
-+        smlal           \d0\().4s, \s2\().4h, \c2
-+        smull2          \d1\().4s, \s0\().8h, \c0
-+        smlal2          \d1\().4s, \s1\().8h, \c1
-+        smlal2          \d1\().4s, \s2\().8h, \c2
-+.endm
-+
-+// d0 may be s0
-+// s0, s2 corrupted
-+.macro SHRN_Y d0, s0, s1, s2, s3, k128h
-+        shrn            \s0\().4h, \s0\().4s, #12
-+        shrn2           \s0\().8h, \s1\().4s, #12
-+        add             \s0\().8h, \s0\().8h, \k128h\().8h     // +128 (>> 3 = 16)
-+        sqrshrun        \d0\().8b, \s0\().8h, #3
-+        shrn            \s2\().4h, \s2\().4s, #12
-+        shrn2           \s2\().8h, \s3\().4s, #12
-+        add             \s2\().8h, \s2\().8h, \k128h\().8h
-+        sqrshrun2       \d0\().16b, v28.8h, #3
-+.endm
-+
-+.macro SHRN_C d0, s0, s1, k128b
-+        shrn            \s0\().4h, \s0\().4s, #14
-+        shrn2           \s0\().8h, \s1\().4s, #14
-+        sqrshrn         \s0\().8b, \s0\().8h, #1
-+        add             \d0\().8b, \s0\().8b, \k128b\().8b     // +128
-+.endm
-+
-+.macro STB2V s0, n, a
-+        st1             {\s0\().b}[(\n+0)], [\a], #1
-+        st1             {\s0\().b}[(\n+1)], [\a], #1
-+.endm
-+
-+.macro STB4V s0, n, a
-+        STB2V           \s0, (\n+0), \a
-+        STB2V           \s0, (\n+2), \a
-+.endm
-+
-+
- // void ff_rgb24toyv12_aarch64(
- //              const uint8_t *src,             // x0
- //              uint8_t *ydst,                  // x1
-@@ -111,7 +172,7 @@ endfunc
- //              int lumStride,                  // w6
- //              int chromStride,                // w7
- //              int srcStr,                     // [sp, #0]
--//              int32_t *rgb2yuv);              // [sp, #8]
-+//              int32_t *rgb2yuv);              // [sp, #8] (including Mac)
- 
- // regs
- // v0-2         Src bytes - reused as chroma src
-@@ -130,13 +191,12 @@ endfunc
- // v30          V out
- // v31          V tmp
- 
--// Assumes Little Endian in tail stores & conversion matrix
--
- function ff_bgr24toyv12_aarch64, export=1
-         ldr             x15, [sp, #8]
-         ld3             {v3.s, v4.s, v5.s}[0], [x15], #12
-         ld3             {v3.s, v4.s, v5.s}[1], [x15], #12
-         ld3             {v3.s, v4.s, v5.s}[2], [x15]
-+
- 99:
-         ldr             w14, [sp, #0]
-         movi            v7.8b, #128
-@@ -167,73 +227,29 @@ function ff_bgr24toyv12_aarch64, export=1
-         b.le            13f
- 
- 10:
--        uxtl            v16.8h, v0.8b
--        uxtl            v17.8h, v1.8b
--        uxtl            v18.8h, v2.8b
--
--        uxtl2           v20.8h, v0.16b
--        uxtl2           v21.8h, v1.16b
--        uxtl2           v22.8h, v2.16b
--
--        bic             v0.8h, #0xff, LSL #8
--        bic             v1.8h, #0xff, LSL #8
--        bic             v2.8h, #0xff, LSL #8
-+        XRGB3YC         v16, v17, v18,  v20, v21, v22,  v0, v1, v2
- 
-         // Testing shows it is faster to stack the smull/smlal ops together
-         // rather than interleave them between channels and indeed even the
-         // shift/add sections seem happier not interleaved
- 
-         // Y0
--        smull           v26.4s, v16.4h, v3.h[0]
--        smlal           v26.4s, v17.4h, v4.h[0]
--        smlal           v26.4s, v18.4h, v5.h[0]
--        smull2          v27.4s, v16.8h, v3.h[0]
--        smlal2          v27.4s, v17.8h, v4.h[0]
--        smlal2          v27.4s, v18.8h, v5.h[0]
-+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
-         // Y1
--        smull           v28.4s, v20.4h, v3.h[0]
--        smlal           v28.4s, v21.4h, v4.h[0]
--        smlal           v28.4s, v22.4h, v5.h[0]
--        smull2          v29.4s, v20.8h, v3.h[0]
--        smlal2          v29.4s, v21.8h, v4.h[0]
--        smlal2          v29.4s, v22.8h, v5.h[0]
--        shrn            v26.4h, v26.4s, #12
--        shrn2           v26.8h, v27.4s, #12
--        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        sqrshrun        v26.8b, v26.8h, #3
--        shrn            v28.4h, v28.4s, #12
--        shrn2           v28.8h, v29.4s, #12
--        add             v28.8h, v28.8h, v6.8h
--        sqrshrun2       v26.16b, v28.8h, #3
--        // Y0/Y1
-+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
-+        SHRN_Y          v26, v26, v27, v28, v29, v6
- 
-         // U
-         // Vector subscript *2 as we loaded into S but are only using H
--        smull           v24.4s, v0.4h, v3.h[2]
--        smlal           v24.4s, v1.4h, v4.h[2]
--        smlal           v24.4s, v2.4h, v5.h[2]
--        smull2          v25.4s, v0.8h, v3.h[2]
--        smlal2          v25.4s, v1.8h, v4.h[2]
--        smlal2          v25.4s, v2.8h, v5.h[2]
-+        SMLAL3          v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
- 
-         // V
--        smull           v30.4s, v0.4h, v3.h[4]
--        smlal           v30.4s, v1.4h, v4.h[4]
--        smlal           v30.4s, v2.4h, v5.h[4]
--        smull2          v31.4s, v0.8h, v3.h[4]
--        smlal2          v31.4s, v1.8h, v4.h[4]
--        smlal2          v31.4s, v2.8h, v5.h[4]
-+        SMLAL3          v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
- 
-         ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
- 
--        shrn            v24.4h, v24.4s, #14
--        shrn2           v24.8h, v25.4s, #14
--        sqrshrn         v24.8b, v24.8h, #1
--        add             v24.8b, v24.8b, v7.8b     // +128
--        shrn            v30.4h, v30.4s, #14
--        shrn2           v30.8h, v31.4s, #14
--        sqrshrn         v30.8b, v30.8h, #1
--        add             v30.8b, v30.8b, v7.8b     // +128
-+        SHRN_C          v24, v24, v25, v7
-+        SHRN_C          v30, v30, v31, v7
- 
-         subs            w9, w9, #16
- 
-@@ -250,69 +266,21 @@ function ff_bgr24toyv12_aarch64, export=1
- 13:
-         // Body is simple copy of main loop body minus preload
- 
--        uxtl            v16.8h, v0.8b
--        uxtl            v17.8h, v1.8b
--        uxtl            v18.8h, v2.8b
--
--        uxtl2           v20.8h, v0.16b
--        uxtl2           v21.8h, v1.16b
--        uxtl2           v22.8h, v2.16b
--
--        bic             v0.8h, #0xff, LSL #8
--        bic             v1.8h, #0xff, LSL #8
--        bic             v2.8h, #0xff, LSL #8
--
-+        XRGB3YC         v16, v17, v18,  v20, v21, v22,  v0, v1, v2
-         // Y0
--        smull           v26.4s, v16.4h, v3.h[0]
--        smlal           v26.4s, v17.4h, v4.h[0]
--        smlal           v26.4s, v18.4h, v5.h[0]
--        smull2          v27.4s, v16.8h, v3.h[0]
--        smlal2          v27.4s, v17.8h, v4.h[0]
--        smlal2          v27.4s, v18.8h, v5.h[0]
-+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
-         // Y1
--        smull           v28.4s, v20.4h, v3.h[0]
--        smlal           v28.4s, v21.4h, v4.h[0]
--        smlal           v28.4s, v22.4h, v5.h[0]
--        smull2          v29.4s, v20.8h, v3.h[0]
--        smlal2          v29.4s, v21.8h, v4.h[0]
--        smlal2          v29.4s, v22.8h, v5.h[0]
--        shrn            v26.4h, v26.4s, #12
--        shrn2           v26.8h, v27.4s, #12
--        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        sqrshrun        v26.8b, v26.8h, #3
--        shrn            v28.4h, v28.4s, #12
--        shrn2           v28.8h, v29.4s, #12
--        add             v28.8h, v28.8h, v6.8h
--        sqrshrun2       v26.16b, v28.8h, #3
--        // Y0/Y1
--
-+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
-+        SHRN_Y          v26, v26, v27, v28, v29, v6
-         // U
--        // Vector subscript *2 as we loaded into S but are only using H
--        smull           v24.4s, v0.4h, v3.h[2]
--        smlal           v24.4s, v1.4h, v4.h[2]
--        smlal           v24.4s, v2.4h, v5.h[2]
--        smull2          v25.4s, v0.8h, v3.h[2]
--        smlal2          v25.4s, v1.8h, v4.h[2]
--        smlal2          v25.4s, v2.8h, v5.h[2]
--
-+        SMLAL3          v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2]
-         // V
--        smull           v30.4s, v0.4h, v3.h[4]
--        smlal           v30.4s, v1.4h, v4.h[4]
--        smlal           v30.4s, v2.4h, v5.h[4]
--        smull2          v31.4s, v0.8h, v3.h[4]
--        smlal2          v31.4s, v1.8h, v4.h[4]
--        smlal2          v31.4s, v2.8h, v5.h[4]
-+        SMLAL3          v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4]
- 
-         cmp             w9, #-16
- 
--        shrn            v24.4h, v24.4s, #14
--        shrn2           v24.8h, v25.4s, #14
--        sqrshrn         v24.8b, v24.8h, #1
--        add             v24.8b, v24.8b, v7.8b     // +128
--        shrn            v30.4h, v30.4s, #14
--        shrn2           v30.8h, v31.4s, #14
--        sqrshrn         v30.8b, v30.8h, #1
--        add             v30.8b, v30.8b, v7.8b     // +128
-+        SHRN_C          v24, v24, v25, v7
-+        SHRN_C          v30, v30, v31, v7
- 
-         // Here:
-         // w9 == 0      width % 16 == 0, tail done
-@@ -347,14 +315,14 @@ function ff_bgr24toyv12_aarch64, export=1
- 2:
-         tbz             w9, #3, 1f
-         st1             {v26.8b},    [x11], #8
--        st1             {v24.s}[0],  [x12], #4
--        st1             {v30.s}[0],  [x13], #4
-+        STB4V           v24, 0, x12
-+        STB4V           v30, 0, x13
- 1:      tbz             w9, #2, 1f
--        st1             {v26.s}[2],  [x11], #4
--        st1             {v24.h}[2],  [x12], #2
--        st1             {v30.h}[2],  [x13], #2
-+        STB4V           v26  8, x11
-+        STB2V           v24, 4, x12
-+        STB2V           v30, 4, x13
- 1:      tbz             w9, #1, 1f
--        st1             {v26.h}[6],  [x11], #2
-+        STB2V           v26, 12, x11
-         st1             {v24.b}[6],  [x12], #1
-         st1             {v30.b}[6],  [x13], #1
- 1:      tbz             w9, #0, 1f
-@@ -381,44 +349,15 @@ function ff_bgr24toyv12_aarch64, export=1
-         b.le            13f
- 
- 10:
--        uxtl            v16.8h, v0.8b
--        uxtl            v17.8h, v1.8b
--        uxtl            v18.8h, v2.8b
--
--        uxtl2           v20.8h, v0.16b
--        uxtl2           v21.8h, v1.16b
--        uxtl2           v22.8h, v2.16b
--
--        // Testing shows it is faster to stack the smull/smlal ops together
--        // rather than interleave them between channels and indeed even the
--        // shift/add sections seem happier not interleaved
--
-+        XRGB3Y          v16, v17, v18,  v20, v21, v22,  v0, v1, v2
-         // Y0
--        smull           v26.4s, v16.4h, v3.h[0]
--        smlal           v26.4s, v17.4h, v4.h[0]
--        smlal           v26.4s, v18.4h, v5.h[0]
--        smull2          v27.4s, v16.8h, v3.h[0]
--        smlal2          v27.4s, v17.8h, v4.h[0]
--        smlal2          v27.4s, v18.8h, v5.h[0]
-+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
-         // Y1
--        smull           v28.4s, v20.4h, v3.h[0]
--        smlal           v28.4s, v21.4h, v4.h[0]
--        smlal           v28.4s, v22.4h, v5.h[0]
--        smull2          v29.4s, v20.8h, v3.h[0]
--        smlal2          v29.4s, v21.8h, v4.h[0]
--        smlal2          v29.4s, v22.8h, v5.h[0]
-+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
- 
-         ld3             {v0.16b, v1.16b, v2.16b}, [x10], #48
- 
--        shrn            v26.4h, v26.4s, #12
--        shrn2           v26.8h, v27.4s, #12
--        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        sqrshrun        v26.8b, v26.8h, #3
--        shrn            v28.4h, v28.4s, #12
--        shrn2           v28.8h, v29.4s, #12
--        add             v28.8h, v28.8h, v6.8h
--        sqrshrun2       v26.16b, v28.8h, #3
--        // Y0/Y1
-+        SHRN_Y          v26, v26, v27, v28, v29, v6
- 
-         subs            w9, w9, #16
- 
-@@ -433,40 +372,15 @@ function ff_bgr24toyv12_aarch64, export=1
- 13:
-         // Body is simple copy of main loop body minus preload
- 
--        uxtl            v16.8h, v0.8b
--        uxtl            v17.8h, v1.8b
--        uxtl            v18.8h, v2.8b
--
--        uxtl2           v20.8h, v0.16b
--        uxtl2           v21.8h, v1.16b
--        uxtl2           v22.8h, v2.16b
--
-+        XRGB3Y          v16, v17, v18,  v20, v21, v22,  v0, v1, v2
-         // Y0
--        smull           v26.4s, v16.4h, v3.h[0]
--        smlal           v26.4s, v17.4h, v4.h[0]
--        smlal           v26.4s, v18.4h, v5.h[0]
--        smull2          v27.4s, v16.8h, v3.h[0]
--        smlal2          v27.4s, v17.8h, v4.h[0]
--        smlal2          v27.4s, v18.8h, v5.h[0]
-+        SMLAL3          v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0]
-         // Y1
--        smull           v28.4s, v20.4h, v3.h[0]
--        smlal           v28.4s, v21.4h, v4.h[0]
--        smlal           v28.4s, v22.4h, v5.h[0]
--        smull2          v29.4s, v20.8h, v3.h[0]
--        smlal2          v29.4s, v21.8h, v4.h[0]
--        smlal2          v29.4s, v22.8h, v5.h[0]
-+        SMLAL3          v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0]
- 
-         cmp             w9, #-16
- 
--        shrn            v26.4h, v26.4s, #12
--        shrn2           v26.8h, v27.4s, #12
--        add             v26.8h, v26.8h, v6.8h     // +128 (>> 3 = 16)
--        sqrshrun        v26.8b, v26.8h, #3
--        shrn            v28.4h, v28.4s, #12
--        shrn2           v28.8h, v29.4s, #12
--        add             v28.8h, v28.8h, v6.8h
--        sqrshrun2       v26.16b, v28.8h, #3
--        // Y0/Y1
-+        SHRN_Y          v26, v26, v27, v28, v29, v6
- 
-         // Here:
-         // w9 == 0      width % 16 == 0, tail done
-@@ -500,9 +414,9 @@ function ff_bgr24toyv12_aarch64, export=1
-         tbz             w9, #3, 1f
-         st1             {v26.8b},    [x11], #8
- 1:      tbz             w9, #2, 1f
--        st1             {v26.s}[2],  [x11], #4
-+        STB4V           v26, 8,  x11
- 1:      tbz             w9, #1, 1f
--        st1             {v26.h}[6],  [x11], #2
-+        STB2V           v26, 12, x11
- 1:      tbz             w9, #0, 1f
-         st1             {v26.b}[14], [x11]
- 1:
-
-From d711e7b9dde2ff557dc28f7456a6986db32ed52b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 10 Aug 2023 08:11:21 +0000
-Subject: [PATCH 161/186] v4l2_req_media: Fix dmabuf fd leak in MMAP mode
-
----
- libavcodec/v4l2_req_media.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c
-index 1a9944774a48..0394bb2b23f3 100644
---- a/libavcodec/v4l2_req_media.c
-+++ b/libavcodec/v4l2_req_media.c
-@@ -1205,8 +1205,10 @@ qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be,
-                     .plane = i,
-                     .flags = O_RDWR, // *** Arguably O_RDONLY would be fine
-                 };
--                if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0)
-+                if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) {
-                     be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length);
-+                    close(xbuf.fd); // dmabuf_import dups the fd so close this one
-+                }
-             }
-             else {
-                 be->dh[i] = dmabuf_import_mmap(
-
-From 3b3a95a51be4c5187808b55d41d98c71a67b91f6 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 6 Sep 2023 14:36:41 +0100
-Subject: [PATCH 162/186] v4l2m2m_dec: Having calculated available pixfmt
- actually pass them to user
-
----
- libavcodec/v4l2_m2m_dec.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index c4f38cc24e1f..f67dd23ba1cc 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -1099,6 +1099,7 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
-     fmts2[n++] = AV_PIX_FMT_DRM_PRIME;
-     for (i = 0; i != fmts_n; ++i) {
-         const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO);
-+        av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f));
-         if (f == AV_PIX_FMT_NONE)
-             continue;
- 
-@@ -1121,7 +1122,7 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
-     fmts2[n - 1] = fmts2[pref_n];
-     fmts2[pref_n] = t;
- 
--    gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
-+    gf_pix_fmt = ff_get_format(avctx, fmts2);
-     av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
-            avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt),
-            avctx->coded_width, avctx->coded_height,
-
-From 1760b9854cda4fd94d19d2529a71134c69809b98 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 6 Sep 2023 14:45:16 +0100
-Subject: [PATCH 163/186] v4l2m2m: Simplify reinit - also fixes fmt selection
-
----
- libavcodec/v4l2_context.c | 41 +++++++++++++++------------------------
- 1 file changed, 16 insertions(+), 25 deletions(-)
-
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index 978a487ca98d..ed126f8f2b35 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -28,6 +28,7 @@
- #include <fcntl.h>
- #include <poll.h>
- #include "libavutil/avassert.h"
-+#include "libavutil/pixdesc.h"
- #include "libavcodec/avcodec.h"
- #include "decode.h"
- #include "v4l2_buffers.h"
-@@ -357,13 +358,23 @@ static int do_source_change(V4L2m2mContext * const s)
- 
-     s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
- 
--    av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n",
-+    av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n",
-+           av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)),
-            s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
-            s->capture.width, s->capture.height,
-            s->capture.selection.width, s->capture.selection.height,
-            s->capture.selection.left, s->capture.selection.top, reinit);
- 
--    if (reinit) {
-+    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
-+    if (ret)
-+        av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n");
-+    s->draining = 0;
-+
-+    if (!reinit) {
-+        /* Buffers are OK so just stream off to ack */
-+        av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__);
-+    }
-+    else {
-         if (avctx)
-             ret = ff_set_dimensions(s->avctx,
-                                     s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width,
-@@ -371,11 +382,7 @@ static int do_source_change(V4L2m2mContext * const s)
-         if (ret < 0)
-             av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n");
- 
--        ret = ff_v4l2_m2m_codec_reinit(s);
--        if (ret) {
--            av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n");
--            return AVERROR(EINVAL);
--        }
-+        ff_v4l2_context_release(&s->capture);
- 
-         if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) ||
-             s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) {
-@@ -388,26 +395,10 @@ static int do_source_change(V4L2m2mContext * const s)
-         // Update pixel format - should only actually do something on initial change
-         s->capture.av_pix_fmt =
-             ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO);
--        if (s->output_drm) {
--            avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
--            avctx->sw_pix_fmt = s->capture.av_pix_fmt;
--        }
--        else
--            avctx->pix_fmt = s->capture.av_pix_fmt;
--
--        goto reinit_run;
-+        avctx->pix_fmt = s->output_drm ? AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt;
-+        avctx->sw_pix_fmt = s->capture.av_pix_fmt;
-     }
- 
--    /* Buffers are OK so just stream off to ack */
--    av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__);
--
--    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
--    if (ret)
--        av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n");
--    s->draining = 0;
--
--    /* reinit executed */
--reinit_run:
-     ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON);
-     return 1;
- }
-
-From caf47c030e05c78d18db72d3ba979b933c0579e5 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 8 Sep 2023 12:13:38 +0000
-Subject: [PATCH 164/186] v4l2: Add (more) RGB formats to DRM & V4L2
-
----
- libavcodec/v4l2_buffers.c | 33 +++++++++++++++++++++++++++++++++
- libavcodec/v4l2_fmt.c     |  8 ++++++++
- 2 files changed, 41 insertions(+)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index 8d80d1978830..e7b57322162f 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -390,6 +390,39 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
-     }
- 
-     switch (avbuf->context->av_pix_fmt) {
-+    case AV_PIX_FMT_0BGR:
-+        layer->format = DRM_FORMAT_RGBX8888;
-+        break;
-+    case AV_PIX_FMT_RGB0:
-+        layer->format = DRM_FORMAT_XBGR8888;
-+        break;
-+    case AV_PIX_FMT_0RGB:
-+        layer->format = DRM_FORMAT_BGRX8888;
-+        break;
-+    case AV_PIX_FMT_BGR0:
-+        layer->format = DRM_FORMAT_XRGB8888;
-+        break;
-+
-+    case AV_PIX_FMT_ABGR:
-+        layer->format = DRM_FORMAT_RGBA8888;
-+        break;
-+    case AV_PIX_FMT_RGBA:
-+        layer->format = DRM_FORMAT_ABGR8888;
-+        break;
-+    case AV_PIX_FMT_ARGB:
-+        layer->format = DRM_FORMAT_BGRA8888;
-+        break;
-+    case AV_PIX_FMT_BGRA:
-+        layer->format = DRM_FORMAT_ARGB8888;
-+        break;
-+
-+    case AV_PIX_FMT_BGR24:
-+        layer->format = DRM_FORMAT_BGR888;
-+        break;
-+    case AV_PIX_FMT_RGB24:
-+        layer->format = DRM_FORMAT_RGB888;
-+        break;
-+
-     case AV_PIX_FMT_YUYV422:
- 
-         layer->format = DRM_FORMAT_YUYV;
-diff --git a/libavcodec/v4l2_fmt.c b/libavcodec/v4l2_fmt.c
-index 6df47e3f5a3c..c820a1d5227b 100644
---- a/libavcodec/v4l2_fmt.c
-+++ b/libavcodec/v4l2_fmt.c
-@@ -42,6 +42,14 @@ static const struct fmt_conversion {
-     { AV_FMT(RGB24),       AV_CODEC(RAWVIDEO),    V4L2_FMT(RGB24) },
-     { AV_FMT(BGR0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGR32) },
-     { AV_FMT(0RGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGB32) },
-+    { AV_FMT(BGR0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGRX32) },
-+    { AV_FMT(RGB0),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGBX32) },
-+    { AV_FMT(0BGR),        AV_CODEC(RAWVIDEO),    V4L2_FMT(XBGR32) },
-+    { AV_FMT(0RGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(XRGB32) },
-+    { AV_FMT(BGRA),        AV_CODEC(RAWVIDEO),    V4L2_FMT(BGRA32) },
-+    { AV_FMT(RGBA),        AV_CODEC(RAWVIDEO),    V4L2_FMT(RGBA32) },
-+    { AV_FMT(ABGR),        AV_CODEC(RAWVIDEO),    V4L2_FMT(ABGR32) },
-+    { AV_FMT(ARGB),        AV_CODEC(RAWVIDEO),    V4L2_FMT(ARGB32) },
-     { AV_FMT(GRAY8),       AV_CODEC(RAWVIDEO),    V4L2_FMT(GREY) },
-     { AV_FMT(YUV420P),     AV_CODEC(RAWVIDEO),    V4L2_FMT(YUV420) },
-     { AV_FMT(YUYV422),     AV_CODEC(RAWVIDEO),    V4L2_FMT(YUYV) },
-
-From 975422cbef7048a32ef0f8768a07a96d2d63a42d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 24 Oct 2023 12:54:02 +0100
-Subject: [PATCH 165/186] dmabuf: Use vidbuf_cached for dmabuf allocation
-
-Gates usage to kernel 6.1.57 and later as that is when the rpivid iommu
-patch was merged.
-
-(cherry picked from commit 9a898f4ea127b30f1ca81eb98dfba3dd101db179)
----
- libavcodec/v4l2_req_dmabufs.c  | 73 ++++++++++++++++++++++++++--------
- libavcodec/v4l2_req_dmabufs.h  |  1 +
- libavcodec/v4l2_request_hevc.c | 49 ++++++++++++-----------
- 3 files changed, 83 insertions(+), 40 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c
-index 017c3892a593..9a4b69d3fa5a 100644
---- a/libavcodec/v4l2_req_dmabufs.c
-+++ b/libavcodec/v4l2_req_dmabufs.c
-@@ -15,11 +15,12 @@
- #include "v4l2_req_dmabufs.h"
- #include "v4l2_req_utils.h"
- 
--#define DMABUF_NAME1  "/dev/dma_heap/linux,cma"
--#define DMABUF_NAME2  "/dev/dma_heap/reserved"
--
- #define TRACE_ALLOC 0
- 
-+#ifndef __O_CLOEXEC
-+#define __O_CLOEXEC 0
-+#endif
-+
- struct dmabufs_ctl;
- struct dmabuf_h;
- 
-@@ -297,23 +298,33 @@ struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc)
- //
- // Alloc dmabuf via CMA
- 
--static int ctl_cma_new(struct dmabufs_ctl * dbsc)
-+static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names)
- {
--    while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 &&
--           errno == EINTR)
--        /* Loop */;
--
--    if (dbsc->fd == -1) {
--        while ((dbsc->fd = open(DMABUF_NAME2, O_RDWR)) == -1 &&
-+    for (; *names != NULL; ++names)
-+    {
-+        while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 &&
-                errno == EINTR)
-             /* Loop */;
--        if (dbsc->fd == -1) {
--            request_log("Unable to open either %s or %s\n",
--                    DMABUF_NAME1, DMABUF_NAME2);
--            return -1;
-+        if (dbsc->fd != -1)
-+        {
-+            request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names);
-+            return 0;
-         }
-+        request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno));
-     }
--    return 0;
-+    request_log("Unable to open any dma_heap device\n");
-+    return -1;
-+}
-+
-+static int ctl_cma_new(struct dmabufs_ctl * dbsc)
-+{
-+    static const char * const names[] = {
-+        "/dev/dma_heap/linux,cma",
-+        "/dev/dma_heap/reserved",
-+        NULL
-+    };
-+
-+    return ctl_cma_new2(dbsc, names);
- }
- 
- static void ctl_cma_free(struct dmabufs_ctl * dbsc)
-@@ -321,7 +332,6 @@ static void ctl_cma_free(struct dmabufs_ctl * dbsc)
-     if (dbsc->fd != -1)
-         while (close(dbsc->fd) == -1 && errno == EINTR)
-             /* loop */;
--
- }
- 
- static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size)
-@@ -347,6 +357,10 @@ static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh,
- 
-     dh->fd = data.fd;
-     dh->size = (size_t)data.len;
-+
-+//    fprintf(stderr, "%s: size=%#zx, ftell=%#zx\n", __func__,
-+//            dh->size, (size_t)lseek(dh->fd, 0, SEEK_END));
-+
-     return 0;
- }
- 
-@@ -364,7 +378,32 @@ static const struct dmabuf_fns dmabuf_cma_fns = {
- 
- struct dmabufs_ctl * dmabufs_ctl_new(void)
- {
--    request_debug(NULL, "Dmabufs using CMA\n");;
-+    request_debug(NULL, "Dmabufs using CMA\n");
-     return dmabufs_ctl_new2(&dmabuf_cma_fns);
- }
- 
-+static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc)
-+{
-+    static const char * const names[] = {
-+        "/dev/dma_heap/vidbuf_cached",
-+        "/dev/dma_heap/linux,cma",
-+        "/dev/dma_heap/reserved",
-+        NULL
-+    };
-+
-+    return ctl_cma_new2(dbsc, names);
-+}
-+
-+static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = {
-+    .buf_alloc  = buf_cma_alloc,
-+    .buf_free   = buf_cma_free,
-+    .ctl_new    = ctl_cma_new_vidbuf_cached,
-+    .ctl_free   = ctl_cma_free,
-+};
-+
-+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void)
-+{
-+    request_debug(NULL, "Dmabufs using Vidbuf\n");
-+    return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns);
-+}
-+
-diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h
-index 381ba2708da6..8c1ab0b5df5e 100644
---- a/libavcodec/v4l2_req_dmabufs.h
-+++ b/libavcodec/v4l2_req_dmabufs.h
-@@ -7,6 +7,7 @@ struct dmabufs_ctl;
- struct dmabuf_h;
- 
- struct dmabufs_ctl * dmabufs_ctl_new(void);
-+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void);
- void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc);
- struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc);
- 
-diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c
-index db7ed13b6d76..5b37319d6aa9 100644
---- a/libavcodec/v4l2_request_hevc.c
-+++ b/libavcodec/v4l2_request_hevc.c
-@@ -176,17 +176,6 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-     av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n",
-            decdev_media_path(decdev), decdev_video_path(decdev));
- 
--    if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) {
--        av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n");
--        src_memtype = MEDIABUFS_MEMORY_MMAP;
--        dst_memtype = MEDIABUFS_MEMORY_MMAP;
--    }
--    else {
--        av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n");
--        src_memtype = MEDIABUFS_MEMORY_DMABUF;
--        dst_memtype = MEDIABUFS_MEMORY_DMABUF;
--    }
--
-     if ((ctx->pq = pollqueue_new()) == NULL) {
-         av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n");
-         goto fail1;
-@@ -202,6 +191,25 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx)
-         goto fail3;
-     }
- 
-+    // Version test for functional Pi5 HEVC iommu.
-+    // rpivid kernel patch was merged in 6.1.57
-+    // *** Remove when it is unlikely that there are any broken kernels left
-+    if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57))
-+        ctx->dbufs = dmabufs_ctl_new_vidbuf_cached();
-+    else
-+        ctx->dbufs = dmabufs_ctl_new();
-+
-+    if (ctx->dbufs == NULL) {
-+        av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n");
-+        src_memtype = MEDIABUFS_MEMORY_MMAP;
-+        dst_memtype = MEDIABUFS_MEMORY_MMAP;
-+    }
-+    else {
-+        av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n");
-+        src_memtype = MEDIABUFS_MEMORY_DMABUF;
-+        dst_memtype = MEDIABUFS_MEMORY_DMABUF;
-+    }
-+
-     // Ask for an initial bitbuf size of max size / 4
-     // We will realloc if we need more
-     // Must use sps->h/w as avctx contains cropped size
-@@ -229,23 +237,15 @@ retry_src_memtype:
-         goto fail4;
-     }
- 
--    if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) {
--        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n");
-+    if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0)
-         ctx->fns = &V2(ff_v4l2_req_hevc, 4);
--    }
- #if CONFIG_V4L2_REQ_HEVC_VX
--    else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) {
--        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n");
-+    else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0)
-         ctx->fns = &V2(ff_v4l2_req_hevc, 3);
--    }
--    else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) {
--        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n");
-+    else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0)
-         ctx->fns = &V2(ff_v4l2_req_hevc, 2);
--    }
--    else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) {
--        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n");
-+    else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0)
-         ctx->fns = &V2(ff_v4l2_req_hevc, 1);
--    }
- #endif
-     else {
-         av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
-@@ -253,6 +253,9 @@ retry_src_memtype:
-         goto fail4;
-     }
- 
-+    av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n",
-+           ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs));
-+
-     if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) {
-         char tbuf1[5];
-         av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
-
-From dd548f4cf1828f957b6d72021233813c49ac5c7c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 11 Jan 2024 18:27:35 +0000
-Subject: [PATCH 166/186] v4l2_m2m_dec: Move drm_prime hwframecontext setup to
- after 1st frame
-
-(cherry picked from commit 2b9675fdbb3c99a08055366a3a9a216d1369ec9c)
----
- libavcodec/v4l2_buffers.c |  4 +++
- libavcodec/v4l2_context.c | 63 ++++++++++++++++++++++++---------------
- libavcodec/v4l2_context.h |  8 +++++
- 3 files changed, 51 insertions(+), 24 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index e7b57322162f..e412636a7a13 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -607,6 +607,10 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
- 
-     if (buf_to_m2mctx(avbuf)->output_drm) {
-         /* 1. get references to the actual data */
-+        const int rv = ff_v4l2_context_frames_set(avbuf->context);
-+        if (rv != 0)
-+            return rv;
-+
-         frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
-         frame->format = AV_PIX_FMT_DRM_PRIME;
-         frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref);
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index ed126f8f2b35..a01a105892ab 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -1227,6 +1227,42 @@ fail_release:
-     return ret;
- }
- 
-+int ff_v4l2_context_frames_set(V4L2Context *const ctx)
-+{
-+    AVHWFramesContext *hwframes;
-+    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-+    const int w = ctx->width != 0 ? ctx->width : s->avctx->width;
-+    const int h = ctx->height != 0 ? ctx->height : s->avctx->height;
-+    int ret;
-+
-+    if (ctx->frames_ref != NULL) {
-+        const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data;
-+        if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h)
-+            return 0;
-+        av_buffer_unref(&ctx->frames_ref);
-+    }
-+
-+    ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref);
-+    if (!ctx->frames_ref)
-+        return AVERROR(ENOMEM);
-+
-+    hwframes = (AVHWFramesContext*)ctx->frames_ref->data;
-+    hwframes->format = AV_PIX_FMT_DRM_PRIME;
-+    hwframes->sw_format = ctx->av_pix_fmt;
-+    hwframes->width = w;
-+    hwframes->height = h;
-+    ret = av_hwframe_ctx_init(ctx->frames_ref);
-+    if (ret < 0) {
-+        av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret));
-+        av_buffer_unref(&ctx->frames_ref);
-+        return ret;
-+    }
-+
-+    av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__,
-+           av_get_pix_fmt_name(ctx->av_pix_fmt), w, h);
-+    return 0;
-+}
-+
- int ff_v4l2_context_init(V4L2Context* ctx)
- {
-     struct v4l2_queryctrl qctrl;
-@@ -1245,30 +1281,11 @@ int ff_v4l2_context_init(V4L2Context* ctx)
-     pthread_cond_init(&ctx->cond, NULL);
-     atomic_init(&ctx->q_count, 0);
- 
--    if (s->output_drm) {
--        AVHWFramesContext *hwframes;
--
--        ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref);
--        if (!ctx->frames_ref) {
--            ret = AVERROR(ENOMEM);
--            goto fail_unlock;
--        }
--
--        hwframes = (AVHWFramesContext*)ctx->frames_ref->data;
--        hwframes->format = AV_PIX_FMT_DRM_PRIME;
--        hwframes->sw_format = ctx->av_pix_fmt;
--        hwframes->width = ctx->width != 0 ? ctx->width : s->avctx->width;
--        hwframes->height = ctx->height != 0 ? ctx->height : s->avctx->height;
--        ret = av_hwframe_ctx_init(ctx->frames_ref);
--        if (ret < 0)
--            goto fail_unref_hwframes;
--    }
--
-     ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
-     if (ret) {
-         ret = AVERROR(errno);
-         av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret));
--        goto fail_unref_hwframes;
-+        goto fail_unlock;
-     }
- 
-     memset(&qctrl, 0, sizeof(qctrl));
-@@ -1277,7 +1294,7 @@ int ff_v4l2_context_init(V4L2Context* ctx)
-         ret = AVERROR(errno);
-         if (ret != AVERROR(EINVAL)) {
-             av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret));
--            goto fail_unref_hwframes;
-+            goto fail_unlock;
-         }
-         // Control unsupported - set default if wanted
-         if (ctx->num_buffers < 2)
-@@ -1291,12 +1308,10 @@ int ff_v4l2_context_init(V4L2Context* ctx)
- 
-     ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem);
-     if (ret < 0)
--        goto fail_unref_hwframes;
-+        goto fail_unlock;
- 
-     return 0;
- 
--fail_unref_hwframes:
--    av_buffer_unref(&ctx->frames_ref);
- fail_unlock:
-     ff_mutex_destroy(&ctx->lock);
-     return ret;
-diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h
-index f4240f7dddb2..9f1c05a918ff 100644
---- a/libavcodec/v4l2_context.h
-+++ b/libavcodec/v4l2_context.h
-@@ -134,6 +134,14 @@ typedef struct V4L2Context {
-  */
- int ff_v4l2_context_init(V4L2Context* ctx);
- 
-+/**
-+ * (re)set the hwframecontext from the current v4l2 context
-+ *
-+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables.
-+ * @return 0 in case of success, a negative value representing the error otherwise.
-+ */
-+int ff_v4l2_context_frames_set(V4L2Context *const ctx);
-+
- /**
-  * Sets the V4L2Context format in the v4l2 driver.
-  *
-
-From 08c71f5f211216b2f9c5b5317682c639cf6c300f Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 12 Jan 2024 15:17:43 +0000
-Subject: [PATCH 167/186] vf_bwdif: Add capability to deinterlace NV12
-
-As bwdif takes no account of horizontally adjacent pixels the same
-code can be used on planes that have multiple components as is used
-on single component planes. Update the filtering code to cope with
-multi-component planes and add NV12 to the list of supported formats.
-
-Signed-off-by: John Cox <jc@kynesim.co.uk>
-(cherry picked from commit 38338fe9123a01210695e63f05e929f53d6868ff)
----
- libavfilter/vf_bwdif.c | 16 +++++++++++++---
- 1 file changed, 13 insertions(+), 3 deletions(-)
-
-diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c
-index 9847d38b6a63..4d69b3039ddf 100644
---- a/libavfilter/vf_bwdif.c
-+++ b/libavfilter/vf_bwdif.c
-@@ -302,19 +302,28 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic,
-     YADIFContext *yadif = &bwdif->yadif;
-     ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff };
-     int i;
-+    int last_plane = -1;
- 
-     for (i = 0; i < yadif->csp->nb_components; i++) {
-         int w = dstpic->width;
-         int h = dstpic->height;
-+        const AVComponentDescriptor * const comp = yadif->csp->comp + i;
-+
-+        // If the last plane was the same as this plane assume we've dealt
-+        // with all the pels already
-+        if (last_plane == comp->plane)
-+            continue;
-+        last_plane = comp->plane;
- 
-         if (i == 1 || i == 2) {
-             w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
-             h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
-         }
- 
--        td.w     = w;
--        td.h     = h;
--        td.plane = i;
-+        // comp step is in bytes but td.w is in pels
-+        td.w       = w * comp->step / ((comp->depth + 7) / 8);
-+        td.h       = h;
-+        td.plane   = comp->plane;
- 
-         ff_filter_execute(ctx, filter_slice, &td, NULL,
-                           FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx)));
-@@ -350,6 +359,7 @@ static const enum AVPixelFormat pix_fmts[] = {
-     AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9,
-     AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10,
-     AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16,
-+    AV_PIX_FMT_NV12,
-     AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
-     AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
-     AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16,
-
-From 5b910aca795d4a352697fa749fce720a3643178d Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 12 Jan 2024 16:46:27 +0000
-Subject: [PATCH 168/186] v4l2_m2m_dec: Try to accomodate ffmpegs ideas about
- default s/w fmts
-
-(cherry picked from commit c61de480d628ad60292f3695d7d29b9edd880be3)
----
- libavcodec/v4l2_m2m_dec.c | 21 ++++++++++++++-------
- 1 file changed, 14 insertions(+), 7 deletions(-)
-
-diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c
-index f67dd23ba1cc..38832230794d 100644
---- a/libavcodec/v4l2_m2m_dec.c
-+++ b/libavcodec/v4l2_m2m_dec.c
-@@ -1079,7 +1079,6 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
-     unsigned int fmts_n;
-     uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n);
-     enum AVPixelFormat *fmts2 = NULL;
--    enum AVPixelFormat t;
-     enum AVPixelFormat gf_pix_fmt;
-     unsigned int i;
-     unsigned int n = 0;
-@@ -1089,7 +1088,7 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
-     if (!fmts)
-         return AVERROR(ENOENT);
- 
--    if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 2))) == NULL) {
-+    if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) {
-         rv = AVERROR(ENOMEM);
-         goto error;
-     }
-@@ -1110,17 +1109,25 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s)
-             pref_n = n;
-         fmts2[n++] = f;
-     }
--    fmts2[n] = AV_PIX_FMT_NONE;
- 
-     if (n < 2) {
-         av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__);
-         goto error;
-     }
- 
--    // Put preferred s/w format at the end - ff_get_format will put it in sw_pix_fmt
--    t = fmts2[n - 1];
--    fmts2[n - 1] = fmts2[pref_n];
--    fmts2[pref_n] = t;
-+    if (n != 2) {
-+        // ffmpeg.c really only expects one s/w format. It thinks that the
-+        // last format in the list is the s/w format of the h/w format but
-+        // also chooses the first non-h/w format as the preferred s/w format.
-+        // The only way of reconciling this is to dup our preferred format into
-+        // both last & first place :-(
-+        const enum AVPixelFormat t = fmts2[pref_n];
-+        fmts2[pref_n] = fmts2[1];
-+        fmts2[1] = t;
-+        fmts2[n++] = t;
-+    }
-+
-+    fmts2[n] = AV_PIX_FMT_NONE;
- 
-     gf_pix_fmt = ff_get_format(avctx, fmts2);
-     av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n",
-
-From d140f93c6d1c54c6bb28ef31051c77e6a3ac8c78 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 18 Jan 2024 15:57:30 +0000
-Subject: [PATCH 169/186] v4l2_m2m_dec: Fix cma allocated s/w output
-
-(cherry picked from commit 87cefe4ddad7c36faad052e12268d7e05c5b694a)
----
- libavcodec/v4l2_buffers.c | 33 +++++++++++++++++++++++----------
- 1 file changed, 23 insertions(+), 10 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index e412636a7a13..b3ef74bcd48c 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -486,6 +486,11 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data)
-         // Buffer still attached to context
-         V4L2m2mContext *s = buf_to_m2mctx(avbuf);
- 
-+        if (!s->output_drm && avbuf->dmabuf[0] != NULL) {
-+            for (unsigned int i = 0; i != avbuf->num_planes; ++i)
-+                dmabuf_read_end(avbuf->dmabuf[i]);
-+        }
-+
-         ff_mutex_lock(&ctx->lock);
- 
-         ff_v4l2_buffer_set_avail(avbuf);
-@@ -533,6 +538,9 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
-                 avbuf->buf.m.planes[i].m.fd = dma_fd;
-             else
-                 avbuf->buf.m.fd = dma_fd;
-+
-+            if (!s->output_drm)
-+                avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]);
-         }
-         else {
-             struct v4l2_exportbuffer expbuf;
-@@ -647,6 +655,11 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
-         break;
-     }
- 
-+    if (avbuf->dmabuf[0] != NULL) {
-+        for (unsigned int i = 0; i != avbuf->num_planes; ++i)
-+            dmabuf_read_start(avbuf->dmabuf[i]);
-+    }
-+
-     return 0;
- }
- 
-@@ -947,6 +960,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-     V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
-     AVBufferRef * bufref;
-     V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
-+    int want_mmap;
- 
-     *pbufref = NULL;
-     if (avbuf == NULL)
-@@ -988,10 +1002,10 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-     } else
-         avbuf->num_planes = 1;
- 
--    for (i = 0; i < avbuf->num_planes; i++) {
--        const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP &&
--            (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm);
-+    want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP &&
-+        (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm);
- 
-+    for (i = 0; i < avbuf->num_planes; i++) {
-         avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ?
-             ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline :
-             ctx->format.fmt.pix.bytesperline;
-@@ -1032,13 +1046,12 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-         avbuf->buf.length    = avbuf->planes[0].length;
-     }
- 
--    if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
--        if (s->output_drm) {
--            ret = v4l2_buffer_export_drm(avbuf);
--            if (ret) {
--                av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n");
--                goto fail;
--            }
-+    if (!want_mmap) {
-+        // export_drm does dmabuf alloc if we aren't using v4l2 alloc
-+        ret = v4l2_buffer_export_drm(avbuf);
-+        if (ret) {
-+            av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n");
-+            goto fail;
-         }
-     }
- 
-
-From 79d7b3b96768de5d65cdee2b6cd2e91b827f0776 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 29 Jan 2024 15:12:34 +0000
-Subject: [PATCH 170/186] v4l2_req: Fix media pool delete race
-
-fds & polltasks associated with media fds that are still in flight are
-not freed on delete but the main pool is leading to use after free when
-they finally do complete. Stop scanning the free chain on delete and
-simply delete everything, in-flight or not. This requires changing alloc
-as the buffers weren't previously tracked in-flight.
-
-(cherry picked from commit 6599b6d1fa1c75d295d5f568a48f8d250250fb7c)
----
- libavcodec/v4l2_req_media.c | 38 ++++++++++++++++++++-----------------
- 1 file changed, 21 insertions(+), 17 deletions(-)
-
-diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c
-index 0394bb2b23f3..c94cc5b0f684 100644
---- a/libavcodec/v4l2_req_media.c
-+++ b/libavcodec/v4l2_req_media.c
-@@ -86,6 +86,8 @@ struct media_pool {
-     int fd;
-     sem_t sem;
-     pthread_mutex_t lock;
-+    unsigned int pool_n;
-+    struct media_request * pool_reqs;
-     struct media_request * free_reqs;
-     struct pollqueue * pq;
- };
-@@ -251,18 +253,17 @@ int media_request_abort(struct media_request ** const preq)
-     return 0;
- }
- 
--static void delete_req_chain(struct media_request * const chain)
-+static void free_req_pool(struct media_request * const pool, const unsigned int n)
- {
--    struct media_request * next = chain;
--    while (next) {
--        struct media_request * const req = next;
--        next = req->next;
-+    unsigned int i;
-+    for (i = 0; i != n; ++i) {
-+        struct media_request * const req = pool + i;
-         if (req->pt)
-             polltask_delete(&req->pt);
-         if (req->fd != -1)
-             close(req->fd);
--        free(req);
-     }
-+    free(pool);
- }
- 
- struct media_pool * media_pool_new(const char * const media_path,
-@@ -283,17 +284,16 @@ struct media_pool * media_pool_new(const char * const media_path,
-         goto fail1;
-     }
- 
-+    if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL)
-+        goto fail3;
-+    mp->pool_n = n;
-     for (i = 0; i != n; ++i) {
--        struct media_request * req = malloc(sizeof(*req));
--        if (!req)
--            goto fail4;
-+        mp->pool_reqs[i].mp = mp;
-+        mp->pool_reqs[i].fd = -1;
-+    }
- 
--        *req = (struct media_request){
--            .next = mp->free_reqs,
--            .mp = mp,
--            .fd = -1
--        };
--        mp->free_reqs = req;
-+    for (i = 0; i != n; ++i) {
-+        struct media_request * const req = mp->pool_reqs + i;
- 
-         if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) {
-             request_log("Failed to alloc request %d: %s\n", i, strerror(errno));
-@@ -303,6 +303,9 @@ struct media_pool * media_pool_new(const char * const media_path,
-         req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req);
-         if (!req->pt)
-             goto fail4;
-+
-+        req->next = mp->free_reqs,
-+        mp->free_reqs = req;
-     }
- 
-     sem_init(&mp->sem, 0, n);
-@@ -310,7 +313,8 @@ struct media_pool * media_pool_new(const char * const media_path,
-     return mp;
- 
- fail4:
--    delete_req_chain(mp->free_reqs);
-+    free_req_pool(mp->pool_reqs, mp->pool_n);
-+fail3:
-     close(mp->fd);
-     pthread_mutex_destroy(&mp->lock);
- fail1:
-@@ -327,7 +331,7 @@ void media_pool_delete(struct media_pool ** pMp)
-         return;
-     *pMp = NULL;
- 
--    delete_req_chain(mp->free_reqs);
-+    free_req_pool(mp->pool_reqs, mp->pool_n);
-     close(mp->fd);
-     sem_destroy(&mp->sem);
-     pthread_mutex_destroy(&mp->lock);
-
-From 041844bf65a92134d2d4cb4036b6a31ce1bf2693 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 30 Jan 2024 14:24:59 +0000
-Subject: [PATCH 171/186] drm_vout: Fix connector etc. desc memory leak
-
-(cherry picked from commit 2f95ad366697901acb114a6d2a45810180f3652d)
----
- libavdevice/drm_vout.c | 7 +++++++
- 1 file changed, 7 insertions(+)
-
-diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c
-index 491e1dc60861..275748abdcee 100644
---- a/libavdevice/drm_vout.c
-+++ b/libavdevice/drm_vout.c
-@@ -501,6 +501,13 @@ static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm
-                 crtc ? crtc->height : 0,
-                 (s->conId == (int)con->connector_id ?
-             " (chosen)" : ""));
-+
-+          if (crtc)
-+              drmModeFreeCrtc(crtc);
-+          if (enc)
-+              drmModeFreeEncoder(enc);
-+          if (con)
-+              drmModeFreeConnector(con);
-       }
- 
-       if (!s->conId) {
-
-From 3300e10481d711b3a872cf0b9a13bd4009011b0b Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 30 Jan 2024 16:20:53 +0000
-Subject: [PATCH 172/186] conf_native: Add --tsan option
-
-(cherry picked from commit 3b5aa0d31bd420f8a642f6fc7919674b8a5d5b31)
----
- pi-util/conf_native.sh | 11 +++++++++--
- 1 file changed, 9 insertions(+), 2 deletions(-)
-
-diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh
-index f0ed1595948b..0dbaa53e97e0 100755
---- a/pi-util/conf_native.sh
-+++ b/pi-util/conf_native.sh
-@@ -10,6 +10,8 @@ RPI_KEEPS=""
- NOSHARED=
- MMAL=
- USR_PREFIX=
-+TOOLCHAIN=
-+R=rel
- 
- while [ "$1" != "" ] ; do
-     case $1 in
-@@ -22,6 +24,10 @@ while [ "$1" != "" ] ; do
- 	--usr)
- 	    USR_PREFIX=/usr
- 	    ;;
-+	--tsan)
-+	    TOOLCHAIN="--toolchain=gcc-tsan"
-+	    R=tsan
-+	    ;;
- 	*)
- 	    echo "Usage $0: [--noshared] [--mmal] [--usr]"
- 	    echo "  noshared  Build static libs and executable - good for testing"
-@@ -82,11 +88,11 @@ V=`cat RELEASE`
- SHARED_LIBS="--enable-shared"
- if [ $NOSHARED ]; then
-   SHARED_LIBS="--disable-shared"
--  OUT=$BUILDBASE/$B-$C-$V-static-rel
-+  OUT=$BUILDBASE/$B-$C-$V-static-$R
-   echo Static libs
- else
-   echo Shared libs
--  OUT=$BUILDBASE/$B-$C-$V-shared-rel
-+  OUT=$BUILDBASE/$B-$C-$V-shared-$R
- fi
- 
- if [ ! $USR_PREFIX ]; then
-@@ -106,6 +112,7 @@ $FFSRC/configure \
-  --libdir=$LIB_PREFIX\
-  --incdir=$INC_PREFIX\
-  $MCOPTS\
-+ $TOOLCHAIN\
-  --disable-stripping\
-  --disable-thumb\
-  --enable-sand\
-
-From 8f62193afd1823882eee4d7ce81b989dd881640c Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 30 Jan 2024 16:25:53 +0000
-Subject: [PATCH 173/186] v4l2_m2m: Rework use of ctx->lock to avoid use while
- uninit
-
-(cherry picked from commit 024508c338bd707f0a9d34cdf660984171da1a6a)
----
- libavcodec/v4l2_buffers.c |  9 ++--
- libavcodec/v4l2_context.c | 86 +++++++++++++++++++++++++++------------
- 2 files changed, 63 insertions(+), 32 deletions(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index b3ef74bcd48c..e844a1a0b67d 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -484,7 +484,7 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data)
- 
-     if (ctx != NULL) {
-         // Buffer still attached to context
--        V4L2m2mContext *s = buf_to_m2mctx(avbuf);
-+        V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
- 
-         if (!s->output_drm && avbuf->dmabuf[0] != NULL) {
-             for (unsigned int i = 0; i != avbuf->num_planes; ++i)
-@@ -494,15 +494,14 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data)
-         ff_mutex_lock(&ctx->lock);
- 
-         ff_v4l2_buffer_set_avail(avbuf);
-+        avbuf->buf.timestamp.tv_sec = 0;
-+        avbuf->buf.timestamp.tv_usec = 0;
- 
--        if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) {
-+        if (V4L2_TYPE_IS_OUTPUT(ctx->type)) {
-             av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name);
--            /* no need to queue more buffers to the driver */
-         }
-         else if (ctx->streamon) {
-             av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name);
--            avbuf->buf.timestamp.tv_sec = 0;
--            avbuf->buf.timestamp.tv_usec = 0;
-             ff_v4l2_buffer_enqueue(avbuf);  // will set to IN_DRIVER
-         }
-         else {
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c
-index a01a105892ab..0d61a432c3c8 100644
---- a/libavcodec/v4l2_context.c
-+++ b/libavcodec/v4l2_context.c
-@@ -906,56 +906,88 @@ static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx)
-         }
-     }
- 
-+    ff_mutex_lock(&ctx->lock);
-     for (i = 0; i < ctx->num_buffers; ++i) {
-         struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
-         if (buf->status == V4L2BUF_AVAILABLE) {
-             rv = ff_v4l2_buffer_enqueue(buf);
-             if (rv < 0)
--                return rv;
-+                break;
-         }
-     }
--    return 0;
-+    ff_mutex_unlock(&ctx->lock);
-+    return rv;
- }
- 
--int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
-+static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx)
- {
-     int type = ctx->type;
-     int ret = 0;
--    AVCodecContext * const avctx = logger(ctx);
-+
-+    if (!V4L2_TYPE_IS_OUTPUT(ctx->type))
-+        stuff_all_buffers(avctx, ctx);
-+
-+    if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) {
-+        ret = AVERROR(errno);
-+        av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name,
-+               av_err2str(ret));
-+        return ret;
-+    }
-+
-+    ctx->first_buf = 1;
-+    ctx->streamon = 1;
-+    ctx->flag_last = 0;
-+    av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name);
-+    return ret;
-+}
-+
-+static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx)
-+{
-+    int type = ctx->type;
-+    int ret = 0;
-+    const int has_bufs = ctx_buffers_alloced(ctx);
- 
-     // Avoid doing anything if there is nothing we can do
--    if (cmd == VIDIOC_STREAMOFF && !ctx_buffers_alloced(ctx) && !ctx->streamon)
-+    if (!has_bufs && !ctx->streamon)
-         return 0;
- 
--    ff_mutex_lock(&ctx->lock);
--
--    if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type))
--        stuff_all_buffers(avctx, ctx);
-+    if (has_bufs)
-+        ff_mutex_lock(&ctx->lock);
- 
--    if (ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type) < 0) {
--        const int err = errno;
--        av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name,
--               cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF", err);
--        ret = AVERROR(err);
-+    if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) {
-+        ret = AVERROR(errno);
-+        av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name,
-+               av_err2str(ret));
-     }
--    else
--    {
--        if (cmd == VIDIOC_STREAMOFF)
--            flush_all_buffers_status(ctx);
--        else
--            ctx->first_buf = 1;
-+    else {
-+        flush_all_buffers_status(ctx);
- 
--        ctx->streamon = (cmd == VIDIOC_STREAMON);
--        av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name,
--               cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF");
-+        ctx->streamon = 0;
-+        ctx->flag_last = 0;
-+
-+        av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name);
-     }
- 
--    // Both stream off & on effectively clear flag_last
--    ctx->flag_last = 0;
-+    if (has_bufs)
-+        ff_mutex_unlock(&ctx->lock);
-+    return ret;
-+}
- 
--    ff_mutex_unlock(&ctx->lock);
- 
--    return ret;
-+int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
-+{
-+    AVCodecContext * const avctx = logger(ctx);
-+
-+    switch (cmd) {
-+        case VIDIOC_STREAMOFF:
-+            return set_streamoff(avctx, ctx);
-+        case VIDIOC_STREAMON:
-+            return set_streamon(avctx, ctx);
-+        default:
-+            av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd);
-+            break;
-+    }
-+    return AVERROR_BUG;
- }
- 
- int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
-
-From 76b95699abad71a942280db3b60c7f906b705166 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 1 Feb 2024 18:11:06 +0000
-Subject: [PATCH 174/186] matroskaenc: Fix H264 delayed extradata creation
-
-(cherry picked from commit bd60b02509168625e12889068e3f0834148334ca)
----
- libavformat/matroskaenc.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
-index 61e4c976ef76..a6a00f03e7c9 100644
---- a/libavformat/matroskaenc.c
-+++ b/libavformat/matroskaenc.c
-@@ -1125,7 +1125,7 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn
-     case AV_CODEC_ID_WAVPACK:
-         return put_wv_codecpriv(dyn_cp, extradata, extradata_size);
-     case AV_CODEC_ID_H264:
--        if (par->extradata_size)
-+        if (extradata_size)
-             return ff_isom_write_avcc(dyn_cp, extradata,
-                                       extradata_size);
-         else
-
-From 83da9f5444e5c938bf623622d163c9559aa57cef Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Thu, 1 Feb 2024 18:12:38 +0000
-Subject: [PATCH 175/186] matroskaenc: Assume H264 is Annex B if no extradata
-
-(cherry picked from commit 7d0cf1279dcf8e97d7dec7f2b7dcd0379b335e3d)
----
- libavformat/matroskaenc.c | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
-diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
-index a6a00f03e7c9..131ad31d1bda 100644
---- a/libavformat/matroskaenc.c
-+++ b/libavformat/matroskaenc.c
-@@ -3194,9 +3194,15 @@ static int mkv_init(struct AVFormatContext *s)
-             track->reformat = mkv_reformat_wavpack;
-             break;
-         case AV_CODEC_ID_H264:
-+            // Default to reformat if no extradata as the only current
-+            // encoder which does this is v4l2m2m which needs reformat
-+            if (par->extradata_size == 0 ||
-+                (par->extradata_size > 3 &&
-+                 (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)))
-+                track->reformat = mkv_reformat_h2645;
-+            break;
-         case AV_CODEC_ID_HEVC:
--            if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 ||
--                 par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) &&
-+            if (par->extradata_size > 6 &&
-                 (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))
-                 track->reformat = mkv_reformat_h2645;
-             break;
-
-From 02eec233a033b677709c641fe3f03a2a8714d750 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Fri, 16 Feb 2024 11:50:56 +0000
-Subject: [PATCH 176/186] aarch64/rgb2rgb: Change incorrect SXTX to stxw
-
-(cherry picked from commit 4c362244e0f1c41d5af6c95a71da7b32029fa982)
----
- libswscale/aarch64/rgb2rgb_neon.S | 12 ++++++------
- 1 file changed, 6 insertions(+), 6 deletions(-)
-
-diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S
-index 0956800b4185..38f83a1b7878 100644
---- a/libswscale/aarch64/rgb2rgb_neon.S
-+++ b/libswscale/aarch64/rgb2rgb_neon.S
-@@ -338,8 +338,8 @@ function ff_bgr24toyv12_aarch64, export=1
-         b.eq            90f
- 
-         subs            w9,  w4, #0
--        add             x0, x0, w14, SXTX
--        add             x1, x1, w6, SXTX
-+        add             x0, x0, w14, sxtw
-+        add             x1, x1, w6, sxtw
-         mov             x10, x0
-         mov             x11, x1
-         b.lt            12f
-@@ -424,10 +424,10 @@ function ff_bgr24toyv12_aarch64, export=1
- 
- // ------------------- Loop to start
- 
--        add             x0, x0, w14, SXTX
--        add             x1, x1, w6, SXTX
--        add             x2, x2, w7, SXTX
--        add             x3, x3, w7, SXTX
-+        add             x0, x0, w14, sxtw
-+        add             x1, x1, w6, sxtw
-+        add             x2, x2, w7, sxtw
-+        add             x3, x3, w7, sxtw
-         subs            w5, w5, #1
-         b.gt            11b
- 90:
-
-From 1a20c50471f23cd6976c6606ab53be7ef5a9afb7 Mon Sep 17 00:00:00 2001
-From: James Le Cuirot <chewi@gentoo.org>
-Date: Sat, 17 Feb 2024 13:29:36 +0000
-Subject: [PATCH 177/186] aarch64/rpi_sand: Fix building under Clang/LLVM
-
-The "Arm A64 Instruction Set Architecture" manual says that the MOV
-(element) instruction takes the form `MOV <Vd>.<Ts>[<index1>],
-<Vn>.<Ts>[<index2>]`, where `<Ts>` is one of B, H, S, or D. Only certain
-other instructions accept a number in front. GNU as allows you to
-include it for any instruction, but this is non-standard. This is
-explained at https://stackoverflow.com/questions/71907156.
-
-(cherry picked from commit ba40fd4ff2de0ced75d713c6aac9cdac2d379625)
----
- libavutil/aarch64/rpi_sand_neon.S | 50 +++++++++++++++----------------
- 1 file changed, 25 insertions(+), 25 deletions(-)
-
-diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S
-index 11658de0c8c2..3a6bc3de74e5 100644
---- a/libavutil/aarch64/rpi_sand_neon.S
-+++ b/libavutil/aarch64/rpi_sand_neon.S
-@@ -387,13 +387,13 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1
-                 st3             {v0.4h  - v2.4h},  [x0], #24
-                 st3             {v16.4h - v18.4h}, [x2], #24
-                 beq             11b
--                mov             v0.2d[0],  v0.2d[1]
-+                mov             v0.d[0],  v0.d[1]
-                 sub             w9,  w9,  #12
--                mov             v1.2d[0],  v1.2d[1]
--                mov             v2.2d[0],  v2.2d[1]
--                mov             v16.2d[0], v16.2d[1]
--                mov             v17.2d[0], v17.2d[1]
--                mov             v18.2d[0], v18.2d[1]
-+                mov             v1.d[0],  v1.d[1]
-+                mov             v2.d[0],  v2.d[1]
-+                mov             v16.d[0], v16.d[1]
-+                mov             v17.d[0], v17.d[1]
-+                mov             v18.d[0], v18.d[1]
- 1:
-                 cmp             w9,  #6-48
-                 blt             1f
-@@ -526,28 +526,28 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1
-                 blt             1f
-                 st3             {v16.4h, v17.4h, v18.4h}, [x0], #24
-                 beq             11b
--                mov             v16.2d[0], v16.2d[1]
-+                mov             v16.d[0], v16.d[1]
-                 sub             w5,  w5,  #12
--                mov             v17.2d[0], v17.2d[1]
--                mov             v18.2d[0], v18.2d[1]
-+                mov             v17.d[0], v17.d[1]
-+                mov             v18.d[0], v18.d[1]
- 1:
-                 cmp             w5,  #6-96
-                 blt             1f
-                 st3             {v16.h, v17.h, v18.h}[0], [x0], #6
-                 st3             {v16.h, v17.h, v18.h}[1], [x0], #6
-                 beq             11b
--                mov             v16.2s[0], v16.2s[1]
-+                mov             v16.s[0], v16.s[1]
-                 sub             w5,  w5,  #6
--                mov             v17.2s[0], v17.2s[1]
--                mov             v18.2s[0], v18.2s[1]
-+                mov             v17.s[0], v17.s[1]
-+                mov             v18.s[0], v18.s[1]
- 1:
-                 cmp             w5,  #3-96
-                 blt             1f
-                 st3             {v16.h, v17.h, v18.h}[0], [x0], #6
-                 beq             11b
--                mov             v16.4h[0], v16.4h[1]
-+                mov             v16.h[0], v16.h[1]
-                 sub             w5,  w5,  #3
--                mov             v17.4h[0], v17.4h[1]
-+                mov             v17.h[0], v17.h[1]
- 1:
-                 cmp             w5,  #2-96
-                 blt             1f
-@@ -625,10 +625,10 @@ function ff_rpi_sand30_lines_to_planar_y8, export=1
-                 blt             1f
-                 st3             {v16.8b, v17.8b, v18.8b}, [x0], #24
-                 beq             11b
--                mov             v16.2d[0], v16.2d[1]
-+                mov             v16.d[0], v16.d[1]
-                 sub             w5,  w5,  #24
--                mov             v17.2d[0], v17.2d[1]
--                mov             v18.2d[0], v18.2d[1]
-+                mov             v17.d[0], v17.d[1]
-+                mov             v18.d[0], v18.d[1]
- 1:
-                 cmp             w5,  #12-96
-                 blt             1f
-@@ -637,28 +637,28 @@ function ff_rpi_sand30_lines_to_planar_y8, export=1
-                 st3             {v16.b, v17.b, v18.b}[2], [x0], #3
-                 st3             {v16.b, v17.b, v18.b}[3], [x0], #3
-                 beq             11b
--                mov             v16.2s[0], v16.2s[1]
-+                mov             v16.s[0], v16.s[1]
-                 sub             w5,  w5,  #12
--                mov             v17.2s[0], v17.2s[1]
--                mov             v18.2s[0], v18.2s[1]
-+                mov             v17.s[0], v17.s[1]
-+                mov             v18.s[0], v18.s[1]
- 1:
-                 cmp             w5,  #6-96
-                 blt             1f
-                 st3             {v16.b, v17.b, v18.b}[0], [x0], #3
-                 st3             {v16.b, v17.b, v18.b}[1], [x0], #3
-                 beq             11b
--                mov             v16.4h[0], v16.4h[1]
-+                mov             v16.h[0], v16.h[1]
-                 sub             w5,  w5,  #6
--                mov             v17.4h[0], v17.4h[1]
--                mov             v18.4h[0], v18.4h[1]
-+                mov             v17.h[0], v17.h[1]
-+                mov             v18.h[0], v18.h[1]
- 1:
-                 cmp             w5,  #3-96
-                 blt             1f
-                 st3             {v16.b, v17.b, v18.b}[0], [x0], #3
-                 beq             11b
--                mov             v16.8b[0], v16.8b[1]
-+                mov             v16.b[0], v16.b[1]
-                 sub             w5,  w5,  #3
--                mov             v17.8b[0], v17.8b[1]
-+                mov             v17.b[0], v17.b[1]
- 1:
-                 cmp             w5,  #2-96
-                 blt             1f
-
-From 09fa999ad4ea877311216081bba5989c4b44349e Mon Sep 17 00:00:00 2001
-From: James Le Cuirot <chewi@gentoo.org>
-Date: Sat, 17 Feb 2024 14:37:44 +0000
-Subject: [PATCH 178/186] rtpenc: Fix building with GCC 14
-
-This incompatible pointer type issue became a fatal error in GCC 14. The
-AVBuffer API started using size_t in 5.0 with ef6a9e5e.
-
-(cherry picked from commit 5d3c1c0213f2d6fe7b310e65def4c44a6c610b18)
----
- libavformat/rtpenc.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
-index f67dc2a15ae1..1f1e4cb25ad9 100644
---- a/libavformat/rtpenc.c
-+++ b/libavformat/rtpenc.c
-@@ -588,7 +588,7 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt)
-     case AV_CODEC_ID_H264:
-     {
-         uint8_t *side_data;
--        int side_data_size = 0;
-+        size_t side_data_size = 0;
- 
-         side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
-                                             &side_data_size);
-
-From 3bf2bc4d72a255aaec4b55bb5fd71258d8d16f67 Mon Sep 17 00:00:00 2001
-From: James Le Cuirot <chewi@gentoo.org>
-Date: Sun, 18 Feb 2024 09:18:31 +0000
-Subject: [PATCH 179/186] v4l2_req: Fix building against musl by including
- pthread.h
-
-(cherry picked from commit d4b70cc3ddd24036e0fd59ad562c6844767e314a)
----
- libavcodec/v4l2_req_decode_q.h | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h
-index af7bbe1de462..27eafbc42b27 100644
---- a/libavcodec/v4l2_req_decode_q.h
-+++ b/libavcodec/v4l2_req_decode_q.h
-@@ -1,6 +1,8 @@
- #ifndef AVCODEC_V4L2_REQ_DECODE_Q_H
- #define AVCODEC_V4L2_REQ_DECODE_Q_H
- 
-+#include <pthread.h>
-+
- typedef struct req_decode_ent {
-     struct req_decode_ent * next;
-     struct req_decode_ent * prev;
-
-From f4af3f1cbbec0bc6fa355bdaf6dcaa01790640d3 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 27 Feb 2024 12:57:08 +0000
-Subject: [PATCH 180/186] v4l2_buffers: Fix init of drmprime source (OUTPUT)
- buffers for encode
-
-Previous fix for mmaped dmabuf CAPTURE buffers broke this due to failure
-to note that dmabuf export wasn't wanted for OUTPUT buffers.
-
-(cherry picked from commit 21859689f25854eb9b46e1efacacf3eca3bef6e8)
----
- libavcodec/v4l2_buffers.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c
-index e844a1a0b67d..2d1db41a3017 100644
---- a/libavcodec/v4l2_buffers.c
-+++ b/libavcodec/v4l2_buffers.c
-@@ -1045,7 +1045,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct
-         avbuf->buf.length    = avbuf->planes[0].length;
-     }
- 
--    if (!want_mmap) {
-+    if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) {
-         // export_drm does dmabuf alloc if we aren't using v4l2 alloc
-         ret = v4l2_buffer_export_drm(avbuf);
-         if (ret) {
-
-From 88b5be9784c6f6088d050c09203804142b7b4316 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 9 Jan 2024 09:00:17 +0000
-Subject: [PATCH 181/186] ffconf: Add ability to output yuv files & run
- valgrind as part of conform
-
-(cherry picked from commit 8104e3701b3766976670a1d4afb13704c66a9c94)
----
- pi-util/ffconf.py | 59 ++++++++++++++++++++++++++++++++++++-----------
- 1 file changed, 46 insertions(+), 13 deletions(-)
-
-diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py
-index 657568014e57..204e6257fb36 100755
---- a/pi-util/ffconf.py
-+++ b/pi-util/ffconf.py
-@@ -14,7 +14,12 @@ HWACCEL_RPI     = 2
- HWACCEL_DRM     = 3
- HWACCEL_VAAPI   = 4
- 
--def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec):
-+def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, args):
-+    ffmpeg_exec = args.ffmpeg
-+    gen_yuv = args.gen_yuv
-+    valgrind = args.valgrind
-+    rv = 0
-+
-     hwaccel = ""
-     if dectype == HWACCEL_RPI:
-         hwaccel = "rpi"
-@@ -48,17 +53,29 @@ def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_e
-     except:
-         pass
- 
--    flog = open(os.path.join(tmp_root, name + ".log"), "wt")
-+    yuv_file = os.path.join(tmp_root, name + ".dec.yuv")
-+    try:
-+        os.remove(yuv_file)
-+    except:
-+        pass
-+
-+    flog = open(os.path.join(tmp_root, name + ".log"), "w+t")
- 
--    ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file]
-+    ffargs = [ffmpeg_exec, "-flags", "unaligned"] +\
-+        (["-hwaccel", hwaccel] if hwaccel else []) +\
-+        ["-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] +\
-+        pix_fmt +\
-+        ([yuv_file] if gen_yuv else ["-f", "md5", dec_file])
-+
-+    if valgrind:
-+        ffargs = ['valgrind', '--leak-check=full'] + ffargs
- 
-     # Unaligned needed for cropping conformance
--    if hwaccel:
--        rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT)
--    else:
--        rstr = subprocess.call(
--            [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file],
--            stdout=flog, stderr=subprocess.STDOUT)
-+    rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT)
-+
-+    if gen_yuv:
-+        with open(dec_file, 'wt') as f:
-+            subprocess.call(["md5sum", yuv_file], stdout=f, stderr=subprocess.STDOUT)
- 
-     try:
-         m1 = None
-@@ -74,9 +91,21 @@ def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_e
-     except:
-         pass
- 
-+    if valgrind:
-+        flog.seek(0)
-+        leak = True
-+        valerr = True
-+
-+        for line in flog:
-+            if re.search("^==[0-9]+== All heap blocks were freed", line):
-+                leak = False
-+            if re.search("^==[0-9]+== ERROR SUMMARY: 0 errors", line):
-+                valerr = False
-+        if leak or valerr:
-+            rv = 4
-+
-     if  m1 and m2 and m1.group() == m2.group():
-         print("Match: " + m1.group(), file=flog)
--        rv = 0
-     elif not m1:
-         print("****** Cannot find m1", file=flog)
-         rv = 3
-@@ -121,7 +150,7 @@ def runtest(name, tests):
-             return True
-     return False
- 
--def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec):
-+def doconf(csva, tests, test_root, vcodec, dectype, args):
-     unx_failures = []
-     unx_success = []
-     failures = 0
-@@ -133,7 +162,7 @@ def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec):
-             print ("==== ", name, end="")
-             sys.stdout.flush()
- 
--            rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec)
-+            rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, args=args)
-             if (rv == 0):
-                 successes += 1
-             else:
-@@ -158,6 +187,8 @@ def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec):
-                     print(": * CRASH *")
-                 elif (rv == 3) :
-                     print(": * MD5 MISSING *")
-+                elif (rv == 4) :
-+                    print(": * VALGRIND *")
-                 else :
-                     print(": * BANG *")
- 
-@@ -189,6 +220,8 @@ if __name__ == '__main__':
-     argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename")
-     argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use")
-     argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
-+    argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests")
-+    argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)")
-     args = argp.parse_args()
- 
-     if args.csvgen:
-@@ -211,5 +244,5 @@ if __name__ == '__main__':
-     elif args.vaapi:
-         dectype = HWACCEL_VAAPI
- 
--    doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg)
-+    doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args)
- 
-
-From 2a2e3858f09b446ee866c5e94c4d0cb81a176ad7 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 5 Mar 2024 15:47:34 +0000
-Subject: [PATCH 182/186] ffconf: Validate ffmpeg & test_root options rather
- than crashing
-
-(cherry picked from commit c3948731965a10b3d459931f4134dd3d95b463aa)
----
- pi-util/ffconf.py | 12 +++++++++++-
- 1 file changed, 11 insertions(+), 1 deletion(-)
-
-diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py
-index 204e6257fb36..71cd8387203f 100755
---- a/pi-util/ffconf.py
-+++ b/pi-util/ffconf.py
-@@ -219,11 +219,15 @@ if __name__ == '__main__':
-     argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir")
-     argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename")
-     argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use")
--    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
-+    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use <dir>/ffmpeg")
-     argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests")
-     argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)")
-     args = argp.parse_args()
- 
-+    if not os.path.isdir(args.test_root):
-+        print("Test root dir '%s' not found" % args.test_root)
-+        exit(1)
-+
-     if args.csvgen:
-         csv.writer(sys.stdout).writerows(scandir(args.test_root))
-         exit(0)
-@@ -244,5 +248,11 @@ if __name__ == '__main__':
-     elif args.vaapi:
-         dectype = HWACCEL_VAAPI
- 
-+    if os.path.isdir(args.ffmpeg):
-+        args.ffmpeg = os.path.join(args.ffmpeg, "ffmpeg")
-+    if not os.path.isfile(args.ffmpeg):
-+        print("FFmpeg file '%s' not found" % args.ffmpeg)
-+        exit(1)
-+
-     doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args)
- 
-
-From 3f92b3ddcf6647bb88b585bfa286a77c18b4ee30 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Wed, 6 Mar 2024 11:55:22 +0000
-Subject: [PATCH 183/186] ffconf: Fix expected conformance s.t.
- VPSSPSPPS_A_MainConcept_1 fails
-
-(cherry picked from commit faa8c6afcda58e1ad91eacc4f4ca6b4d467200b3)
----
- pi-util/conf_h265.2016.csv | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv
-index 4efd5d1c676d..177f1c8111fd 100644
---- a/pi-util/conf_h265.2016.csv
-+++ b/pi-util/conf_h265.2016.csv
-@@ -128,7 +128,7 @@
- 3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10
- 1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8
- 1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8
--3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
-+2,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
- 1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10
- 1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8
- 1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8
-
-From ad8c5df726d8623d2b8ce00fddb4312c3b871415 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 11 Mar 2024 18:36:51 +0000
-Subject: [PATCH 184/186] ffconf: Add loop option for race testing
-
----
- pi-util/ffconf.py | 11 ++++++++++-
- 1 file changed, 10 insertions(+), 1 deletion(-)
-
-diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py
-index 71cd8387203f..702461200671 100755
---- a/pi-util/ffconf.py
-+++ b/pi-util/ffconf.py
-@@ -198,6 +198,8 @@ def doconf(csva, tests, test_root, vcodec, dectype, args):
-     else:
-         print("All tests normal:", successes, "ok,", failures, "failed")
- 
-+    return unx_failures + unx_success
-+
- 
- class ConfCSVDialect(csv.Dialect):
-     delimiter = ','
-@@ -222,6 +224,7 @@ if __name__ == '__main__':
-     argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use <dir>/ffmpeg")
-     argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests")
-     argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)")
-+    argp.add_argument("--loop", default=0, type=int, help="Create yuv file (stored with log under /tmp)")
-     args = argp.parse_args()
- 
-     if not os.path.isdir(args.test_root):
-@@ -254,5 +257,11 @@ if __name__ == '__main__':
-         print("FFmpeg file '%s' not found" % args.ffmpeg)
-         exit(1)
- 
--    doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args)
-+    i = 0
-+    while True:
-+        i = i + 1
-+        if args.loop:
-+            print("== Loop ", i)
-+        if doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args) or (args.loop >= 0 and i > args.loop):
-+            break
- 
-
-From f75376acc36b73deed90ae5f60b53355b9cef599 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Tue, 19 Mar 2024 15:29:02 +0000
-Subject: [PATCH 185/186] pi-util/ffperf: Make ffmpeg run options an option
-
-Also change the default run args to be a simple s/w decode
----
- pi-util/ffperf.py | 24 ++++++++++++++++++------
- 1 file changed, 18 insertions(+), 6 deletions(-)
-
-diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py
-index 65c5224cd8fb..767efe2de2fa 100755
---- a/pi-util/ffperf.py
-+++ b/pi-util/ffperf.py
-@@ -1,5 +1,6 @@
- #!/usr/bin/env python3
- 
-+import shlex
- import time
- import string
- import os
-@@ -36,14 +37,20 @@ class tstats:
-     def __gt__(self, other):
-         return self.elapsed > other.elapsed
- 
--    def time_file(name, prefix, ffmpeg="./ffmpeg"):
-+    def time_file(name, prefix, args):
-+        cmdargs = [args.ffmpeg]
-+        for x in args.args :
-+            if x == '{INPUT}':
-+                cmdargs.append(prefix + name)
-+            elif x == '{NULL}':
-+                cmdargs.append(os.devnull)
-+            else:
-+                cmdargs.append(x)
-+
-         stats = tstats()
-         stats.name = name
-         start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
--        cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw",
--                                  "-vcodec", "hevc_rpi",
--                                  "-t", "30", "-i", prefix + name,
--                                  "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
-+        cproc = subprocess.Popen(cmdargs, bufsize=-1, stdout=flog, stderr=flog);
-         pinfo = os.wait4(cproc.pid, 0)
-         end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
-         stats.elapsed = end_time - start_time
-@@ -67,6 +74,10 @@ To blank the screen before starting use "xdg-screensaver activate"
- """)
- 
-     argp.add_argument("streams", nargs='*')
-+    argp.add_argument("--args", default='-t 30 -i {INPUT} -f null {NULL}', help="""
-+ffmpeg arguments, default='-t 30 -i {INPUT} -f null {NULL}';
-+  {INPUT} is replaced by current inputfile path;
-+  {NULL} is replaced by the system null device""")
-     argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename")
-     argp.add_argument("--csv_in", help="CSV input filename")
-     argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).")
-@@ -74,6 +85,7 @@ To blank the screen before starting use "xdg-screensaver activate"
-     argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable")
- 
-     args = argp.parse_args()
-+    args.args = shlex.split(args.args)
- 
-     csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"])
-     csv_out.writeheader()
-@@ -107,7 +119,7 @@ To blank the screen before starting use "xdg-screensaver activate"
- 
-         t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999})
-         for i in range(args.repeat):
--            t = tstats.time_file(f, prefix, args.ffmpeg)
-+            t = tstats.time_file(f, prefix, args)
-             print ("...", t.times_str())
-             if t0 > t:
-                 t0 = t
-
-From b87000d0dc80ec8e0cbd4406e62bd64b5519a544 Mon Sep 17 00:00:00 2001
-From: John Cox <jc@kynesim.co.uk>
-Date: Mon, 25 Mar 2024 18:25:47 +0000
-Subject: [PATCH 186/186] pi-util/ffconf: Fix --loop help text
-
----
- pi-util/ffconf.py | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py
-index 702461200671..26091f3c07c2 100755
---- a/pi-util/ffconf.py
-+++ b/pi-util/ffconf.py
-@@ -224,7 +224,7 @@ if __name__ == '__main__':
-     argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use <dir>/ffmpeg")
-     argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests")
-     argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)")
--    argp.add_argument("--loop", default=0, type=int, help="Create yuv file (stored with log under /tmp)")
-+    argp.add_argument("--loop", default=0, type=int, help="Loop n times, or until unexpected result")
-     args = argp.parse_args()
- 
-     if not os.path.isdir(args.test_root):
+                 fate-checkasm-rv34dsp                                   \
+                 fate-checkasm-rv40dsp                                   \
+diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak
+index 5b8a294afd18..2d67ca68fcaa 100644
+--- a/tests/fate/filter-video.mak
++++ b/tests/fate/filter-video.mak
+@@ -391,9 +391,9 @@ fate-filter-fps-down-eof-pass: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:eo
+ fate-filter-fps-start-drop: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:start_time=1.5
+ fate-filter-fps-start-fill: CMD = framecrc -lavfi testsrc2=r=7:d=1.5,setpts=PTS+14,fps=3:start_time=1.5
+ 
+-FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, FPS SCALE, MOV, QTRLE) += fate-filter-fps-cfr fate-filter-fps
+-fate-filter-fps-cfr: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -fps_mode cfr -pix_fmt yuv420p
+-fate-filter-fps:     CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -vf fps=30 -pix_fmt yuv420p
++#FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, FPS SCALE, MOV, QTRLE) += fate-filter-fps-cfr fate-filter-fps
++#fate-filter-fps-cfr: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -fps_mode cfr -pix_fmt yuv420p
++#fate-filter-fps:     CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -vf fps=30 -pix_fmt yuv420p
+ 
+ FATE_FILTER_SAMPLES-$(call FILTERFRAMECRC, TESTSRC2 FSYNC, FILE_PROTOCOL) += fate-filter-fsync-up fate-filter-fsync-down
+ fate-filter-fsync-up: tests/data/maps/fsync-up
+diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils
+index fb2ed6d158fa..61fd261b6088 100644
+--- a/tests/ref/fate/imgutils
++++ b/tests/ref/fate/imgutils
+@@ -235,6 +235,9 @@ nv24            planes: 2, linesizes:  64 128   0   0, plane_sizes:  3072  6144
+ nv42            planes: 2, linesizes:  64 128   0   0, plane_sizes:  3072  6144     0     0, plane_offsets:  3072     0     0, total_size: 9216
+ y210be          planes: 1, linesizes: 256   0   0   0, plane_sizes: 12288     0     0     0, plane_offsets:     0     0     0, total_size: 12288
+ y210le          planes: 1, linesizes: 256   0   0   0, plane_sizes: 12288     0     0     0, plane_offsets:     0     0     0, total_size: 12288
++sand128         planes: 2, linesizes:  64  64   0   0, plane_sizes:  3072  1536     0     0, plane_offsets:  3072     0     0, total_size: 4608
++sand64_10       planes: 2, linesizes: 128 128   0   0, plane_sizes:  6144  3072     0     0, plane_offsets:  6144     0     0, total_size: 9216
++sand64_16       planes: 2, linesizes: 128 128   0   0, plane_sizes:  6144  3072     0     0, plane_offsets:  6144     0     0, total_size: 9216
+ x2rgb10le       planes: 1, linesizes: 256   0   0   0, plane_sizes: 12288     0     0     0, plane_offsets:     0     0     0, total_size: 12288
+ x2rgb10be       planes: 1, linesizes: 256   0   0   0, plane_sizes: 12288     0     0     0, plane_offsets:     0     0     0, total_size: 12288
+ x2bgr10le       planes: 1, linesizes: 256   0   0   0, plane_sizes: 12288     0     0     0, plane_offsets:     0     0     0, total_size: 12288
+@@ -451,6 +454,9 @@ nv24            total_size:   9216,  black_unknown_crc: 0x1c302b58,  black_tv_cr
+ nv42            total_size:   9216,  black_unknown_crc: 0x1c302b58,  black_tv_crc: 0x1c302b58,  black_pc_crc: 0xdf792ea7
+ y210be          total_size:  12288,  black_unknown_crc: 0x5483d935,  black_tv_crc: 0x5483d935,  black_pc_crc: 0x06397bf3
+ y210le          total_size:  12288,  black_unknown_crc: 0x5d8e1cf6,  black_tv_crc: 0x5d8e1cf6,  black_pc_crc: 0x8fceec45
++sand128         total_size:   4608,  black_unknown_crc: 0xd00f6cc6,  black_tv_crc: 0xd00f6cc6,  black_pc_crc: 0x234969af
++sand64_10       total_size:   9216,  black_unknown_crc: 0xee47624d,  black_tv_crc: 0xee47624d,  black_pc_crc: 0x7c6afe45
++sand64_16       total_size:   9216,  black_unknown_crc: 0xfff85b60,  black_tv_crc: 0xfff85b60,  black_pc_crc: 0xc03cff93
+ x2rgb10le       total_size:  12288,  black_unknown_crc: 0x00000000,  black_tv_crc: 0x00000000,  black_pc_crc: 0x00000000
+ x2rgb10be       total_size:  12288,  black_unknown_crc: 0x00000000,  black_tv_crc: 0x00000000,  black_pc_crc: 0x00000000
+ x2bgr10le       total_size:  12288,  black_unknown_crc: 0x00000000,  black_tv_crc: 0x00000000,  black_pc_crc: 0x00000000
+diff --git a/tests/ref/fate/source b/tests/ref/fate/source
+index 78d3a2e0fa58..a0ab58558556 100644
+--- a/tests/ref/fate/source
++++ b/tests/ref/fate/source
+@@ -1,5 +1,8 @@
+ Files without standard license headers:
+ libavcodec/file_open.c
++libavcodec/hevc-ctrls-v1.h
++libavcodec/hevc-ctrls-v2.h
++libavcodec/hevc-ctrls-v3.h
+ libavcodec/interplayacm.c
+ libavcodec/log2_tab.c
+ libavcodec/reverse.c
+@@ -26,6 +29,9 @@ compat/float/float.h
+ compat/float/limits.h
+ compat/stdbit/stdbit.h
+ libavcodec/bitstream_template.h
++libavcodec/hevc-ctrls-v1.h
++libavcodec/hevc-ctrls-v2.h
++libavcodec/hevc-ctrls-v3.h
+ tools/decode_simple.h
+ Use of av_clip() where av_clip_uintp2() could be used:
+ Use of av_clip() where av_clip_intp2() could be used:
+diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query
+index fff93bbf0efd..96fefd5b8ab7 100644
+--- a/tests/ref/fate/sws-pixdesc-query
++++ b/tests/ref/fate/sws-pixdesc-query
+@@ -23,6 +23,7 @@ is16BPS:
+   rgba64le
+   rgbaf16be
+   rgbaf16le
++  sand64_16
+   ya16be
+   ya16le
+   yuv420p16be
+@@ -75,6 +76,7 @@ isNBPS:
+   p410le
+   p412be
+   p412le
++  sand64_10
+   x2bgr10be
+   x2bgr10le
+   x2rgb10be
+@@ -245,6 +247,9 @@ isYUV:
+   p412le
+   p416be
+   p416le
++  sand128
++  sand64_10
++  sand64_16
+   uyvy422
+   uyyvyy411
+   vuya
+@@ -818,6 +823,9 @@ Packed:
+   rgbaf32le
+   rgbf32be
+   rgbf32le
++  sand128
++  sand64_10
++  sand64_16
+   uyvy422
+   uyyvyy411
+   vuya