From ac7906e00b77f6734c196224242ea207365b583e Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Fri, 13 Dec 2024 21:50:24 +0100 Subject: [PATCH] ffmpeg: update rpi patch Patch created using revisions b08d796..5f39f6c from branch test/7.1/main of https://github.com/jc-kynesim/rpi-ffmpeg --- .../ffmpeg/patches/rpi/ffmpeg-001-rpi.patch | 58391 ++++++---------- 1 file changed, 20798 insertions(+), 37593 deletions(-) diff --git a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch index f0e0ffe03b..d9dc15a210 100644 --- a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch +++ b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch @@ -1,5266 +1,33 @@ -From bedd295922f7df955c45801720503eb632711525 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 26 Apr 2021 12:34:50 +0100 -Subject: [PATCH 001/186] Add pi configs and scripts - ---- - pi-util/BUILD.txt | 59 ++++++++ - pi-util/NOTES.txt | 69 +++++++++ - pi-util/TESTMESA.txt | 82 +++++++++++ - pi-util/clean_usr_libs.sh | 26 ++++ - pi-util/conf_arm64_native.sh | 45 ++++++ - pi-util/conf_h265.2016.csv | 195 ++++++++++++++++++++++++++ - pi-util/conf_h265.2016_HEVC_v1.csv | 147 ++++++++++++++++++++ - pi-util/conf_h265.csv | 144 +++++++++++++++++++ - pi-util/conf_native.sh | 108 +++++++++++++++ - pi-util/ffconf.py | 215 +++++++++++++++++++++++++++++ - pi-util/ffperf.py | 128 +++++++++++++++++ - pi-util/genpatch.sh | 35 +++++ - pi-util/make_array.py | 23 +++ - pi-util/mkinst.sh | 5 + - pi-util/patkodi.sh | 9 ++ - pi-util/perfcmp.py | 101 ++++++++++++++ - pi-util/qem.sh | 9 ++ - pi-util/v3dusage.py | 128 +++++++++++++++++ - 18 files changed, 1528 insertions(+) - create mode 100644 pi-util/BUILD.txt - create mode 100644 pi-util/NOTES.txt - create mode 100644 pi-util/TESTMESA.txt - create mode 100755 pi-util/clean_usr_libs.sh - create mode 100644 pi-util/conf_arm64_native.sh - create mode 100644 pi-util/conf_h265.2016.csv - create mode 100644 
pi-util/conf_h265.2016_HEVC_v1.csv - create mode 100644 pi-util/conf_h265.csv - create mode 100755 pi-util/conf_native.sh - create mode 100755 pi-util/ffconf.py - create mode 100755 pi-util/ffperf.py - create mode 100755 pi-util/genpatch.sh - create mode 100755 pi-util/make_array.py - create mode 100755 pi-util/mkinst.sh - create mode 100644 pi-util/patkodi.sh - create mode 100755 pi-util/perfcmp.py - create mode 100755 pi-util/qem.sh - create mode 100755 pi-util/v3dusage.py - -diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt -new file mode 100644 -index 000000000000..b050971f63c5 ---- /dev/null -+++ b/pi-util/BUILD.txt -@@ -0,0 +1,59 @@ -+Building Pi FFmpeg -+================== -+ -+Current only building on a Pi is supported. -+This builds ffmpeg the way I've tested it -+ -+Get all dependencies - the current package dependencies are good enough -+ -+$ sudo apt-get build-dep ffmpeg -+ -+Configure using the pi-util/conf_native.sh script -+------------------------------------------------- -+ -+This sets the normal release options and creates an ouutput dir to build into -+The directory name will depend on system and options but will be under out/ -+ -+There are a few choices here -+ --mmal build including the legacy mmal-based decoders and zero-copy code -+ this requires appropriate libraries which currently will exist for -+ armv7 but not arm64 -+ --noshared -+ Build a static image rather than a shared library one. Static is -+ easier for testing as there is no need to worry about library -+ paths being confused and therefore running the wrong code, Shared -+ is what is needed, in most cases, when building for use by other -+ programs. -+ -+So for a static build -+--------------------- -+ -+$ pi-util/conf_native.sh --noshared -+ -+$ make -j8 -C out/ -+ -+You can now run ffmpeg directly from where it was built -+ -+For a shared build -+------------------ -+ -+$ pi-util/conf_native.sh -+ -+You will normally want an install target if shared. 
Note that the script has -+set this up to be generated in out//install, you don't have to worry -+about overwriting your system libs. -+ -+$ make -j8 -C out/ install -+ -+You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was -+built or install the image on the system - you have to be careful to get rid -+of all other ffmpeg libs or confusion may result. There is a little script -+that wipes all other versions - obviously use with care! -+ -+$ sudo pi-util/clean_usr_libs.sh -+ -+Then simply copying from the install to /usr works -+ -+$ sudo cp -r out//install/* /usr -+ -+ -diff --git a/pi-util/NOTES.txt b/pi-util/NOTES.txt -new file mode 100644 -index 000000000000..fcce72226a32 ---- /dev/null -+++ b/pi-util/NOTES.txt -@@ -0,0 +1,69 @@ -+Notes on the hevc_rpi decoder & associated support code -+------------------------------------------------------- -+ -+There are 3 main parts to the existing code: -+ -+1) The decoder - this is all in libavcodec as rpi_hevc*. -+ -+2) A few filters to deal with Sand frames and a small patch to -+automatically select the sand->i420 converter when required. -+ -+3) A kludge in ffmpeg.c to display the decoded video. This could & should -+be converted into a proper ffmpeg display module. -+ -+ -+Decoder -+------- -+ -+The decoder is a modified version of the existing ffmpeg hevc decoder. -+Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder. -+More complex bitstreams can be up to ~200% faster but particularly easy -+streams can cut its advantage down to ~50%. This means that a Pi3+ can -+display nearly all 8-bit 1080p30 streams and with some overclocking it can -+display most lower bitrate 10-bit 1080p30 streams - this latter case is -+not helped by the requirement to downsample to 8-bit before display on a -+Pi. -+ -+It has had co-processor offload added for inter-pred and large block -+residual transform. 
Various parts have had optimized ARM NEON assembler -+added and the existing ARM asm sections have been profiled and -+re-optimized for A53. The main C code has been substantially reworked at -+its lower levels in an attempt to optimize it and minimize memory -+bandwidth. To some extent code paths that deal with frame types that it -+doesn't support have been pruned. -+ -+It outputs frames in Broadcom Sand format. This is a somewhat annoying -+layout that doesn't fit into ffmpegs standard frame descriptions. It has -+vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for -+the stripe followed by interleaved U & V, that is then followed by the Y -+for the next stripe, etc. The final stripe is always padded to -+stripe-width. This is used in an attempt to help with cache locality and -+cut down on the number of dram bank switches. It is annoying to use for -+inter-pred with conventional processing but the way the Pi QPU (which is -+used for inter-pred) works means that it has negligible downsides here and -+the improved memory performance exceeds the overhead of the increased -+complexity in the rest of the code. -+ -+Frames must be allocated out of GPU memory (as otherwise they can't be -+accessed by the co-processors). Utility functions (in rpi_zc.c) have been -+written to make this easier. As the frames are already in GPU memory they -+can be displayed by the Pi h/w without any further copying. -+ -+ -+Known non-features -+------------------ -+ -+Frame allocation should probably be done in some other way in order to fit -+into the standard framework better. -+ -+Sand frames are currently declared as software frames, there is an -+argument that they should be hardware frames but they aren't really. -+ -+There must be a better way of auto-selecting the hevc_rpi decoder over the -+normal s/w hevc decoder, but I became confused by the existing h/w -+acceleration framework and what I wanted to do didn't seem to fit in -+neatly. 
-+ -+Display should be a proper device rather than a kludge in ffmpeg.c -+ -+ -diff --git a/pi-util/TESTMESA.txt b/pi-util/TESTMESA.txt -new file mode 100644 -index 000000000000..92bc13a3dfa1 ---- /dev/null -+++ b/pi-util/TESTMESA.txt -@@ -0,0 +1,82 @@ -+# Setup & Build instructions for testing Argon30 mesa support (on Pi4) -+ -+# These assume that the drm_mmal test for Sand8 has been built on this Pi -+# as build relies on many of the same files -+ -+# 1st get everything required to build ffmpeg -+# If sources aren't already enabled on your Pi then enable them -+sudo su -+sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list -+sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list -+mv /tmp/sources.list /etc/apt/ -+mv /tmp/raspi.list /etc/apt/sources.list.d/ -+apt update -+ -+# Get dependancies -+sudo apt build-dep ffmpeg -+ -+sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev -+ -+# Enable H265 V4L2 request decoder -+sudo su -+echo dtoverlay=rpivid-v4l2 >> /boot/config.txt -+# You may also want to add more CMA if you are going to try 4k videos -+# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read -+# dtoverlay=vc4-fkms-v3d,cma-512 -+reboot -+# Check it has turned up -+ls -la /dev/video* -+# This should include video19 -+# crw-rw----+ 1 root video 81, 7 Aug 4 17:25 /dev/video19 -+ -+# Currently on the Pi the linux headers from the debian distro don't match -+# the kernel that we ship and we need to update them - hopefully this step -+# will be unneeded in the future -+sudo apt install git bc bison flex libssl-dev make -+git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y -+cd linux -+KERNEL=kernel7l -+make bcm2711_defconfig -+make headers_install -+sudo cp -r usr/include/linux /usr/include -+cd .. 
-+ -+# Config - this builds a staticly linked ffmpeg which is easier for testing -+pi-util/conf_native.sh --noshared -+ -+# Build (this is a bit dull) -+# If you want to poke the source the libavdevice/egl_vout.c contains the -+# output code - -+cd out/armv7-static-rel -+ -+# Check that you have actually configured V4L2 request -+grep HEVC_V4L2REQUEST config.h -+# You are hoping for -+# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1 -+# if you get 0 then the config has failed -+ -+make -j6 -+ -+# Grab test streams -+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv -+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv -+wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv -+ -+# Test i420 output (works currently) -+./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl - -+ -+# Test Sand8 output - doesn't currently work but should once you have -+# Sand8 working in drm_mmal. I can't guarantee that this will work as -+# I can't test this path with a known working format, but the debug looks -+# good. 
If this doesn't work & drm_mmal does with sand8 then come back to me -+# The "show_all 1" forces vout to display every frame otherwise it drops any -+# frame that would cause it to block -+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl - -+ -+# Test Sand30 - doesn't currently work -+# (Beware that when FFmpeg errors out it often leaves your teminal window -+# in a state where you need to reset it) -+./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl - -+ -+ -+ -diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh -new file mode 100755 -index 000000000000..b3b2d5509de0 ---- /dev/null -+++ b/pi-util/clean_usr_libs.sh -@@ -0,0 +1,26 @@ -+set -e -+U=/usr/lib/arm-linux-gnueabihf -+rm -f $U/libavcodec.* -+rm -f $U/libavdevice.* -+rm -f $U/libavfilter.* -+rm -f $U/libavformat.* -+rm -f $U/libavutil.* -+rm -f $U/libswresample.* -+rm -f $U/libswscale.* -+U=/usr/lib/arm-linux-gnueabihf/neon/vfp -+rm -f $U/libavcodec.* -+rm -f $U/libavdevice.* -+rm -f $U/libavfilter.* -+rm -f $U/libavformat.* -+rm -f $U/libavutil.* -+rm -f $U/libswresample.* -+rm -f $U/libswscale.* -+U=/usr/lib/aarch64-linux-gnu -+rm -f $U/libavcodec.* -+rm -f $U/libavdevice.* -+rm -f $U/libavfilter.* -+rm -f $U/libavformat.* -+rm -f $U/libavutil.* -+rm -f $U/libswresample.* -+rm -f $U/libswscale.* -+ -diff --git a/pi-util/conf_arm64_native.sh b/pi-util/conf_arm64_native.sh -new file mode 100644 -index 000000000000..9e3bbfa1908a ---- /dev/null -+++ b/pi-util/conf_arm64_native.sh -@@ -0,0 +1,45 @@ -+echo "Configure for ARM64 native build" -+ -+#RPI_KEEPS="-save-temps=obj" -+ -+SHARED_LIBS="--enable-shared" -+if [ "$1" == "--noshared" ]; then -+ SHARED_LIBS="--disable-shared" -+ echo Static libs -+ OUT=out/arm64-static-rel -+else -+ echo Shared libs -+ OUT=out/arm64-shared-rel -+fi -+ -+mkdir -p $OUT -+cd $OUT -+ -+A=aarch64-linux-gnu -+USR_PREFIX=`pwd`/install -+LIB_PREFIX=$USR_PREFIX/lib/$A 
-+INC_PREFIX=$USR_PREFIX/include/$A -+ -+../../configure \ -+ --prefix=$USR_PREFIX\ -+ --libdir=$LIB_PREFIX\ -+ --incdir=$INC_PREFIX\ -+ --disable-stripping\ -+ --disable-thumb\ -+ --disable-mmal\ -+ --enable-sand\ -+ --enable-v4l2-request\ -+ --enable-libdrm\ -+ --enable-epoxy\ -+ --enable-libudev\ -+ --enable-vout-drm\ -+ --enable-vout-egl\ -+ $SHARED_LIBS\ -+ --extra-cflags="-ggdb" -+ -+# --enable-decoder=hevc_rpi\ -+# --enable-extra-warnings\ -+# --arch=armv71\ -+ -+# gcc option for getting asm listing -+# -Wa,-ahls -diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv -new file mode 100644 -index 000000000000..4efd5d1c676d ---- /dev/null -+++ b/pi-util/conf_h265.2016.csv -@@ -0,0 +1,195 @@ -+1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8 -+1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8 -+1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8 -+1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8 -+1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8 -+1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8 -+1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8 -+1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8 -+1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8 -+1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8 -+1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8 -+1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8 -+1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8 -+1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8 -+1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8 -+1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8 -+1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8 
-+1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8 -+1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8 -+1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8 -+1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8 -+1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10 -+1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8 -+1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8 -+1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8 -+1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8 -+1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8 -+1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8 -+1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8 -+1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8 -+1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8 -+1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8 -+1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8 -+1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8 -+1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8 -+1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8 -+1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8 -+1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8 -+1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8 -+1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8 -+1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8 -+1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8 -+1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10 -+1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8 
-+1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8 -+1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8 -+1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8 -+1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8 -+1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8 -+1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8 -+1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8 -+1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8 -+1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8 -+1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8 -+1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8 -+1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8 -+1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8 -+1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8 -+1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8 -+1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8 -+1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8 -+1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8 -+1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8 -+1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8 -+1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8 -+1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8 -+1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8 -+1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8 -+1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8 -+1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8 -+1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8 
-+1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8 -+1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8 -+1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8 -+1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8 -+1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8 -+1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8 -+1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8 -+1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8 -+1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8 -+1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8 -+1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8 -+1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8 -+1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8 -+1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8 -+1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8 -+1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8 -+1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8 -+1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8 -+1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8 -+1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8 -+1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8 -+1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8 -+1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8 -+1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8 -+1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8 -+1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8 -+1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8 -+1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8 
-+1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8 -+1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8 -+1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8 -+1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8 -+1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8 -+1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8 -+1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8 -+1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8 -+1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8 -+1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8 -+1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8 -+1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8 -+1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8 -+1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8 -+1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8 -+1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8 -+1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8 -+1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8 -+1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8 -+1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8 -+1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8 -+1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8 -+1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8 -+1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8 -+1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8 -+1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8 -+1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8 -+1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8 
-+3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10 -+1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8 -+1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8 -+3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8 -+1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10 -+1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8 -+1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8 -+1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10 -+1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10 -+1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8 -+1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10 -+1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8 -+1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10 -+1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8 -+1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10 -+1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8 -+1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10 -+1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8 -+1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10 -+1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8 
-+1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0 -+0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8 -+0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8 -+0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10 -+0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8 -+0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8 -+1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0 -+0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8 -+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 -+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 -+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 -+0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 -+0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 -+0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 -+0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 
-+0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 -+1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10 -+1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0 -+1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0 -+1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0 -+1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0 -+1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0 -+1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0 -+0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0 -+0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8 -+0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8 -+1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0 -+1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8 -+1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0 -+1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0 -+1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0 -+1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0 -+1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0 -+1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0 
-+1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0 -+0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8 -+0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10 -+0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10 -+0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8 -+0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8 -+0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8 -+0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8 -+0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8 -+1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8 -+1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8 -+1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8 -+1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8 -+1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8 -diff --git a/pi-util/conf_h265.2016_HEVC_v1.csv b/pi-util/conf_h265.2016_HEVC_v1.csv -new file mode 100644 -index 000000000000..60826412715c ---- /dev/null -+++ b/pi-util/conf_h265.2016_HEVC_v1.csv -@@ -0,0 +1,147 @@ -+1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5 -+1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5 -+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 -+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 -+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 -+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 -+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 
-+1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5 -+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 -+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 -+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 -+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 -+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 -+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 -+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 -+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 -+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 -+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 -+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 -+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 -+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 -+1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5 -+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 -+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 -+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 -+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 -+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 -+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 -+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 -+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 -+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 -+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 -+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 -+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 -+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 -+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 -+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 -+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 
-+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 -+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 -+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 -+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 -+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 -+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 -+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 -+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 -+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 -+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 -+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 -+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 -+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 -+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 -+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 -+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 -+1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5 -+1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5 -+1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5 -+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 -+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 -+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 -+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 -+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 -+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 -+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 -+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 -+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 -+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 -+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 -+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 -+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 
-+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 -+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 -+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 -+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 -+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 -+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 -+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 -+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 -+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 -+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 -+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 -+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 -+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 -+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 -+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 -+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 -+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 -+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 -+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 -+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 -+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 -+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 -+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 -+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 -+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 -+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 -+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 -+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 -+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 -+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 -+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 -+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 -+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 
-+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 -+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 -+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 -+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 -+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 -+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 -+1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5 -+2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt -+2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt -+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 -+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 -+1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5 -+1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5 -+1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5 -+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 -+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 -+1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5 -+1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5 -+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 -+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 -+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 -+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 -+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 -+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 -+3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth -+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 -+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 -+3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ??? 
-+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 -+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 -+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 -+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 -+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 -+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 -+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 -+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 -+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 -+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 -+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 -+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 -+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 -+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 -+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 -+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 -diff --git a/pi-util/conf_h265.csv b/pi-util/conf_h265.csv -new file mode 100644 -index 000000000000..fc14f2a3c2bb ---- /dev/null -+++ b/pi-util/conf_h265.csv -@@ -0,0 +1,144 @@ -+1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5 -+1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5 -+1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5 -+1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 -+1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 -+1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 -+1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 
-+1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 -+1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5 -+1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 -+1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 -+1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 -+1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 -+1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 -+1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 -+1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 -+1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 -+1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 -+1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 -+1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 -+1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 -+1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 -+1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5 -+1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 -+1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 -+1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 -+1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 -+1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 -+1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 -+1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 -+1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 -+1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 -+1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 -+1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 -+1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 -+1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 -+1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 -+1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 
-+1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 -+1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 -+1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 -+1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 -+1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 -+1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 -+1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 -+1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 -+1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 -+1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 -+1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 -+1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 -+1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 -+1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 -+1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 -+1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 -+1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 -+1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5 -+1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5 -+1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5 -+1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 -+1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 -+1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 -+1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 -+1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 -+1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 -+1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 -+1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 -+1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 -+1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 -+1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 -+1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 
-+1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 -+1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 -+1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 -+1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 -+1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 -+1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 -+1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 -+1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 -+1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 -+1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 -+1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 -+1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 -+1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 -+1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 -+1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 -+1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 -+1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 -+1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 -+1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 -+1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 -+1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 -+1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 -+1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 -+1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 -+1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 -+1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 -+1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 -+1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 -+1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 -+1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 -+1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 -+1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 -+1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 
-+1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 -+1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 -+1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 -+1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 -+1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 -+1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 -+1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 -+1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 -+1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 -+1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5 -+1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5 -+1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5 -+1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 -+1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 -+1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5 -+1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5 -+1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 -+1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 -+1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 -+1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 -+1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 -+1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 -+0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched -+1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 -+1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 -+1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 -+1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 -+1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 -+1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 -+1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 
-+1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 -+1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 -+1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 -+1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 -+1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 -+1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 -+1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 -+1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 -+1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 -+1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 -+1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 -diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh -new file mode 100755 -index 000000000000..65576846e81f ---- /dev/null -+++ b/pi-util/conf_native.sh -@@ -0,0 +1,108 @@ -+echo "Configure for native build" -+ -+FFSRC=`pwd` -+MC=`dpkg --print-architecture` -+BUILDBASE=$FFSRC/out -+ -+#RPI_KEEPS="-save-temps=obj" -+RPI_KEEPS="" -+ -+NOSHARED= -+MMAL= -+ -+while [ "$1" != "" ] ; do -+ case $1 in -+ --noshared) -+ NOSHARED=1 -+ ;; -+ --mmal) -+ MMAL=1 -+ ;; -+ *) -+ echo "Usage $0: [--noshared] [--mmal]" -+ exit 1 -+ ;; -+ esac -+ shift -+done -+ -+ -+MCOPTS= -+RPI_INCLUDES= -+RPI_LIBDIRS= -+RPI_DEFINES= -+RPI_EXTRALIBS= -+ -+if [ "$MC" == "arm64" ]; then -+ echo "M/C aarch64" -+ A=aarch64-linux-gnu -+ B=arm64 -+elif [ "$MC" == "armhf" ]; then -+ echo "M/C armv7" -+ A=arm-linux-gnueabihf -+ B=armv7 -+ MCOPTS="--arch=armv6t2 --cpu=cortex-a7" -+ RPI_DEFINES=-mfpu=neon-vfpv4 -+else -+ echo Unexpected architecture $MC -+ exit 1 -+fi -+ -+if [ $MMAL ]; then -+ RPI_OPT_VC=/opt/vc -+ RPI_INCLUDES="-I$RPI_OPT_VC/include 
-I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" -+ RPI_LIBDIRS="-L$RPI_OPT_VC/lib" -+ RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000" -+ RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group" -+ RPIOPTS="--enable-mmal --enable-rpi" -+else -+ RPIOPTS="--disable-mmal --enable-sand" -+fi -+ -+C=`lsb_release -sc` -+V=`cat RELEASE` -+ -+SHARED_LIBS="--enable-shared" -+if [ $NOSHARED ]; then -+ SHARED_LIBS="--disable-shared" -+ OUT=$BUILDBASE/$B-$C-$V-static-rel -+ echo Static libs -+else -+ echo Shared libs -+ OUT=$BUILDBASE/$B-$C-$V-shared-rel -+fi -+ -+USR_PREFIX=$OUT/install -+LIB_PREFIX=$USR_PREFIX/lib/$A -+INC_PREFIX=$USR_PREFIX/include/$A -+ -+echo Destination directory: $OUT -+mkdir -p $OUT -+# Nothing under here need worry git - including this .gitignore! -+echo "**" > $BUILDBASE/.gitignore -+cd $OUT -+ -+$FFSRC/configure \ -+ --prefix=$USR_PREFIX\ -+ --libdir=$LIB_PREFIX\ -+ --incdir=$INC_PREFIX\ -+ $MCOPTS\ -+ --disable-stripping\ -+ --disable-thumb\ -+ --enable-v4l2-request\ -+ --enable-libdrm\ -+ --enable-epoxy\ -+ --enable-libudev\ -+ --enable-vout-egl\ -+ --enable-vout-drm\ -+ $SHARED_LIBS\ -+ $RPIOPTS\ -+ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\ -+ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\ -+ --extra-ldflags="$RPI_LIBDIRS"\ -+ --extra-libs="$RPI_EXTRALIBS"\ -+ --extra-version="rpi" -+ -+ -+# gcc option for getting asm listing -+# -Wa,-ahls -diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py -new file mode 100755 -index 000000000000..657568014e57 ---- /dev/null -+++ b/pi-util/ffconf.py -@@ -0,0 +1,215 @@ -+#!/usr/bin/env python3 -+ -+import string -+import os -+import subprocess -+import re -+import argparse -+import sys -+import csv -+from stat import * -+ -+CODEC_HEVC_RPI = 1 -+HWACCEL_RPI = 2 -+HWACCEL_DRM = 3 -+HWACCEL_VAAPI = 4 -+ -+def testone(fileroot, srcname, es_file, md5_file, pix, dectype, 
vcodec, ffmpeg_exec): -+ hwaccel = "" -+ if dectype == HWACCEL_RPI: -+ hwaccel = "rpi" -+ elif dectype == HWACCEL_DRM: -+ hwaccel = "drm" -+ elif dectype == HWACCEL_VAAPI: -+ hwaccel = "vaapi" -+ -+ pix_fmt = [] -+ if pix == "8": -+ pix_fmt = ["-pix_fmt", "yuv420p"] -+ elif pix == "10": -+ pix_fmt = ["-pix_fmt", "yuv420p10le"] -+ elif pix == "12": -+ pix_fmt = ["-pix_fmt", "yuv420p12le"] -+ -+ tmp_root = "/tmp" -+ -+ names = srcname.split('/') -+ while len(names) > 1: -+ tmp_root = os.path.join(tmp_root, names[0]) -+ del names[0] -+ name = names[0] -+ -+ if not os.path.exists(tmp_root): -+ os.makedirs(tmp_root) -+ -+ dec_file = os.path.join(tmp_root, name + ".dec.md5") -+ try: -+ os.remove(dec_file) -+ except: -+ pass -+ -+ flog = open(os.path.join(tmp_root, name + ".log"), "wt") -+ -+ ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file] -+ -+ # Unaligned needed for cropping conformance -+ if hwaccel: -+ rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) -+ else: -+ rstr = subprocess.call( -+ [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file], -+ stdout=flog, stderr=subprocess.STDOUT) -+ -+ try: -+ m1 = None -+ m2 = None -+ with open(os.path.join(fileroot, md5_file)) as f: -+ for line in f: -+ m1 = re.search("[0-9a-f]{32}", line.lower()) -+ if m1: -+ break -+ -+ with open(dec_file) as f: -+ m2 = re.search("[0-9a-f]{32}", f.readline()) -+ except: -+ pass -+ -+ if m1 and m2 and m1.group() == m2.group(): -+ print("Match: " + m1.group(), file=flog) -+ rv = 0 -+ elif not m1: -+ print("****** Cannot find m1", file=flog) -+ rv = 3 -+ elif not m2: -+ print("****** Cannot find m2", file=flog) -+ rv = 2 -+ else: -+ print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog) -+ rv = 1 -+ flog.close() -+ return rv -+ -+def scandir(root): -+ aconf = [] -+ ents = 
os.listdir(root) -+ ents.sort(key=str.lower) -+ for name in ents: -+ test_path = os.path.join(root, name) -+ if S_ISDIR(os.stat(test_path).st_mode): -+ files = os.listdir(test_path) -+ es_file = "?" -+ md5_file = "?" -+ for f in files: -+ (base, ext) = os.path.splitext(f) -+ if base[0] == '.': -+ pass -+ elif ext == ".bit" or ext == ".bin": -+ es_file = f -+ elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")): -+ if md5_file == "?": -+ md5_file = f -+ elif base[-3:] == "yuv": -+ md5_file = f -+ aconf.append((1, name, es_file, md5_file)) -+ return aconf -+ -+def runtest(name, tests): -+ if not tests: -+ return True -+ for t in tests: -+ if name[0:len(t)] == t or name.find("/" + t) != -1: -+ return True -+ return False -+ -+def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): -+ unx_failures = [] -+ unx_success = [] -+ failures = 0 -+ successes = 0 -+ for a in csva: -+ exp_test = int(a[0]) -+ if (exp_test and runtest(a[1], tests)): -+ name = a[1] -+ print ("==== ", name, end="") -+ sys.stdout.flush() -+ -+ rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec) -+ if (rv == 0): -+ successes += 1 -+ else: -+ failures += 1 -+ -+ if (rv == 0): -+ if exp_test == 2: -+ print(": * OK *") -+ unx_success.append(name) -+ else: -+ print(": ok") -+ elif exp_test == 2 and rv == 1: -+ print(": fail") -+ elif exp_test == 3 and rv == 2: -+ # Call an expected "crash" an abort -+ print(": abort") -+ else: -+ unx_failures.append(name) -+ if rv == 1: -+ print(": * FAIL *") -+ elif (rv == 2) : -+ print(": * CRASH *") -+ elif (rv == 3) : -+ print(": * MD5 MISSING *") -+ else : -+ print(": * BANG *") -+ -+ if unx_failures or unx_success: -+ print("Unexpected Failures:", unx_failures) -+ print("Unexpected Success: ", unx_success) -+ else: -+ print("All tests normal:", successes, "ok,", failures, "failed") -+ -+ -+class ConfCSVDialect(csv.Dialect): -+ delimiter = ',' -+ 
doublequote = True -+ lineterminator = '\n' -+ quotechar='"' -+ quoting = csv.QUOTE_MINIMAL -+ skipinitialspace = True -+ strict = True -+ -+if __name__ == '__main__': -+ -+ argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester") -+ argp.add_argument("tests", nargs='*') -+ argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line") -+ argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line") -+ argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line") -+ argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test") -+ argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir") -+ argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") -+ argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") -+ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") -+ args = argp.parse_args() -+ -+ if args.csvgen: -+ csv.writer(sys.stdout).writerows(scandir(args.test_root)) -+ exit(0) -+ -+ with open(args.csv, 'rt') as csvfile: -+ csva = [a for a in csv.reader(csvfile, ConfCSVDialect())] -+ -+ dectype = CODEC_HEVC_RPI -+ if os.path.exists("/dev/rpivid-hevcmem"): -+ dectype = HWACCEL_RPI -+ if args.drm or os.path.exists("/sys/module/rpivid_hevc"): -+ dectype = HWACCEL_DRM -+ -+ if args.pi4: -+ dectype = HWACCEL_RPI -+ elif args.drm: -+ dectype = HWACCEL_DRM -+ elif args.vaapi: -+ dectype = HWACCEL_VAAPI -+ -+ doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg) -+ -diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py -new file mode 100755 -index 000000000000..65c5224cd8fb ---- /dev/null -+++ b/pi-util/ffperf.py -@@ -0,0 +1,128 @@ -+#!/usr/bin/env python3 -+ -+import time -+import string -+import os -+import tempfile -+import subprocess -+import re -+import argparse -+import sys -+import csv -+from stat import * -+ -+class tstats: -+ close_threshold = 0.01 -+ 
-+ def __init__(self, stats_dict=None): -+ if stats_dict != None: -+ self.name = stats_dict["name"] -+ self.elapsed = float(stats_dict["elapsed"]) -+ self.user = float(stats_dict["user"]) -+ self.sys = float(stats_dict["sys"]) -+ -+ def times_str(self): -+ ctime = self.sys + self.user -+ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) -+ -+ def dict(self): -+ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} -+ -+ def is_close(self, other): -+ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold -+ -+ def __lt__(self, other): -+ return self.elapsed < other.elapsed -+ def __gt__(self, other): -+ return self.elapsed > other.elapsed -+ -+ def time_file(name, prefix, ffmpeg="./ffmpeg"): -+ stats = tstats() -+ stats.name = name -+ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); -+ cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw", -+ "-vcodec", "hevc_rpi", -+ "-t", "30", "-i", prefix + name, -+ "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog); -+ pinfo = os.wait4(cproc.pid, 0) -+ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); -+ stats.elapsed = end_time - start_time -+ stats.user = pinfo[2].ru_utime -+ stats.sys = pinfo[2].ru_stime -+ return stats -+ -+ -+def common_prefix(s1, s2): -+ for i in range(min(len(s1),len(s2))): -+ if s1[i] != s2[i]: -+ return s1[:i] -+ return s1[:i+1] -+ -+def main(): -+ global flog -+ -+ argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog=""" -+To blank the screen before starting use "xdg-screensaver activate" -+(For some reason this doesn't seem to work from within python). 
-+""") -+ -+ argp.add_argument("streams", nargs='*') -+ argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename") -+ argp.add_argument("--csv_in", help="CSV input filename") -+ argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") -+ argp.add_argument("--repeat", default=3, type=int, help="Run repeat count") -+ argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") -+ -+ args = argp.parse_args() -+ -+ csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"]) -+ csv_out.writeheader() -+ -+ stats_in = {} -+ if args.csv_in != None: -+ with open(args.csv_in, 'r', newline='') as f_in: -+ stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} -+ -+ flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt") -+ -+ streams = args.streams -+ if not streams: -+ if not stats_in: -+ print ("No source streams specified") -+ return 1 -+ prefix = "" if args.prefix == None else args.prefix -+ streams = [k for k in stats_in] -+ elif args.prefix != None: -+ prefix = args.prefix -+ else: -+ prefix = streams[0] -+ for f in streams[1:]: -+ prefix = common_prefix(prefix, f) -+ pp = prefix.rpartition(os.sep) -+ prefix = pp[0] + pp[1] -+ streams = [s[len(prefix):] for s in streams] -+ -+ for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()): -+ print ("====", f) -+ -+ t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) -+ for i in range(args.repeat): -+ t = tstats.time_file(f, prefix, args.ffmpeg) -+ print ("...", t.times_str()) -+ if t0 > t: -+ t0 = t -+ -+ if t0.name in stats_in: -+ pstat = stats_in[t0.name] -+ print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str()) -+ -+ csv_out.writerow(t0.dict()) -+ -+ print () -+ -+ return 0 -+ -+ -+if __name__ == '__main__': -+ exit(main()) -+ -diff --git a/pi-util/genpatch.sh b/pi-util/genpatch.sh -new file mode 100755 -index 
000000000000..0948a68a7ad7 ---- /dev/null -+++ b/pi-util/genpatch.sh -@@ -0,0 +1,35 @@ -+set -e -+ -+NOPATCH= -+if [ "$1" == "--notag" ]; then -+ shift -+ NOPATCH=1 -+fi -+ -+if [ "$1" == "" ]; then -+ echo Usage: $0 [--notag] \ -+ echo e.g.: $0 mmal_4 -+ exit 1 -+fi -+ -+VERSION=`cat RELEASE` -+if [ "$VERSION" == "" ]; then -+ echo Can\'t find version RELEASE -+ exit 1 -+fi -+ -+PATCHFILE=../ffmpeg-$VERSION-$1.patch -+ -+if [ $NOPATCH ]; then -+ echo Not tagged -+else -+ # Only continue if we are all comitted -+ git diff --name-status --exit-code -+ -+ PATCHTAG=pi/$VERSION/$1 -+ echo Tagging: $PATCHTAG -+ -+ git tag $PATCHTAG -+fi -+echo Generating patch: $PATCHFILE -+git diff n$VERSION -- > $PATCHFILE -diff --git a/pi-util/make_array.py b/pi-util/make_array.py -new file mode 100755 -index 000000000000..67b22d2d517f ---- /dev/null -+++ b/pi-util/make_array.py -@@ -0,0 +1,23 @@ -+#!/usr/bin/env python -+ -+# Usage -+# make_array file.bin -+# Produces file.h with array of bytes. -+# -+import sys -+for file in sys.argv[1:]: -+ prefix,suffix = file.split('.') -+ assert suffix=='bin' -+ name=prefix.split('/')[-1] -+ print 'Converting',file -+ with open(prefix+'.h','wb') as out: -+ print >>out, 'static const unsigned char',name,'[] = {' -+ with open(file,'rb') as fd: -+ i = 0 -+ for byte in fd.read(): -+ print >>out, '0x%02x, ' % ord(byte), -+ i = i + 1 -+ if i % 8 == 0: -+ print >>out, ' // %04x' % (i - 8) -+ print >>out,'};' -+ -diff --git a/pi-util/mkinst.sh b/pi-util/mkinst.sh -new file mode 100755 -index 000000000000..271a39e8460a ---- /dev/null -+++ b/pi-util/mkinst.sh -@@ -0,0 +1,5 @@ -+set -e -+ -+make install -+ -+cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr -diff --git a/pi-util/patkodi.sh b/pi-util/patkodi.sh -new file mode 100644 -index 000000000000..dcd05a606e85 ---- /dev/null -+++ b/pi-util/patkodi.sh -@@ -0,0 +1,9 @@ -+set -e -+KODIBASE=/home/jc/rpi/kodi/xbmc -+JOBS=-j20 -+make $JOBS -+git diff xbmc/release/4.3-kodi > 
$KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch -+make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS -+make -C $KODIBASE/build install -+ -+ -diff --git a/pi-util/perfcmp.py b/pi-util/perfcmp.py -new file mode 100755 -index 000000000000..e44cfa0c3c4d ---- /dev/null -+++ b/pi-util/perfcmp.py -@@ -0,0 +1,101 @@ -+#!/usr/bin/env python3 -+ -+import time -+import string -+import os -+import tempfile -+import subprocess -+import re -+import argparse -+import sys -+import csv -+from stat import * -+ -+class tstats: -+ close_threshold = 0.01 -+ -+ def __init__(self, stats_dict=None): -+ if stats_dict != None: -+ self.name = stats_dict["name"] -+ self.elapsed = float(stats_dict["elapsed"]) -+ self.user = float(stats_dict["user"]) -+ self.sys = float(stats_dict["sys"]) -+ -+ def times_str(self): -+ ctime = self.sys + self.user -+ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) -+ -+ def dict(self): -+ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} -+ -+ def is_close(self, other): -+ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold -+ -+ def __lt__(self, other): -+ return self.elapsed < other.elapsed -+ def __gt__(self, other): -+ return self.elapsed > other.elapsed -+ -+ def time_file(name, prefix): -+ stats = tstats() -+ stats.name = name -+ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); -+ cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name, -+ "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog); -+ pinfo = os.wait4(cproc.pid, 0) -+ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); -+ stats.elapsed = end_time - start_time -+ stats.user = pinfo[2].ru_utime -+ stats.sys = pinfo[2].ru_stime -+ return stats -+ -+ -+def common_prefix(s1, s2): -+ for i in range(min(len(s1),len(s2))): -+ if s1[i] != s2[i]: -+ return s1[:i] -+ return s1[:i+1] -+ -+def main(): -+ argp = 
argparse.ArgumentParser(description="FFmpeg performance compare") -+ -+ argp.add_argument("stream0", help="CSV to compare") -+ argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare") -+ -+ args = argp.parse_args() -+ -+ with open(args.stream0, 'r', newline='') as f_in: -+ stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} -+ with open(args.stream1, 'r', newline='') as f_in: -+ stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} -+ -+ print (args.stream0, "<<-->>", args.stream1) -+ print () -+ -+ for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()): -+ if not (f in stats0) : -+ print (" XX :", f) -+ continue -+ if not (f in stats1) : -+ print (" XX :", f) -+ continue -+ -+ s0 = stats0[f] -+ s1 = stats1[f] -+ -+ pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0 -+ thresh = 0.3 -+ tc = 6 -+ -+ nchar = min(tc - 1, int(abs(pcent) / thresh)) -+ cc = " -- " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar -+ -+ print ("%6.2f %s%6.2f (%+5.2f) : %s" % -+ (s0.elapsed, cc, s1.elapsed, pcent, f)) -+ -+ return 0 -+ -+ -+if __name__ == '__main__': -+ exit(main()) -+ -diff --git a/pi-util/qem.sh b/pi-util/qem.sh -new file mode 100755 -index 000000000000..a4dbb6eacd18 ---- /dev/null -+++ b/pi-util/qem.sh -@@ -0,0 +1,9 @@ -+TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex -+QASM=python\ ../local/bin/qasm.py -+SRC_FILE=libavcodec/rpi_hevc_shader.qasm -+DST_BASE=shader -+ -+cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR -+$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c -+$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h -+ -diff --git a/pi-util/v3dusage.py b/pi-util/v3dusage.py -new file mode 100755 -index 000000000000..5935a11ca553 ---- /dev/null -+++ b/pi-util/v3dusage.py -@@ -0,0 +1,128 @@ -+#!/usr/bin/env python -+ 
-+import sys -+import argparse -+import re -+ -+def do_logparse(logname): -+ -+ rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ') -+ rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$') -+ rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$') -+ rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$') -+ -+ ttotal = {'idle':0.0} -+ tstart = {} -+ qctotal = {} -+ qtstotal = {} -+ l2hits = {} -+ l2total = {} -+ time0 = None -+ idle_start = None -+ qpu_op_no = 0 -+ op_count = 0 -+ -+ with open(logname, "rt") as infile: -+ for line in infile: -+ match = rmatch.match(line) -+ if match: -+# print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":" -+ time = float(match.group(1)) -+ unit = match.group(3) -+ opstart = not match.group(2) -+ optype = match.group(7) -+ hascb = match.group(8) != "0" -+ -+ if unit == 'qpu1': -+ unit = unit + "." + str(qpu_op_no) -+ if not opstart: -+ if hascb or optype == 'EXECUTE_SYNC': -+ qpu_op_no = 0 -+ else: -+ qpu_op_no += 1 -+ -+ # Ignore sync type -+ if optype == 'EXECUTE_SYNC': -+ continue -+ -+ if not time0: -+ time0 = time -+ -+ if opstart: -+ tstart[unit] = time; -+ elif unit in tstart: -+ op_count += 1 -+ if not unit in ttotal: -+ ttotal[unit] = 0.0 -+ ttotal[unit] += time - tstart[unit] -+ del tstart[unit] -+ -+ if not idle_start and not tstart: -+ idle_start = time -+ elif idle_start and tstart: -+ ttotal['idle'] += time - idle_start -+ idle_start = None -+ -+ match = rqcycle.match(line) -+ if match: -+ unit = "qpu1." + str(qpu_op_no) -+ if not unit in qctotal: -+ qctotal[unit] = 0 -+ qctotal[unit] += int(match.group(2)) -+ -+ match = rqtscycle.match(line) -+ if match: -+ unit = "qpu1." 
+ str(qpu_op_no) -+ if not unit in qtstotal: -+ qtstotal[unit] = 0 -+ qtstotal[unit] += int(match.group(2)) -+ -+ match = rl2hits.match(line) -+ if match: -+ unit = "qpu1." + str(qpu_op_no) -+ if not unit in l2total: -+ l2total[unit] = 0 -+ l2hits[unit] = 0 -+ l2total[unit] += int(match.group(3)) -+ if match.group(2) == "hits": -+ l2hits[unit] += int(match.group(3)) -+ -+ -+ if not time0: -+ print "No v3d profile records found" -+ else: -+ tlogged = time - time0 -+ -+ print "Logged time:", tlogged, " Op count:", op_count -+ for unit in sorted(ttotal): -+ print b'%6s: %10.3f %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged) -+ print -+ for unit in sorted(qctotal): -+ if not unit in qtstotal: -+ qtstotal[unit] = 0; -+ print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit]) -+ if unit in l2total: -+ print b' L2Total: %10d, hits: %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit]) -+ -+ -+ -+if __name__ == '__main__': -+ argp = argparse.ArgumentParser( -+ formatter_class=argparse.RawDescriptionHelpFormatter, -+ description="QPU/VPU perf summary from VC logging", -+ epilog = """ -+Will also summarise TMU stalls if logging requests set in qpu noflush param -+in the profiled code. 
-+ -+Example use: -+ vcgencmd set_logging level=0xc0 -+ -+ sudo vcdbg log msg >& t.log -+ v3dusage.py t.log -+""") -+ -+ argp.add_argument("logfile") -+ args = argp.parse_args() -+ -+ do_logparse(args.logfile) -+ - -From b6b137b1d039b42b15325f87f55cb7c38e2270b0 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 11:27:39 +0100 -Subject: [PATCH 002/186] Add sand pix fmts & conversion fns - ---- - configure | 3 + - libavutil/Makefile | 3 + - libavutil/arm/Makefile | 1 + - libavutil/arm/rpi_sand_neon.S | 768 ++++++++++++++++++++++++++++++++++ - libavutil/arm/rpi_sand_neon.h | 99 +++++ - libavutil/pixdesc.c | 44 ++ - libavutil/pixfmt.h | 6 + - libavutil/rpi_sand_fn_pw.h | 227 ++++++++++ - libavutil/rpi_sand_fns.c | 353 ++++++++++++++++ - libavutil/rpi_sand_fns.h | 183 ++++++++ - 10 files changed, 1687 insertions(+) - create mode 100644 libavutil/arm/rpi_sand_neon.S - create mode 100644 libavutil/arm/rpi_sand_neon.h - create mode 100644 libavutil/rpi_sand_fn_pw.h - create mode 100644 libavutil/rpi_sand_fns.c - create mode 100644 libavutil/rpi_sand_fns.h - diff --git a/configure b/configure -index 3cd3bdfb4496..5a5ada20711f 100755 +index d77a55b653c1..5c0854f6270b 100755 --- a/configure +++ b/configure -@@ -344,6 +344,7 @@ External library support: +@@ -202,6 +202,7 @@ External library support: + --disable-bzlib disable bzlib [autodetect] + --disable-coreimage disable Apple CoreImage framework [autodetect] + --enable-chromaprint enable audio fingerprinting with chromaprint [no] ++ --disable-epoxy disable epoxy [autodetect] + --enable-frei0r enable frei0r video filtering [no] + --enable-gcrypt enable gcrypt, needed for rtmp(t)e support + if openssl, librtmp or gmp is not used [no] +@@ -287,6 +288,7 @@ External library support: + --enable-libtorch enable Torch as one DNN backend [no] + --enable-libtwolame enable MP2 encoding via libtwolame [no] + --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] ++ --disable-libudev disable libudev [autodetect] + 
--enable-libv4l2 enable libv4l2/v4l-utils [no] + --enable-libvidstab enable video stabilization using vid.stab [no] + --enable-libvmaf enable vmaf filter via libvmaf [no] +@@ -353,12 +355,16 @@ External library support: --enable-libvpl enable Intel oneVPL code via libvpl if libmfx is not used [no] --enable-libnpp enable Nvidia Performance Primitives-based code [no] --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] + --enable-sand enable sand video formats [rpi] ++ --enable-vout-drm enable the vout_drm module - for internal testing only [no] ++ --enable-vout-egl enable the vout_egl module - for internal testing only [no] --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] --disable-nvenc disable Nvidia video encoding code [autodetect] --enable-omx enable OpenMAX IL code [no] -@@ -1930,6 +1931,7 @@ FEATURE_LIST=" - omx_rpi - runtime_cpudetect - safe_bitstream_reader -+ sand - shared - small - static -@@ -2495,6 +2497,7 @@ CONFIG_EXTRA=" - rtpdec - rtpenc_chain - rv34dsp -+ sand - scene_sad - sinewin - snappy -diff --git a/libavutil/Makefile b/libavutil/Makefile -index dc9012f9a83a..e33f5db0996a 100644 ---- a/libavutil/Makefile -+++ b/libavutil/Makefile -@@ -73,6 +73,7 @@ HEADERS = adler32.h \ - rational.h \ - replaygain.h \ - ripemd.h \ -+ rpi_sand_fns.h \ - samplefmt.h \ - sha.h \ - sha512.h \ -@@ -192,6 +193,7 @@ OBJS-$(CONFIG_MACOS_KPERF) += macos_kperf.o - OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o - OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o - OBJS-$(CONFIG_QSV) += hwcontext_qsv.o -+OBJS-$(CONFIG_SAND) += rpi_sand_fns.o - OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o - OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o - OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o -@@ -212,6 +214,7 @@ SKIPHEADERS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.h - SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h - SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h - SKIPHEADERS-$(CONFIG_OPENCL) += 
hwcontext_opencl.h -+SKIPHEADERS-$(CONFIG-RPI) += rpi_sand_fn_pw.h - SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h - SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h - SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h -diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile -index 5da44b05427a..b74b7c4e2f25 100644 ---- a/libavutil/arm/Makefile -+++ b/libavutil/arm/Makefile -@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o \ - - NEON-OBJS += arm/float_dsp_init_neon.o \ - arm/float_dsp_neon.o \ -+ arm/rpi_sand_neon.o \ -diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S -new file mode 100644 -index 000000000000..80890fe9854b ---- /dev/null -+++ b/libavutil/arm/rpi_sand_neon.S -@@ -0,0 +1,768 @@ -+/* -+Copyright (c) 2018 Raspberry Pi (Trading) Ltd. -+All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ * Neither the name of the copyright holder nor the -+ names of its contributors may be used to endorse or promote products -+ derived from this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+Authors: John Cox -+*/ -+ -+#include "libavutil/arm/asm.S" -+ -+ -+@ General notes: -+@ Having done some timing on this in sand8->y8 (Pi4) -+@ vst1 (680fps) is a bit faster than vstm (660fps) -+@ vldm (680fps) is noticably faster than vld1 (480fps) -+@ (or it might be that a mix is what is required) -+@ -+@ At least on a Pi4 it is no more expensive to have a single auto-inc register -+@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted -+@ the latter was better) -+@ -+@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless -+@ the memory is uncached. -+@ As these are Sand -> planar we can assume that src is going to be aligned but -+@ it is possible that dest isn't (converting to .yuv or other packed format). -+@ Luckily vst1 is faster than vstm :-) so all is well -+@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4 -+@ .8 stores would let us do non-word aligned stores into uncached but it -+@ probably isn't worth it. 
-+ -+ -+ -+ -+@ void ff_rpi_sand128b_stripe_to_8_10( -+@ uint8_t * dest, // [r0] -+@ const uint8_t * src1, // [r1] -+@ const uint8_t * src2, // [r2] -+@ unsigned int lines); // [r3] -+ -+.macro stripe2_to_8, bit_depth -+ vpush {q4-q7} -+1: -+ vldm r1!, {q0-q7} -+ subs r3, #1 -+ vldm r2!, {q8-q15} -+ vqrshrn.u16 d0, q0, #\bit_depth - 8 -+ vqrshrn.u16 d1, q1, #\bit_depth - 8 -+ vqrshrn.u16 d2, q2, #\bit_depth - 8 -+ vqrshrn.u16 d3, q3, #\bit_depth - 8 -+ vqrshrn.u16 d4, q4, #\bit_depth - 8 -+ vqrshrn.u16 d5, q5, #\bit_depth - 8 -+ vqrshrn.u16 d6, q6, #\bit_depth - 8 -+ vqrshrn.u16 d7, q7, #\bit_depth - 8 -+ vqrshrn.u16 d8, q8, #\bit_depth - 8 -+ vqrshrn.u16 d9, q9, #\bit_depth - 8 -+ vqrshrn.u16 d10, q10, #\bit_depth - 8 -+ vqrshrn.u16 d11, q11, #\bit_depth - 8 -+ vqrshrn.u16 d12, q12, #\bit_depth - 8 -+ vqrshrn.u16 d13, q13, #\bit_depth - 8 -+ vqrshrn.u16 d14, q14, #\bit_depth - 8 -+ vqrshrn.u16 d15, q15, #\bit_depth - 8 -+ vstm r0!, {q0-q7} -+ bne 1b -+ vpop {q4-q7} -+ bx lr -+.endm -+ -+function ff_rpi_sand128b_stripe_to_8_10, export=1 -+ stripe2_to_8 10 -+endfunc -+ -+@ void ff_rpi_sand8_lines_to_planar_y8( -+@ uint8_t * dest, // [r0] -+@ unsigned int dst_stride, // [r1] -+@ const uint8_t * src, // [r2] -+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+@ unsigned int src_stride2, // [sp, #0] -> r3 -+@ unsigned int _x, // [sp, #4] Ignored - 0 -+@ unsigned int y, // [sp, #8] (r7 in prefix) -+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+@ unsigned int h); // [sp, #16] -> r7 -+@ -+@ Assumes that we are starting on a stripe boundary and that overreading -+@ within the stripe is OK. 
However it does respect the dest size for writing -+ -+function ff_rpi_sand8_lines_to_planar_y8, export=1 -+ push {r4-r8, lr} @ +24 L -+ ldr r3, [sp, #24] -+ ldr r6, [sp, #36] -+ ldr r7, [sp, #32] @ y -+ lsl r3, #7 -+ sub r1, r6 -+ add r8, r2, r7, lsl #7 -+ ldr r7, [sp, #40] -+ -+10: -+ mov r2, r8 -+ add r4, r0, #24 -+ mov r5, r6 -+ mov lr, #0 -+1: -+ vldm r2, {q8-q15} -+ add r2, r3 -+ subs r5, #128 -+ blt 2f -+ vst1.8 {d16, d17, d18, d19}, [r0]! -+ vst1.8 {d20, d21, d22, d23}, [r0]! -+ vst1.8 {d24, d25, d26, d27}, [r0]! -+ vst1.8 {d28, d29, d30, d31}, [r0]! -+ bne 1b -+11: -+ subs r7, #1 -+ add r0, r1 -+ add r8, #128 -+ bne 10b -+ -+ pop {r4-r8, pc} -+ -+@ Partial final write -+2: -+ cmp r5, #64-128 -+ blt 1f -+ vst1.8 {d16, d17, d18, d19}, [r0]! -+ vst1.8 {d20, d21, d22, d23}, [r0]! -+ beq 11b -+ vmov q8, q12 -+ vmov q9, q13 -+ sub r5, #64 -+ vmov q10, q14 -+ vmov q11, q15 -+1: -+ cmp r5, #32-128 -+ blt 1f -+ vst1.8 {d16, d17, d18, d19}, [r0]! -+ beq 11b -+ vmov q8, q10 -+ sub r5, #32 -+ vmov q9, q11 -+1: -+ cmp r5, #16-128 -+ blt 1f -+ vst1.8 {d16, d17}, [r0]! -+ beq 11b -+ sub r5, #16 -+ vmov q8, q9 -+1: -+ cmp r5, #8-128 -+ blt 1f -+ vst1.8 {d16}, [r0]! -+ beq 11b -+ sub r5, #8 -+ vmov d16, d17 -+1: -+ cmp r5, #4-128 -+ blt 1f -+ vst1.32 {d16[0]}, [r0]! -+ beq 11b -+ sub r5, #4 -+ vshr.u64 d16, #32 -+1: -+ cmp r5, #2-128 -+ blt 1f -+ vst1.16 {d16[0]}, [r0]! -+ beq 11b -+ vst1.8 {d16[2]}, [r0]! -+ b 11b -+1: -+ vst1.8 {d16[0]}, [r0]! 
-+ b 11b -+endfunc -+ -+@ void ff_rpi_sand8_lines_to_planar_c8( -+@ uint8_t * dst_u, // [r0] -+@ unsigned int dst_stride_u, // [r1] -+@ uint8_t * dst_v, // [r2] -+@ unsigned int dst_stride_v, // [r3] -+@ const uint8_t * src, // [sp, #0] -> r4, r5 -+@ unsigned int stride1, // [sp, #4] 128 -+@ unsigned int stride2, // [sp, #8] -> r8 -+@ unsigned int _x, // [sp, #12] 0 -+@ unsigned int y, // [sp, #16] (r7 in prefix) -+@ unsigned int _w, // [sp, #20] -> r12, r6 -+@ unsigned int h); // [sp, #24] -> r7 -+@ -+@ Assumes that we are starting on a stripe boundary and that overreading -+@ within the stripe is OK. However it does respect the dest size for writing -+ -+function ff_rpi_sand8_lines_to_planar_c8, export=1 -+ push {r4-r8, lr} @ +24 -+ -+ ldr r5, [sp, #24] -+ ldr r8, [sp, #32] -+ ldr r7, [sp, #40] -+ ldr r6, [sp, #44] -+ lsl r8, #7 -+ add r5, r5, r7, lsl #7 -+ sub r1, r1, r6 -+ sub r3, r3, r6 -+ ldr r7, [sp, #48] -+ vpush {q4-q7} -+ -+10: -+ mov r4, r5 -+ mov r12, r6 -+1: -+ subs r12, #64 -+ vldm r4, {q0-q7} -+ add r4, r8 -+ it gt -+ vldmgt r4, {q8-q15} -+ add r4, r8 -+ -+ vuzp.8 q0, q1 -+ vuzp.8 q2, q3 -+ vuzp.8 q4, q5 -+ vuzp.8 q6, q7 -+ -+ vuzp.8 q8, q9 -+ vuzp.8 q10, q11 -+ vuzp.8 q12, q13 -+ vuzp.8 q14, q15 -+ subs r12, #64 -+ -+ @ Rearrange regs so we can use vst1 with 4 regs -+ vswp q1, q2 -+ vswp q5, q6 -+ vswp q9, q10 -+ vswp q13, q14 -+ blt 2f -+ -+ vst1.8 {d0, d1, d2, d3 }, [r0]! -+ vst1.8 {d8, d9, d10, d11}, [r0]! -+ vst1.8 {d16, d17, d18, d19}, [r0]! -+ vst1.8 {d24, d25, d26, d27}, [r0]! -+ -+ vst1.8 {d4, d5, d6, d7 }, [r2]! -+ vst1.8 {d12, d13, d14, d15}, [r2]! -+ vst1.8 {d20, d21, d22, d23}, [r2]! -+ vst1.8 {d28, d29, d30, d31}, [r2]! -+ bne 1b -+11: -+ subs r7, #1 -+ add r5, #128 -+ add r0, r1 -+ add r2, r3 -+ bne 10b -+ vpop {q4-q7} -+ pop {r4-r8,pc} -+ -+2: -+ cmp r12, #64-128 -+ blt 1f -+ vst1.8 {d0, d1, d2, d3 }, [r0]! -+ vst1.8 {d8, d9, d10, d11}, [r0]! -+ vst1.8 {d4, d5, d6, d7 }, [r2]! -+ vst1.8 {d12, d13, d14, d15}, [r2]! 
-+ beq 11b -+ sub r12, #64 -+ vmov q0, q8 -+ vmov q1, q9 -+ vmov q2, q10 -+ vmov q3, q11 -+ vmov q4, q12 -+ vmov q5, q13 -+ vmov q6, q14 -+ vmov q7, q15 -+1: -+ cmp r12, #32-128 -+ blt 1f -+ vst1.8 {d0, d1, d2, d3 }, [r0]! -+ vst1.8 {d4, d5, d6, d7 }, [r2]! -+ beq 11b -+ sub r12, #32 -+ vmov q0, q4 -+ vmov q1, q5 -+ vmov q2, q6 -+ vmov q3, q7 -+1: -+ cmp r12, #16-128 -+ blt 1f -+ vst1.8 {d0, d1 }, [r0]! -+ vst1.8 {d4, d5 }, [r2]! -+ beq 11b -+ sub r12, #16 -+ vmov q0, q1 -+ vmov q2, q3 -+1: -+ cmp r12, #8-128 -+ blt 1f -+ vst1.8 {d0}, [r0]! -+ vst1.8 {d4}, [r2]! -+ beq 11b -+ sub r12, #8 -+ vmov d0, d1 -+ vmov d4, d5 -+1: -+ cmp r12, #4-128 -+ blt 1f -+ vst1.32 {d0[0]}, [r0]! -+ vst1.32 {d4[0]}, [r2]! -+ beq 11b -+ sub r12, #4 -+ vmov s0, s1 -+ vmov s8, s9 -+1: -+ cmp r12, #2-128 -+ blt 1f -+ vst1.16 {d0[0]}, [r0]! -+ vst1.16 {d4[0]}, [r2]! -+ beq 11b -+ vst1.8 {d0[2]}, [r0]! -+ vst1.8 {d4[2]}, [r2]! -+ b 11b -+1: -+ vst1.8 {d0[0]}, [r0]! -+ vst1.8 {d4[0]}, [r2]! -+ b 11b -+endfunc -+ -+ -+ -+@ void ff_rpi_sand30_lines_to_planar_y16( -+@ uint8_t * dest, // [r0] -+@ unsigned int dst_stride, // [r1] -+@ const uint8_t * src, // [r2] -+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+@ unsigned int src_stride2, // [sp, #0] -> r3 -+@ unsigned int _x, // [sp, #4] Ignored - 0 -+@ unsigned int y, // [sp, #8] (r7 in prefix) -+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+@ unsigned int h); // [sp, #16] -> r7 -+@ -+@ Assumes that we are starting on a stripe boundary and that overreading -+@ within the stripe is OK. 
However it does respect the dest size for writing -+ -+function ff_rpi_sand30_lines_to_planar_y16, export=1 -+ push {r4-r8, lr} @ +24 -+ ldr r3, [sp, #24] -+ ldr r6, [sp, #36] -+ ldr r7, [sp, #32] @ y -+ mov r12, #48 -+ vmov.u16 q15, #0x3ff -+ sub r3, #1 -+ lsl r3, #7 -+ sub r1, r1, r6, lsl #1 -+ add r8, r2, r7, lsl #7 -+ ldr r7, [sp, #40] -+ -+10: -+ mov r2, r8 -+ add r4, r0, #24 -+ mov r5, r6 -+ mov lr, #0 -+1: -+ vldm r2!, {q10-q13} -+ add lr, #64 -+ -+ vshr.u32 q14, q10, #20 @ Cannot vshrn.u32 #20! -+ ands lr, #127 -+ vshrn.u32 d2, q10, #10 -+ vmovn.u32 d0, q10 -+ vmovn.u32 d4, q14 -+ -+ vshr.u32 q14, q11, #20 -+ it eq -+ addeq r2, r3 -+ vshrn.u32 d3, q11, #10 -+ vmovn.u32 d1, q11 -+ vmovn.u32 d5, q14 -+ -+ subs r5, #48 -+ vand q0, q15 -+ vand q1, q15 -+ vand q2, q15 -+ -+ vshr.u32 q14, q12, #20 -+ vshrn.u32 d18, q12, #10 -+ vmovn.u32 d16, q12 -+ vmovn.u32 d20, q14 -+ -+ vshr.u32 q14, q13, #20 -+ vshrn.u32 d19, q13, #10 -+ vmovn.u32 d17, q13 -+ vmovn.u32 d21, q14 -+ -+ vand q8, q15 -+ vand q9, q15 -+ vand q10, q15 -+ blt 2f -+ -+ vst3.16 {d0, d2, d4}, [r0], r12 -+ vst3.16 {d1, d3, d5}, [r4], r12 -+ vst3.16 {d16, d18, d20}, [r0], r12 -+ vst3.16 {d17, d19, d21}, [r4], r12 -+ -+ bne 1b -+ -+11: -+ subs r7, #1 -+ add r0, r1 -+ add r8, #128 -+ bne 10b -+ -+ pop {r4-r8, pc} -+ -+@ Partial final write -+2: -+ cmp r5, #24-48 -+ blt 1f -+ vst3.16 {d0, d2, d4}, [r0], r12 -+ vst3.16 {d1, d3, d5}, [r4] -+ beq 11b -+ vmov q0, q8 -+ sub r5, #24 -+ vmov q1, q9 -+ vmov q2, q10 -+1: -+ cmp r5, #12-48 -+ blt 1f -+ vst3.16 {d0, d2, d4}, [r0]! -+ beq 11b -+ vmov d0, d1 -+ sub r5, #12 -+ vmov d2, d3 -+ vmov d4, d5 -+1: -+ cmp r5, #6-48 -+ add r4, r0, #6 @ avoid [r0]! on sequential instructions -+ blt 1f -+ vst3.16 {d0[0], d2[0], d4[0]}, [r0] -+ vst3.16 {d0[1], d2[1], d4[1]}, [r4] -+ add r0, #12 -+ beq 11b -+ vmov s0, s1 -+ sub r5, #6 -+ vmov s4, s5 -+ vmov s8, s9 -+1: -+ cmp r5, #3-48 -+ blt 1f -+ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! 
-+ beq 11b -+ sub r5, #3 -+ vshr.u32 d0, #16 -+ vshr.u32 d2, #16 -+1: -+ cmp r5, #2-48 -+ blt 1f -+ vst2.16 {d0[0], d2[0]}, [r0]! -+ b 11b -+1: -+ vst1.16 {d0[0]}, [r0]! -+ b 11b -+ -+endfunc -+ -+ -+@ void ff_rpi_sand30_lines_to_planar_c16( -+@ uint8_t * dst_u, // [r0] -+@ unsigned int dst_stride_u, // [r1] -+@ uint8_t * dst_v, // [r2] -+@ unsigned int dst_stride_v, // [r3] -+@ const uint8_t * src, // [sp, #0] -> r4, r5 -+@ unsigned int stride1, // [sp, #4] 128 -+@ unsigned int stride2, // [sp, #8] -> r8 -+@ unsigned int _x, // [sp, #12] 0 -+@ unsigned int y, // [sp, #16] (r7 in prefix) -+@ unsigned int _w, // [sp, #20] -> r6, r9 -+@ unsigned int h); // [sp, #24] -> r7 -+@ -+@ Assumes that we are starting on a stripe boundary and that overreading -+@ within the stripe is OK. However it does respect the dest size for writing -+ -+function ff_rpi_sand30_lines_to_planar_c16, export=1 -+ push {r4-r10, lr} @ +32 -+ ldr r5, [sp, #32] -+ ldr r8, [sp, #40] -+ ldr r7, [sp, #48] -+ ldr r9, [sp, #52] -+ mov r12, #48 -+ vmov.u16 q15, #0x3ff -+ sub r8, #1 -+ lsl r8, #7 -+ add r5, r5, r7, lsl #7 -+ sub r1, r1, r9, lsl #1 -+ sub r3, r3, r9, lsl #1 -+ ldr r7, [sp, #56] -+10: -+ mov lr, #0 -+ mov r4, r5 -+ mov r6, r9 -+1: -+ vldm r4!, {q0-q3} -+ add lr, #64 -+ -+ @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 -+ vshr.u32 q14, q0, #20 -+ vshrn.u32 d16, q0, #10 -+ vmovn.u32 d18, q0 -+ ands lr, #127 -+ vmovn.u32 d20, q14 -+ -+ vshr.u32 q14, q1, #20 -+ vshrn.u32 d17, q1, #10 -+ vmovn.u32 d19, q1 -+ vmovn.u32 d21, q14 -+ -+ vshr.u32 q14, q2, #20 -+ vshrn.u32 d22, q2, #10 -+ vmovn.u32 d24, q2 -+ vmovn.u32 d26, q14 -+ -+ vshr.u32 q14, q3, #20 -+ vshrn.u32 d23, q3, #10 -+ vmovn.u32 d25, q3 -+ add r10, r0, #24 -+ vmovn.u32 d27, q14 -+ -+ it eq -+ addeq r4, r8 -+ vuzp.16 q8, q11 -+ vuzp.16 q9, q12 -+ vuzp.16 q10, q13 -+ -+ @ q8 V0, V3,.. -> q0 -+ @ q9 U0, U3... -+ @ q10 U1, U4... -+ @ q11 U2, U5,.. -+ @ q12 V1, V4,.. -> q1 -+ @ q13 V2, V5,.. 
-> q2 -+ -+ subs r6, #24 -+ vand q11, q15 -+ vand q9, q15 -+ vand q10, q15 -+ vand q0, q8, q15 -+ vand q1, q12, q15 -+ vand q2, q13, q15 -+ -+ blt 2f -+ -+ vst3.16 {d18, d20, d22}, [r0], r12 -+ vst3.16 {d19, d21, d23}, [r10] -+ add r10, r2, #24 -+ vst3.16 {d0, d2, d4}, [r2], r12 -+ vst3.16 {d1, d3, d5}, [r10] -+ -+ bne 1b -+ -+11: -+ subs r7, #1 -+ add r5, #128 -+ add r0, r1 -+ add r2, r3 -+ bne 10b -+ -+ pop {r4-r10, pc} -+ -+@ Partial final write -+2: -+ cmp r6, #-12 -+ blt 1f -+ vst3.16 {d18, d20, d22}, [r0]! -+ vst3.16 {d0, d2, d4}, [r2]! -+ beq 11b -+ vmov d18, d19 -+ vmov d20, d21 -+ vmov d22, d23 -+ sub r6, #12 -+ vmov d0, d1 -+ vmov d2, d3 -+ vmov d4, d5 -+1: -+ cmp r6, #-18 -+ @ Rezip here as it makes the remaining tail handling easier -+ vzip.16 d0, d18 -+ vzip.16 d2, d20 -+ vzip.16 d4, d22 -+ blt 1f -+ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! -+ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! -+ vst3.16 {d0[3], d2[3], d4[3]}, [r0]! -+ vst3.16 {d0[2], d2[2], d4[2]}, [r2]! -+ beq 11b -+ vmov d0, d18 -+ vmov d2, d20 -+ sub r6, #6 -+ vmov d4, d22 -+1: -+ cmp r6, #-21 -+ blt 1f -+ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! -+ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! -+ beq 11b -+ vmov s4, s5 -+ sub r6, #3 -+ vmov s0, s1 -+1: -+ cmp r6, #-22 -+ blt 1f -+ vst2.16 {d0[1], d2[1]}, [r0]! -+ vst2.16 {d0[0], d2[0]}, [r2]! -+ b 11b -+1: -+ vst1.16 {d0[1]}, [r0]! -+ vst1.16 {d0[0]}, [r2]! -+ b 11b -+ -+endfunc -+ -+@ void ff_rpi_sand30_lines_to_planar_p010( -+@ uint8_t * dest, // [r0] -+@ unsigned int dst_stride, // [r1] -+@ const uint8_t * src, // [r2] -+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+@ unsigned int src_stride2, // [sp, #0] -> r3 -+@ unsigned int _x, // [sp, #4] Ignored - 0 -+@ unsigned int y, // [sp, #8] (r7 in prefix) -+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+@ unsigned int h); // [sp, #16] -> r7 -+@ -+@ Assumes that we are starting on a stripe boundary and that overreading -+@ within the stripe is OK. 
However it does respect the dest size for writing -+ -+function ff_rpi_sand30_lines_to_planar_p010, export=1 -+ push {r4-r8, lr} @ +24 -+ ldr r3, [sp, #24] -+ ldr r6, [sp, #36] -+ ldr r7, [sp, #32] @ y -+ mov r12, #48 -+ vmov.u16 q15, #0xffc0 -+ sub r3, #1 -+ lsl r3, #7 -+ sub r1, r1, r6, lsl #1 -+ add r8, r2, r7, lsl #7 -+ ldr r7, [sp, #40] -+ -+10: -+ mov r2, r8 -+ add r4, r0, #24 -+ mov r5, r6 -+ mov lr, #0 -+1: -+ vldm r2!, {q10-q13} -+ add lr, #64 -+ -+ vshl.u32 q14, q10, #6 -+ ands lr, #127 -+ vshrn.u32 d4, q10, #14 -+ vshrn.u32 d2, q10, #4 -+ vmovn.u32 d0, q14 -+ -+ vshl.u32 q14, q11, #6 -+ it eq -+ addeq r2, r3 -+ vshrn.u32 d5, q11, #14 -+ vshrn.u32 d3, q11, #4 -+ vmovn.u32 d1, q14 -+ -+ subs r5, #48 -+ vand q2, q15 -+ vand q1, q15 -+ vand q0, q15 -+ -+ vshl.u32 q14, q12, #6 -+ vshrn.u32 d20, q12, #14 -+ vshrn.u32 d18, q12, #4 -+ vmovn.u32 d16, q14 -+ -+ vshl.u32 q14, q13, #6 -+ vshrn.u32 d21, q13, #14 -+ vshrn.u32 d19, q13, #4 -+ vmovn.u32 d17, q14 -+ -+ vand q10, q15 -+ vand q9, q15 -+ vand q8, q15 -+ blt 2f -+ -+ vst3.16 {d0, d2, d4}, [r0], r12 -+ vst3.16 {d1, d3, d5}, [r4], r12 -+ vst3.16 {d16, d18, d20}, [r0], r12 -+ vst3.16 {d17, d19, d21}, [r4], r12 -+ -+ bne 1b -+ -+11: -+ subs r7, #1 -+ add r0, r1 -+ add r8, #128 -+ bne 10b -+ -+ pop {r4-r8, pc} -+ -+@ Partial final write -+2: -+ cmp r5, #24-48 -+ blt 1f -+ vst3.16 {d0, d2, d4}, [r0], r12 -+ vst3.16 {d1, d3, d5}, [r4] -+ beq 11b -+ vmov q0, q8 -+ sub r5, #24 -+ vmov q1, q9 -+ vmov q2, q10 -+1: -+ cmp r5, #12-48 -+ blt 1f -+ vst3.16 {d0, d2, d4}, [r0]! -+ beq 11b -+ vmov d0, d1 -+ sub r5, #12 -+ vmov d2, d3 -+ vmov d4, d5 -+1: -+ cmp r5, #6-48 -+ add r4, r0, #6 @ avoid [r0]! on sequential instructions -+ blt 1f -+ vst3.16 {d0[0], d2[0], d4[0]}, [r0] -+ vst3.16 {d0[1], d2[1], d4[1]}, [r4] -+ add r0, #12 -+ beq 11b -+ vmov s0, s1 -+ sub r5, #6 -+ vmov s4, s5 -+ vmov s8, s9 -+1: -+ cmp r5, #3-48 -+ blt 1f -+ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! 
-+ beq 11b -+ sub r5, #3 -+ vshr.u32 d0, #16 -+ vshr.u32 d2, #16 -+1: -+ cmp r5, #2-48 -+ blt 1f -+ vst2.16 {d0[0], d2[0]}, [r0]! -+ b 11b -+1: -+ vst1.16 {d0[0]}, [r0]! -+ b 11b -+ -+endfunc -+ -+ -+ -diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h -new file mode 100644 -index 000000000000..447f367bea8f ---- /dev/null -+++ b/libavutil/arm/rpi_sand_neon.h -@@ -0,0 +1,99 @@ -+/* -+Copyright (c) 2020 Raspberry Pi (Trading) Ltd. -+All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ * Neither the name of the copyright holder nor the -+ names of its contributors may be used to endorse or promote products -+ derived from this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ -+Authors: John Cox -+*/ -+ -+#ifndef AVUTIL_ARM_SAND_NEON_H -+#define AVUTIL_ARM_SAND_NEON_H -+ -+void ff_rpi_sand128b_stripe_to_8_10( -+ uint8_t * dest, // [r0] -+ const uint8_t * src1, // [r1] -+ const uint8_t * src2, // [r2] -+ unsigned int lines); // [r3] -+ -+void ff_rpi_sand8_lines_to_planar_y8( -+ uint8_t * dest, // [r0] -+ unsigned int dst_stride, // [r1] -+ const uint8_t * src, // [r2] -+ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+ unsigned int src_stride2, // [sp, #0] -> r3 -+ unsigned int _x, // [sp, #4] Ignored - 0 -+ unsigned int y, // [sp, #8] (r7 in prefix) -+ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+ unsigned int h); // [sp, #16] -> r7 -+ -+void ff_rpi_sand8_lines_to_planar_c8( -+ uint8_t * dst_u, // [r0] -+ unsigned int dst_stride_u, // [r1] -+ uint8_t * dst_v, // [r2] -+ unsigned int dst_stride_v, // [r3] -+ const uint8_t * src, // [sp, #0] -> r4, r5 -+ unsigned int stride1, // [sp, #4] 128 -+ unsigned int stride2, // [sp, #8] -> r8 -+ unsigned int _x, // [sp, #12] 0 -+ unsigned int y, // [sp, #16] (r7 in prefix) -+ unsigned int _w, // [sp, #20] -> r12, r6 -+ unsigned int h); // [sp, #24] -> r7 -+ -+void ff_rpi_sand30_lines_to_planar_y16( -+ uint8_t * dest, // [r0] -+ unsigned int dst_stride, // [r1] -+ const uint8_t * src, // [r2] -+ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+ unsigned int src_stride2, // [sp, #0] -> r3 -+ unsigned int _x, // [sp, #4] Ignored - 0 -+ unsigned int y, // [sp, #8] (r7 in prefix) -+ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+ unsigned int h); // [sp, #16] -> r7 -+ -+void ff_rpi_sand30_lines_to_planar_c16( -+ uint8_t * dst_u, // [r0] -+ unsigned int dst_stride_u, // [r1] -+ uint8_t * dst_v, // [r2] -+ unsigned int dst_stride_v, // [r3] -+ const uint8_t * src, // [sp, #0] -> r4, r5 -+ unsigned int stride1, // [sp, #4] 128 -+ unsigned int stride2, // [sp, #8] -> r8 -+ unsigned int _x, // [sp, #12] 0 -+ unsigned int y, // [sp, #16] (r7 in prefix) -+ unsigned int _w, // [sp, #20] 
-> r6, r9 -+ unsigned int h); // [sp, #24] -> r7 -+ -+void ff_rpi_sand30_lines_to_planar_p010( -+ uint8_t * dest, // [r0] -+ unsigned int dst_stride, // [r1] -+ const uint8_t * src, // [r2] -+ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+ unsigned int src_stride2, // [sp, #0] -> r3 -+ unsigned int _x, // [sp, #4] Ignored - 0 -+ unsigned int y, // [sp, #8] (r7 in prefix) -+ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+ unsigned int h); // [sp, #16] -> r7 -+ -+#endif // AVUTIL_ARM_SAND_NEON_H -+ -diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c -index 62a2ae08d907..cb73521ea75c 100644 ---- a/libavutil/pixdesc.c -+++ b/libavutil/pixdesc.c -@@ -2717,6 +2717,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { - .flags = AV_PIX_FMT_FLAG_RGB | AV_PIX_FMT_FLAG_FLOAT | - AV_PIX_FMT_FLAG_ALPHA, - }, -+ [AV_PIX_FMT_SAND128] = { -+ .name = "sand128", -+ .nb_components = 3, -+ .log2_chroma_w = 1, -+ .log2_chroma_h = 1, -+ .comp = { -+ { 0, 1, 0, 0, 8 }, /* Y */ -+ { 1, 2, 0, 0, 8 }, /* U */ -+ { 1, 2, 1, 0, 8 }, /* V */ -+ }, -+ .flags = 0, -+ }, -+ [AV_PIX_FMT_SAND64_10] = { -+ .name = "sand64_10", -+ .nb_components = 3, -+ .log2_chroma_w = 1, -+ .log2_chroma_h = 1, -+ .comp = { -+ { 0, 2, 0, 0, 10 }, /* Y */ -+ { 1, 4, 0, 0, 10 }, /* U */ -+ { 1, 4, 2, 0, 10 }, /* V */ -+ }, -+ .flags = 0, -+ }, -+ [AV_PIX_FMT_SAND64_16] = { -+ .name = "sand64_16", -+ .nb_components = 3, -+ .log2_chroma_w = 1, -+ .log2_chroma_h = 1, -+ .comp = { -+ { 0, 2, 0, 0, 16 }, /* Y */ -+ { 1, 4, 0, 0, 16 }, /* U */ -+ { 1, 4, 2, 0, 16 }, /* V */ -+ }, -+ .flags = 0, -+ }, -+ [AV_PIX_FMT_RPI4_8] = { -+ .name = "rpi4_8", -+ .flags = AV_PIX_FMT_FLAG_HWACCEL, -+ }, -+ [AV_PIX_FMT_RPI4_10] = { -+ .name = "rpi4_10", -+ .flags = AV_PIX_FMT_FLAG_HWACCEL, -+ }, - }; - - static const char * const color_range_names[] = { -diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h -index 37c2c79e0140..22f70007c3df 100644 ---- a/libavutil/pixfmt.h -+++ 
b/libavutil/pixfmt.h -@@ -377,6 +377,12 @@ enum AVPixelFormat { - - AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian - AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian -+// RPI - not on ifdef so can be got at by calling progs -+ AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding -+ AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding -+ AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding -+ AV_PIX_FMT_RPI4_8, -+ AV_PIX_FMT_RPI4_10, - - AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined - AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined -diff --git a/libavutil/rpi_sand_fn_pw.h b/libavutil/rpi_sand_fn_pw.h -new file mode 100644 -index 000000000000..0324f6826dde ---- /dev/null -+++ b/libavutil/rpi_sand_fn_pw.h -@@ -0,0 +1,227 @@ -+/* -+Copyright (c) 2018 Raspberry Pi (Trading) Ltd. -+All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ * Neither the name of the copyright holder nor the -+ names of its contributors may be used to endorse or promote products -+ derived from this software without specific prior written permission. 
-+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+Authors: John Cox -+*/ -+ -+// * Included twice from rpi_sand_fn with different PW -+ -+#define STRCAT(x,y) x##y -+ -+#if PW == 1 -+#define pixel uint8_t -+#define FUNC(f) STRCAT(f, 8) -+#elif PW == 2 -+#define pixel uint16_t -+#define FUNC(f) STRCAT(f, 16) -+#else -+#error Unexpected PW -+#endif -+ -+// Fetches a single patch - offscreen fixup not done here -+// w <= stride1 -+// unclipped -+void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h) -+{ -+ const unsigned int x = _x; -+ const unsigned int w = _w; -+ const unsigned int mask = stride1 - 1; -+ -+#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64) -+ if (_x == 0) { -+ ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, -+ src, stride1, stride2, _x, y, _w, h); -+ return; -+ } -+#endif -+ -+ if ((x & ~mask) == ((x + w) & ~mask)) { -+ // All in one sand stripe -+ const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; -+ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) { -+ memcpy(dst, p, w); -+ } -+ } -+ else -+ { -+ // Two+ stripe -+ 
const unsigned int sstride = stride1 * stride2; -+ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; -+ const uint8_t * p2 = p1 + sstride - (x & mask); -+ const unsigned int w1 = stride1 - (x & mask); -+ const unsigned int w3 = (x + w) & mask; -+ const unsigned int w2 = w - (w1 + w3); -+ -+ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) { -+ unsigned int j; -+ const uint8_t * p = p2; -+ uint8_t * d = dst; -+ memcpy(d, p1, w1); -+ d += w1; -+ for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) { -+ memcpy(d, p, stride1); -+ } -+ memcpy(d, p, w3); -+ } -+ } -+} -+ -+// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V) -+ -+void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u, -+ uint8_t * dst_v, const unsigned int dst_stride_v, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h) -+{ -+ const unsigned int x = _x * 2; -+ const unsigned int w = _w * 2; -+ const unsigned int mask = stride1 - 1; -+ -+#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64) -+ if (_x == 0) { -+ ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, -+ src, stride1, stride2, _x, y, _w, h); -+ return; -+ } -+#endif -+ -+ if ((x & ~mask) == ((x + w) & ~mask)) { -+ // All in one sand stripe -+ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; -+ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) { -+ pixel * du = (pixel *)dst_u; -+ pixel * dv = (pixel *)dst_v; -+ const pixel * p = (const pixel *)p1; -+ for (unsigned int k = 0; k < w; k += 2 * PW) { -+ *du++ = *p++; -+ *dv++ = *p++; -+ } -+ } -+ } -+ else -+ { -+ // Two+ stripe -+ const unsigned int sstride = stride1 * stride2; -+ const unsigned int sstride_p = (sstride - stride1) / PW; -+ -+ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x 
& ~mask) * stride2; -+ const uint8_t * p2 = p1 + sstride - (x & mask); -+ const unsigned int w1 = stride1 - (x & mask); -+ const unsigned int w3 = (x + w) & mask; -+ const unsigned int w2 = w - (w1 + w3); -+ -+ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) { -+ unsigned int j; -+ const pixel * p = (const pixel *)p1; -+ pixel * du = (pixel *)dst_u; -+ pixel * dv = (pixel *)dst_v; -+ for (unsigned int k = 0; k < w1; k += 2 * PW) { -+ *du++ = *p++; -+ *dv++ = *p++; -+ } -+ for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) { -+ for (unsigned int k = 0; k < stride1; k += 2 * PW) { -+ *du++ = *p++; -+ *dv++ = *p++; -+ } -+ } -+ for (unsigned int k = 0; k < w3; k += 2 * PW) { -+ *du++ = *p++; -+ *dv++ = *p++; -+ } -+ } -+ } -+} -+ -+void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c, -+ unsigned int stride1, unsigned int stride2, -+ const uint8_t * src_u, const unsigned int src_stride_u, -+ const uint8_t * src_v, const unsigned int src_stride_v, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h) -+{ -+ const unsigned int x = _x * 2; -+ const unsigned int w = _w * 2; -+ const unsigned int mask = stride1 - 1; -+ if ((x & ~mask) == ((x + w) & ~mask)) { -+ // All in one sand stripe -+ uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; -+ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) { -+ const pixel * su = (const pixel *)src_u; -+ const pixel * sv = (const pixel *)src_v; -+ pixel * p = (pixel *)p1; -+ for (unsigned int k = 0; k < w; k += 2 * PW) { -+ *p++ = *su++; -+ *p++ = *sv++; -+ } -+ } -+ } -+ else -+ { -+ // Two+ stripe -+ const unsigned int sstride = stride1 * stride2; -+ const unsigned int sstride_p = (sstride - stride1) / PW; -+ -+ const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; -+ const uint8_t * p2 = p1 + sstride - (x & mask); -+ const unsigned int w1 
= stride1 - (x & mask); -+ const unsigned int w3 = (x + w) & mask; -+ const unsigned int w2 = w - (w1 + w3); -+ -+ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) { -+ unsigned int j; -+ const pixel * su = (const pixel *)src_u; -+ const pixel * sv = (const pixel *)src_v; -+ pixel * p = (pixel *)p1; -+ for (unsigned int k = 0; k < w1; k += 2 * PW) { -+ *p++ = *su++; -+ *p++ = *sv++; -+ } -+ for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) { -+ for (unsigned int k = 0; k < stride1; k += 2 * PW) { -+ *p++ = *su++; -+ *p++ = *sv++; -+ } -+ } -+ for (unsigned int k = 0; k < w3; k += 2 * PW) { -+ *p++ = *su++; -+ *p++ = *sv++; -+ } -+ } -+ } -+} -+ -+ -+#undef pixel -+#undef STRCAT -+#undef FUNC -+ -diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c -new file mode 100644 -index 000000000000..ed0261b02f07 ---- /dev/null -+++ b/libavutil/rpi_sand_fns.c -@@ -0,0 +1,353 @@ -+/* -+Copyright (c) 2018 Raspberry Pi (Trading) Ltd. -+All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ * Neither the name of the copyright holder nor the -+ names of its contributors may be used to endorse or promote products -+ derived from this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+Authors: John Cox -+*/ -+ -+#include "config.h" -+#include -+#include -+#include "rpi_sand_fns.h" -+#include "avassert.h" -+#include "frame.h" -+ -+#if ARCH_ARM && HAVE_NEON -+#include "arm/rpi_sand_neon.h" -+#define HAVE_SAND_ASM 1 -+#else -+#define HAVE_SAND_ASM 0 -+#endif -+ -+#define PW 1 -+#include "rpi_sand_fn_pw.h" -+#undef PW -+ -+#define PW 2 -+#include "rpi_sand_fn_pw.h" -+#undef PW -+ -+#if 1 -+// Simple round -+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) -+{ -+ const unsigned int rnd = (1 << shr) >> 1; -+ const uint16_t * src = (const uint16_t *)_src; -+ -+ for (; n != 0; --n) { -+ *dst++ = (*src++ + rnd) >> shr; -+ } -+} -+#else -+// Dithered variation -+static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) -+{ -+ unsigned int rnd = (1 << shr) >> 1; -+ const unsigned int mask = ((1 << shr) - 1); -+ const uint16_t * src = (const uint16_t *)_src; -+ -+ for (; n != 0; --n) { -+ rnd = *src++ + (rnd & mask); -+ *dst++ = rnd >> shr; -+ } -+} -+#endif -+ -+// Fetches a single patch - offscreen fixup not done here -+// w <= stride1 -+// unclipped -+// _x & _w in pixels, strides in bytes -+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h) -+{ -+ const unsigned int x0 = 
(_x / 3) * 4; // Byte offset of the word -+ const unsigned int xskip0 = _x - (x0 >> 2) * 3; -+ const unsigned int x1 = ((_x + _w) / 3) * 4; -+ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; -+ const unsigned int mask = stride1 - 1; -+ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; -+ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words -+ -+#if HAVE_SAND_ASM -+ if (_x == 0) { -+ ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); -+ return; -+ } -+#endif -+ -+ if (x0 == x1) { -+ // ******************* -+ // Partial single word xfer -+ return; -+ } -+ -+ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) -+ { -+ unsigned int x = x0; -+ const uint32_t * p = (const uint32_t *)p0; -+ uint16_t * d = (uint16_t *)dst; -+ -+ if (xskip0 != 0) { -+ const uint32_t p3 = *p++; -+ -+ if (xskip0 == 1) -+ *d++ = (p3 >> 10) & 0x3ff; -+ *d++ = (p3 >> 20) & 0x3ff; -+ -+ if (((x += 4) & mask) == 0) -+ p += slice_inc; -+ } -+ -+ while (x != x1) { -+ const uint32_t p3 = *p++; -+ *d++ = p3 & 0x3ff; -+ *d++ = (p3 >> 10) & 0x3ff; -+ *d++ = (p3 >> 20) & 0x3ff; -+ -+ if (((x += 4) & mask) == 0) -+ p += slice_inc; -+ } -+ -+ if (xrem1 != 0) { -+ const uint32_t p3 = *p; -+ -+ *d++ = p3 & 0x3ff; -+ if (xrem1 == 2) -+ *d++ = (p3 >> 10) & 0x3ff; -+ } -+ } -+} -+ -+ -+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, -+ uint8_t * dst_v, const unsigned int dst_stride_v, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h) -+{ -+ const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word -+ const unsigned int xskip0 = _x - (x0 >> 3) * 3; -+ const unsigned int x1 = ((_x + _w) / 3) * 8; -+ const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; -+ const unsigned int mask = stride1 - 1; -+ const uint8_t * p0 = src + (x0 & mask) + y * 
stride1 + (x0 & ~mask) * stride2; -+ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words -+ -+#if HAVE_SAND_ASM -+ if (_x == 0) { -+ ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, -+ src, stride1, stride2, _x, y, _w, h); -+ return; -+ } -+#endif -+ -+ if (x0 == x1) { -+ // ******************* -+ // Partial single word xfer -+ return; -+ } -+ -+ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1) -+ { -+ unsigned int x = x0; -+ const uint32_t * p = (const uint32_t *)p0; -+ uint16_t * du = (uint16_t *)dst_u; -+ uint16_t * dv = (uint16_t *)dst_v; -+ -+ if (xskip0 != 0) { -+ const uint32_t p3a = *p++; -+ const uint32_t p3b = *p++; -+ -+ if (xskip0 == 1) -+ { -+ *du++ = (p3a >> 20) & 0x3ff; -+ *dv++ = (p3b >> 0) & 0x3ff; -+ } -+ *du++ = (p3b >> 10) & 0x3ff; -+ *dv++ = (p3b >> 20) & 0x3ff; -+ -+ if (((x += 8) & mask) == 0) -+ p += slice_inc; -+ } -+ -+ while (x != x1) { -+ const uint32_t p3a = *p++; -+ const uint32_t p3b = *p++; -+ -+ *du++ = p3a & 0x3ff; -+ *dv++ = (p3a >> 10) & 0x3ff; -+ *du++ = (p3a >> 20) & 0x3ff; -+ *dv++ = p3b & 0x3ff; -+ *du++ = (p3b >> 10) & 0x3ff; -+ *dv++ = (p3b >> 20) & 0x3ff; -+ -+ if (((x += 8) & mask) == 0) -+ p += slice_inc; -+ } -+ -+ if (xrem1 != 0) { -+ const uint32_t p3a = *p++; -+ const uint32_t p3b = *p++; -+ -+ *du++ = p3a & 0x3ff; -+ *dv++ = (p3a >> 10) & 0x3ff; -+ if (xrem1 == 2) -+ { -+ *du++ = (p3a >> 20) & 0x3ff; -+ *dv++ = p3b & 0x3ff; -+ } -+ } -+ } -+} -+ -+ -+// w/h in pixels -+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, -+ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, -+ unsigned int w, unsigned int h, const unsigned int shr) -+{ -+ const unsigned int n = dst_stride1 / 2; -+ unsigned int j; -+ -+ // This is true for our current layouts -+ av_assert0(dst_stride1 == src_stride1); -+ -+ // As we 
have the same stride1 for src & dest and src is wider than dest -+ // then if we loop on src we can always write contiguously to dest -+ // We make no effort to copy an exact width - round up to nearest src stripe -+ // as we will always have storage in dest for that -+ -+#if ARCH_ARM && HAVE_NEON -+ if (shr == 3 && src_stride1 == 128) { -+ for (j = 0; j + n < w; j += dst_stride1) { -+ uint8_t * d = dst + j * dst_stride2; -+ const uint8_t * s1 = src + j * 2 * src_stride2; -+ const uint8_t * s2 = s1 + src_stride1 * src_stride2; -+ -+ ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h); -+ } -+ } -+ else -+#endif -+ { -+ for (j = 0; j + n < w; j += dst_stride1) { -+ uint8_t * d = dst + j * dst_stride2; -+ const uint8_t * s1 = src + j * 2 * src_stride2; -+ const uint8_t * s2 = s1 + src_stride1 * src_stride2; -+ -+ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) { -+ cpy16_to_8(d, s1, n, shr); -+ cpy16_to_8(d + n, s2, n, shr); -+ } -+ } -+ } -+ -+ // Fix up a trailing dest half stripe -+ if (j < w) { -+ uint8_t * d = dst + j * dst_stride2; -+ const uint8_t * s1 = src + j * 2 * src_stride2; -+ -+ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) { -+ cpy16_to_8(d, s1, n, shr); -+ } -+ } -+} -+ -+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) -+{ -+ const int w = av_frame_cropped_width(src); -+ const int h = av_frame_cropped_height(src); -+ const int x = src->crop_left; -+ const int y = src->crop_top; -+ -+ // We will crop as part of the conversion -+ dst->crop_top = 0; -+ dst->crop_left = 0; -+ dst->crop_bottom = 0; -+ dst->crop_right = 0; -+ -+ switch (src->format){ -+ case AV_PIX_FMT_SAND128: -+ case AV_PIX_FMT_RPI4_8: -+ switch (dst->format){ -+ case AV_PIX_FMT_YUV420P: -+ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], -+ src->data[0], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x, y, w, h); -+ av_rpi_sand_to_planar_c8(dst->data[1], 
dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ src->data[1], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x/2, y/2, w/2, h/2); -+ break; -+ default: -+ return -1; -+ } -+ break; -+ case AV_PIX_FMT_SAND64_10: -+ switch (dst->format){ -+ case AV_PIX_FMT_YUV420P10: -+ av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0], -+ src->data[0], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x*2, y, w*2, h); -+ av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ src->data[1], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x, y/2, w, h/2); -+ break; -+ default: -+ return -1; -+ } -+ break; -+ case AV_PIX_FMT_RPI4_10: -+ switch (dst->format){ -+ case AV_PIX_FMT_YUV420P10: -+ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], -+ src->data[0], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x, y, w, h); -+ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ src->data[1], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x/2, y/2, w/2, h/2); -+ break; -+ default: -+ return -1; -+ } -+ break; -+ default: -+ return -1; -+ } -+ -+ return av_frame_copy_props(dst, src); -+} -diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h -new file mode 100644 -index 000000000000..634b55e800dc ---- /dev/null -+++ b/libavutil/rpi_sand_fns.h -@@ -0,0 +1,183 @@ -+/* -+Copyright (c) 2018 Raspberry Pi (Trading) Ltd. -+All rights reserved. -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. 
-+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ * Neither the name of the copyright holder nor the -+ names of its contributors may be used to endorse or promote products -+ derived from this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ -+Authors: John Cox -+*/ -+ -+#ifndef AVUTIL_RPI_SAND_FNS -+#define AVUTIL_RPI_SAND_FNS -+ -+#include "libavutil/frame.h" -+ -+// For all these fns _x & _w are measured as coord * PW -+// For the C fns coords are in chroma pels (so luma / 2) -+// Strides are in bytes -+ -+void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+ -+void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u, -+ uint8_t * dst_v, const unsigned int dst_stride_v, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, -+ uint8_t * dst_v, const unsigned int dst_stride_v, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+ -+void av_rpi_planar_to_sand_c8(uint8_t * dst_c, -+ unsigned int stride1, unsigned int stride2, -+ const uint8_t * src_u, const unsigned int src_stride_u, -+ const uint8_t * src_v, const unsigned int src_stride_v, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+void av_rpi_planar_to_sand_c16(uint8_t * dst_c, -+ unsigned int stride1, unsigned int stride2, -+ const uint8_t * src_u, const unsigned int src_stride_u, -+ const uint8_t * src_v, const unsigned int src_stride_v, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+ -+void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, -+ const uint8_t * src, -+ unsigned int stride1, 
unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, -+ uint8_t * dst_v, const unsigned int dst_stride_v, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+ -+ -+// w/h in pixels -+void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, -+ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, -+ unsigned int w, unsigned int h, const unsigned int shr); -+ -+ -+// dst must contain required pixel format & allocated data buffers -+// Cropping on the src buffer will be honoured and dst crop will be set to zero -+int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src); -+ -+ -+static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame) -+{ -+#ifdef RPI_ZC_SAND128_ONLY -+ // If we are sure we only only support 128 byte sand formats replace the -+ // var with a constant which should allow for better optimisation -+ return 128; -+#else -+ return frame->linesize[0]; -+#endif -+} -+ -+static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame) -+{ -+ return frame->linesize[3]; -+} -+ -+ -+static inline int av_rpi_is_sand_format(const int format) -+{ -+ return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); -+} -+ -+static inline int av_rpi_is_sand_frame(const AVFrame * const frame) -+{ -+ return av_rpi_is_sand_format(frame->format); -+} -+ -+static inline int av_rpi_is_sand8_frame(const AVFrame * const frame) -+{ -+ return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8); -+} -+ -+static inline int av_rpi_is_sand16_frame(const AVFrame * const frame) -+{ -+ return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); -+} -+ -+static inline 
int av_rpi_is_sand30_frame(const AVFrame * const frame) -+{ -+ return (frame->format == AV_PIX_FMT_RPI4_10); -+} -+ -+static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame) -+{ -+ return av_rpi_is_sand8_frame(frame) ? 0 : 1; -+} -+ -+// If x is measured in bytes (not pixels) then this works for sand64_16 as -+// well as sand128 - but in the general case we work that out -+ -+static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y) -+{ -+ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); -+ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); -+ const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); -+ const unsigned int x1 = x & (stride1 - 1); -+ const unsigned int x2 = x ^ x1; -+ -+ return x1 + stride1 * y + stride2 * x2; -+} -+ -+static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c) -+{ -+ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); -+ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); -+ const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); -+ const unsigned int x1 = x & (stride1 - 1); -+ const unsigned int x2 = x ^ x1; -+ -+ return x1 + stride1 * y_c + stride2 * x2; -+} -+ -+static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y) -+{ -+ return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y); -+} -+ -+static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y) -+{ -+ return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y); -+} -+ -+#endif -+ - -From c1b879de52690fb978f344b05cb213c34c35021f Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 11:36:47 +0100 -Subject: [PATCH 003/186] Add aarch64 asm sand conv functions - -Many thanks to eiler.mike@gmail.com (Michael Eiler) for these 
-optimizations ---- - libavutil/aarch64/Makefile | 2 + - libavutil/aarch64/rpi_sand_neon.S | 676 ++++++++++++++++++++++++++++++ - libavutil/aarch64/rpi_sand_neon.h | 55 +++ - libavutil/rpi_sand_fn_pw.h | 4 +- - libavutil/rpi_sand_fns.c | 3 + - 5 files changed, 738 insertions(+), 2 deletions(-) - create mode 100644 libavutil/aarch64/rpi_sand_neon.S - create mode 100644 libavutil/aarch64/rpi_sand_neon.h - -diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile -index eba01513379a..1b44beab3942 100644 ---- a/libavutil/aarch64/Makefile -+++ b/libavutil/aarch64/Makefile -@@ -4,3 +4,5 @@ OBJS += aarch64/cpu.o \ - - NEON-OBJS += aarch64/float_dsp_neon.o \ - aarch64/tx_float_neon.o \ -+ aarch64/rpi_sand_neon.o \ -+ -diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S -new file mode 100644 -index 000000000000..cdcf71ee6740 ---- /dev/null -+++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -0,0 +1,676 @@ -+/* -+Copyright (c) 2021 Michael Eiler -+ -+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ * Neither the name of the copyright holder nor the -+ names of its contributors may be used to endorse or promote products -+ derived from this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ -+Authors: Michael Eiler -+*/ -+ -+#include "asm.S" -+ -+// void ff_rpi_sand8_lines_to_planar_y8( -+// uint8_t * dest, : x0 -+// unsigned int dst_stride, : w1 -+// const uint8_t * src, : x2 -+// unsigned int src_stride1, : w3, always 128 -+// unsigned int src_stride2, : w4 -+// unsigned int _x, : w5 -+// unsigned int y, : w6 -+// unsigned int _w, : w7 -+// unsigned int h); : [sp, #0] -+ -+function ff_rpi_sand8_lines_to_planar_y8, export=1 -+ // w15 contains the number of rows we need to process -+ ldr w15, [sp, #0] -+ -+ // w8 will contain the number of blocks per row -+ // w8 = floor(_w/stride1) -+ // stride1 is assumed to always be 128 -+ mov w8, w1 -+ lsr w8, w8, #7 -+ -+ // in case the width of the image is not a multiple of 128, there will -+ // be an incomplete block at the end of every row -+ // w9 contains the number of pixels stored within this block -+ // w9 = _w - w8 * 128 -+ lsl w9, w8, #7 -+ sub w9, w7, w9 -+ -+ // this is the value we have to add to the src pointer after reading a complete block -+ // it will move the address to the start of the next block -+ // w10 = stride2 * stride1 - stride1 -+ mov w10, w4 -+ lsl w10, w10, #7 -+ sub w10, w10, #128 -+ -+ // w11 is the row offset, meaning the start offset of the first block of every collumn -+ // this will be increased with stride1 within every iteration of the row_loop -+ eor w11, w11, w11 -+ -+ // w12 = 0, processed row count -+ eor w12, w12, w12 -+row_loop: -+ // start of the 
first block within the current row -+ // x13 = row offset + src -+ mov x13, x2 -+ add x13, x13, x11 -+ -+ // w14 = 0, processed block count -+ eor w14, w14, w14 -+ -+ cmp w8, #0 -+ beq no_main_y8 -+ -+block_loop: -+ // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128 -+ // fortunately these aren't callee saved ones, meaning we don't need to backup them -+ ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 -+ ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 -+ -+ // write these registers back to the destination vector and increase the dst address by 128 -+ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 -+ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 -+ -+ // move the source register to the beginning of the next block (x13 = src + block offset) -+ add x13, x13, x10 -+ // increase the block counter -+ add w14, w14, #1 -+ -+ // continue with the block_loop if we haven't copied all full blocks yet -+ cmp w8, w14 -+ bgt block_loop -+ -+ // handle the last block at the end of each row -+ // at most 127 byte values copied from src to dst -+no_main_y8: -+ eor w5, w5, w5 // i = 0 -+incomplete_block_loop_y8: -+ cmp w5, w9 -+ bge incomplete_block_loop_end_y8 -+ -+ ldrb w6, [x13] -+ strb w6, [x0] -+ add x13, x13, #1 -+ add x0, x0, #1 -+ -+ add w5, w5, #1 -+ b incomplete_block_loop_y8 -+incomplete_block_loop_end_y8: -+ -+ -+ // increase the row offset by 128 (stride1) -+ add w11, w11, #128 -+ // increment the row counter -+ add w12, w12, #1 -+ -+ // process the next row if we haven't finished yet -+ cmp w15, w12 -+ bgt row_loop -+ -+ ret -+endfunc -+ -+ -+ -+// void ff_rpi_sand8_lines_to_planar_c8( -+// uint8_t * dst_u, : x0 -+// unsigned int dst_stride_u, : w1 == width -+// uint8_t * dst_v, : x2 -+// unsigned int dst_stride_v, : w3 == width -+// const uint8_t * src, : x4 -+// unsigned int stride1, : w5 == 128 -+// unsigned int stride2, : w6 -+// unsigned int _x, : w7 -+// unsigned int y, : [sp, #0] -+// unsigned int _w, : 
[sp, #8] -+// unsigned int h); : [sp, #16] -+ -+function ff_rpi_sand8_lines_to_planar_c8, export=1 -+ // w7 = width -+ ldr w7, [sp, #8] -+ -+ // w15 contains the number of rows we need to process -+ // counts down -+ ldr w15, [sp, #16] -+ -+ // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 -+ mov w8, w7 -+ lsr w8, w8, #6 -+ -+ // number of pixels in block at the end of every row -+ // w9 = _w - (w8 * 64) -+ lsl w9, w8, #6 -+ sub w9, w7, w9 -+ -+ // Skip at the end of the line to account for stride -+ sub w12, w1, w7 -+ -+ // address delta to the beginning of the next block -+ // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 -+ lsl w10, w6, #7 -+ sub w10, w10, #128 -+ -+ // w11 = row address start offset = 0 -+ eor w11, w11, w11 -+ -+row_loop_c8: -+ // start of the first block within the current row -+ // x13 = row offset + src -+ mov x13, x4 -+ add x13, x13, x11 -+ -+ // w14 = 0, processed block count -+ eor w14, w14, w14 -+ -+ cmp w8, #0 -+ beq no_main_c8 -+ -+block_loop_c8: -+ // load the full block -> 128 bytes, the block contains 64 interleaved U and V values -+ ld2 { v0.16b, v1.16b }, [x13], #32 -+ ld2 { v2.16b, v3.16b }, [x13], #32 -+ ld2 { v4.16b, v5.16b }, [x13], #32 -+ ld2 { v6.16b, v7.16b }, [x13], #32 -+ -+ // swap register so that we can write them out with a single instruction -+ mov v16.16b, v1.16b -+ mov v17.16b, v3.16b -+ mov v18.16b, v5.16b -+ mov v1.16b, v2.16b -+ mov v2.16b, v4.16b -+ mov v3.16b, v6.16b -+ mov v4.16b, v16.16b -+ mov v5.16b, v17.16b -+ mov v6.16b, v18.16b -+ -+ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 -+ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 -+ -+ // increment row counter and move src to the beginning of the next block -+ add w14, w14, #1 -+ add x13, x13, x10 -+ -+ // jump to block_loop_c8 iff the block count is smaller than the number of full blocks -+ cmp w8, w14 -+ bgt block_loop_c8 -+ -+no_main_c8: -+ // handle incomplete block at the end of every row -+ eor w5, w5, w5 
// point counter, this might be -+incomplete_block_loop_c8: -+ cmp w5, w9 -+ bge incomplete_block_loop_end_c8 -+ -+ ldrb w1, [x13] -+ strb w1, [x0] -+ add x13, x13, #1 -+ -+ ldrb w1, [x13] -+ strb w1, [x2] -+ add x13, x13, #1 -+ -+ add x0, x0, #1 -+ add x2, x2, #1 -+ -+ add w5, w5, #1 -+ b incomplete_block_loop_c8 -+incomplete_block_loop_end_c8: -+ -+ // increase row_offset by stride1 -+ add w11, w11, #128 -+ add x0, x0, w12, sxtw -+ add x2, x2, w12, sxtw -+ -+ // jump to row_Loop_c8 iff the row count is small than the height -+ subs w15, w15, #1 -+ bgt row_loop_c8 -+ -+ ret -+endfunc -+ -+//void ff_rpi_sand30_lines_to_planar_y16( -+// uint8_t * dest, // [x0] -+// unsigned int dst_stride, // [w1] -> assumed to be equal to _w -+// const uint8_t * src, // [x2] -+// unsigned int src_stride1, // [w3] -> 128 -+// unsigned int src_stride2, // [w4] -+// unsigned int _x, // [w5] -+// unsigned int y, // [w6] -+// unsigned int _w, // [w7] -+// unsigned int h); // [sp, #0] -+ -+function ff_rpi_sand30_lines_to_planar_y16, export=1 -+ stp x19, x20, [sp, #-48]! 
-+ stp x21, x22, [sp, #16] -+ stp x23, x24, [sp, #32] -+ -+ // w6 = argument h -+ ldr w6, [sp, #48] -+ -+ // slice_inc = ((stride2 - 1) * stride1) -+ mov w5, w4 -+ sub w5, w5, #1 -+ lsl w5, w5, #7 -+ -+ // total number of bytes per row = (width / 3) * 4 -+ mov w8, w7 -+ mov w9, #3 -+ udiv w8, w8, w9 -+ lsl w8, w8, #2 -+ -+ // number of full 128 byte blocks to be processed -+ mov w9, #96 -+ udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96 -+ -+ // w10 = number of full integers to process (4 bytes) -+ // w11 = remaning zero to two 10bit values still to copy over -+ mov w12, #96 -+ mul w12, w9, w12 -+ sub w12, w7, w12 // width - blocks*96 = remaining points per row -+ mov w11, #3 -+ udiv w10, w12, w11 // full integers to process = w12 / 3 -+ mul w11, w10, w11 // #integers *3 -+ sub w11, w12, w11 // remaining 0-2 points = remaining points - integers*3 -+ -+ // increase w9 by one if w10+w11 is not zero, and decrease the row count by one -+ // this is to efficiently copy incomplete blocks at the end of the rows -+ // the last row is handled explicitly to avoid writing out of bounds -+ add w22, w10, w11 -+ cmp w22, #0 -+ cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise -+ add w9, w9, w22 -+ sub w6, w6, #1 -+ -+ // store the number of bytes in w20 which we copy too much for every row -+ // when the width of the frame is not a multiple of 96 (128bytes storing 96 10bit values) -+ mov w20, #96*2 -+ mul w20, w20, w9 -+ sub w20, w1, w20 -+ -+ mov w23, #0 // flag to check whether the last line had already been processed -+ -+ // bitmask to clear the uppper 6bits of the result values -+ mov x19, #0x03ff03ff03ff03ff -+ dup v22.2d, x19 -+ -+ // row counter = 0 -+ eor w12, w12, w12 -+row_loop_y16: -+ cmp w12, w6 // jump to row_loop_y16_fin if we processed all rows -+ bge row_loop_y16_fin -+ -+ mov x13, x2 // row src -+ eor w14, w14, w14 // full block counter -+block_loop_y16: -+ cmp w14, w9 -+ bge block_loop_y16_fin -+ -+ // load 64 bytes -+ ld1 { v0.4s, v1.4s, v2.4s, v3.4s 
}, [x13], #64 -+ -+ // process v0 and v1 -+ xtn v16.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v17.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v18.4h, v0.4s -+ -+ xtn2 v16.8h, v1.4s -+ and v16.16b, v16.16b, v22.16b -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v17.8h, v1.4s -+ and v17.16b, v17.16b, v22.16b -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v18.8h, v1.4s -+ and v18.16b, v18.16b, v22.16b -+ -+ st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -+ -+ // process v2 and v3 -+ xtn v23.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v24.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v25.4h, v2.4s -+ -+ xtn2 v23.8h, v3.4s -+ and v23.16b, v23.16b, v22.16b -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v24.8h, v3.4s -+ and v24.16b, v24.16b, v22.16b -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v25.8h, v3.4s -+ and v25.16b, v25.16b, v22.16b -+ -+ st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -+ -+ // load the second half of the block -> 64 bytes into registers v4-v7 -+ ld1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x13], #64 -+ -+ // process v4 and v5 -+ xtn v16.4h, v4.4s -+ ushr v4.4s, v4.4s, #10 -+ xtn v17.4h, v4.4s -+ ushr v4.4s, v4.4s, #10 -+ xtn v18.4h, v4.4s -+ -+ xtn2 v16.8h, v5.4s -+ and v16.16b, v16.16b, v22.16b -+ ushr v5.4s, v5.4s, #10 -+ xtn2 v17.8h, v5.4s -+ and v17.16b, v17.16b, v22.16b -+ ushr v5.4s, v5.4s, #10 -+ xtn2 v18.8h, v5.4s -+ and v18.16b, v18.16b, v22.16b -+ -+ st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -+ -+ // v6 and v7 -+ xtn v23.4h, v6.4s -+ ushr v6.4s, v6.4s, #10 -+ xtn v24.4h, v6.4s -+ ushr v6.4s, v6.4s, #10 -+ xtn v25.4h, v6.4s -+ -+ xtn2 v23.8h, v7.4s -+ and v23.16b, v23.16b, v22.16b -+ ushr v7.4s, v7.4s, #10 -+ xtn2 v24.8h, v7.4s -+ and v24.16b, v24.16b, v22.16b -+ ushr v7.4s, v7.4s, #10 -+ xtn2 v25.8h, v7.4s -+ and v25.16b, v25.16b, v22.16b -+ -+ st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -+ -+ add x13, x13, x5 // row src += slice_inc -+ add w14, w14, #1 -+ b block_loop_y16 -+block_loop_y16_fin: -+ -+ -+ -+ -+ add x2, x2, #128 // src += stride1 (start of the next row) -+ add x0, x0, w20, sxtw // subtract the 
bytes we copied too much from dst -+ add w12, w12, #1 -+ b row_loop_y16 -+row_loop_y16_fin: -+ -+ // check whether we have incomplete blocks at the end of every row -+ // in that case decrease row block count by one -+ // change height back to it's original value (meaning increase it by 1) -+ // and jump back to another iteration of row_loop_y16 -+ -+ cmp w23, #1 -+ beq row_loop_y16_fin2 // don't continue here if we already processed the last row -+ add w6, w6, #1 // increase height to the original value -+ sub w9, w9, w22 // block count - 1 or 0, depending on the remaining bytes count -+ mov w23, #1 -+ b row_loop_y16 -+row_loop_y16_fin2: -+ -+ sub x0, x0, w20, sxtw // with the last row we didn't actually move the dst ptr to far ahead, therefore readd the diference -+ -+ // now we've got to handle the last block in the last row -+ eor w12, w12, w12 // w12 = 0 = counter -+integer_loop_y16: -+ cmp w12, w10 -+ bge integer_loop_y16_fin -+ ldr w14, [x13], #4 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ add w12, w12, #1 -+ b integer_loop_y16 -+integer_loop_y16_fin: -+ -+final_values_y16: -+ // remaining point count = w11 -+ ldr w14, [x13], #4 -+ cmp w11, #0 -+ beq final_values_y16_fin -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ cmp w11, #1 -+ beq final_values_y16_fin -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+final_values_y16_fin: -+ -+ ldp x23, x24, [sp, #32] -+ ldp x21, x22, [sp, #16] -+ ldp x19, x20, [sp], #48 -+ ret -+endfunc -+ -+//void ff_rpi_sand30_lines_to_planar_c16( -+// uint8_t * dst_u, // [x0] -+// unsigned int dst_stride_u, // [w1] == _w*2 -+// uint8_t * dst_v, // [x2] -+// unsigned int dst_stride_v, // [w3] == _w*2 -+// const uint8_t * src, // [x4] -+// unsigned int stride1, // [w5] == 128 -+// unsigned int stride2, // [w6] -+// unsigned int _x, // [w7] == 0 -+// unsigned int y, // [sp, #0] 
== 0 -+// unsigned int _w, // [sp, #8] -> w3 -+// unsigned int h); // [sp, #16] -> w7 -+ -+.macro rpi_sand30_lines_to_planar_c16_block_half -+ ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x13], #64 -+ -+ xtn v4.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v5.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v6.4h, v0.4s -+ xtn2 v4.8h, v1.4s -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v5.8h, v1.4s -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v6.8h, v1.4s -+ and v4.16b, v4.16b, v16.16b -+ and v5.16b, v5.16b, v16.16b -+ and v6.16b, v6.16b, v16.16b -+ st3 { v4.8h, v5.8h, v6.8h }, [sp], #48 -+ -+ xtn v4.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v5.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v6.4h, v2.4s -+ xtn2 v4.8h, v3.4s -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v5.8h, v3.4s -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v6.8h, v3.4s -+ and v4.16b, v4.16b, v16.16b -+ and v5.16b, v5.16b, v16.16b -+ and v6.16b, v6.16b, v16.16b -+ st3 { v4.8h, v5.8h, v6.8h }, [sp] -+ sub sp, sp, #48 -+.endm -+ -+function ff_rpi_sand30_lines_to_planar_c16, export=1 -+ stp x19, x20, [sp, #-48]! 
-+ stp x21, x22, [sp, #16] -+ stp x23, x24, [sp, #32] -+ -+ ldr w3, [sp, #48+8] // w3 = width -+ ldr w7, [sp, #48+16] // w7 = height -+ -+ // reserve space on the stack for intermediate results -+ sub sp, sp, #256 -+ -+ // number of 128byte blocks per row, w8 = width / 48 -+ mov w9, #48 -+ udiv w8, w3, w9 -+ -+ // remaining pixels (rem_pix) per row, w9 = width - w8 * 48 -+ mul w9, w8, w9 -+ sub w9, w3, w9 -+ -+ // row offset, the beginning of the next row to process -+ eor w10, w10, w10 -+ -+ // offset to the beginning of the next block, w11 = stride2 * 128 - 128 -+ lsl w11, w6, #7 -+ sub w11, w11, #128 -+ -+ // decrease the height by one and in case of remaining pixels increase the block count by one -+ sub w7, w7, #1 -+ cmp w9, #0 -+ cset w19, ne // w19 == 1 iff reamining pixels != 0 -+ add w8, w8, w19 -+ -+ // bytes we have to move dst back by at the end of every row -+ mov w21, #48*2 -+ mul w21, w21, w8 -+ sub w21, w1, w21 -+ -+ mov w20, #0 // w20 = flag, last row processed -+ -+ mov x12, #0x03ff03ff03ff03ff -+ dup v16.2d, x12 -+ -+ // iterate through rows, row counter = w12 = 0 -+ eor w12, w12, w12 -+row_loop_c16: -+ cmp w12, w7 -+ bge row_loop_c16_fin -+ -+ // address of row data = src + row_offset -+ mov x13, x4 -+ add x13, x13, x10 -+ -+ eor w14, w14, w14 -+block_loop_c16: -+ cmp w14, w8 -+ bge block_loop_c16_fin -+ -+ rpi_sand30_lines_to_planar_c16_block_half -+ -+ ld2 { v0.8h, v1.8h }, [sp], #32 -+ ld2 { v2.8h, v3.8h }, [sp], #32 -+ ld2 { v4.8h, v5.8h }, [sp] -+ sub sp, sp, #64 -+ -+ st1 { v0.8h }, [x0], #16 -+ st1 { v2.8h }, [x0], #16 -+ st1 { v4.8h }, [x0], #16 -+ st1 { v1.8h }, [x2], #16 -+ st1 { v3.8h }, [x2], #16 -+ st1 { v5.8h }, [x2], #16 -+ -+ rpi_sand30_lines_to_planar_c16_block_half -+ -+ ld2 { v0.8h, v1.8h }, [sp], #32 -+ ld2 { v2.8h, v3.8h }, [sp], #32 -+ ld2 { v4.8h, v5.8h }, [sp] -+ sub sp, sp, #64 -+ -+ st1 { v0.8h }, [x0], #16 -+ st1 { v2.8h }, [x0], #16 -+ st1 { v4.8h }, [x0], #16 -+ st1 { v1.8h }, [x2], #16 -+ st1 { v3.8h }, [x2], #16 -+ 
st1 { v5.8h }, [x2], #16 -+ -+ add x13, x13, x11 // offset to next block -+ add w14, w14, #1 -+ b block_loop_c16 -+block_loop_c16_fin: -+ -+ add w10, w10, #128 -+ add w12, w12, #1 -+ add x0, x0, w21, sxtw // move dst pointers back by x21 -+ add x2, x2, w21, sxtw -+ b row_loop_c16 -+row_loop_c16_fin: -+ -+ cmp w20, #1 -+ beq row_loop_c16_fin2 -+ mov w20, #1 -+ sub w8, w8, w19 // decrease block count by w19 -+ add w7, w7, #1 // increase height -+ b row_loop_c16 -+ -+row_loop_c16_fin2: -+ sub x0, x0, w21, sxtw // readd x21 in case of the last row -+ sub x2, x2, w21, sxtw // so that we can write out the few remaining pixels -+ -+ // last incomplete block to be finished -+ // read operations are fine, stride2 is more than large enough even if rem_pix is 0 -+ rpi_sand30_lines_to_planar_c16_block_half -+ ld2 { v0.8h, v1.8h }, [sp], #32 -+ ld2 { v2.8h, v3.8h }, [sp], #32 -+ ld2 { v4.8h, v5.8h }, [sp], #32 -+ rpi_sand30_lines_to_planar_c16_block_half -+ ld2 { v0.8h, v1.8h }, [sp], #32 -+ ld2 { v2.8h, v3.8h }, [sp], #32 -+ ld2 { v4.8h, v5.8h }, [sp] -+ sub sp, sp, #160 -+ -+ mov x4, sp -+ eor w20, w20, w20 -+rem_pix_c16_loop: -+ cmp w20, w9 -+ bge rem_pix_c16_fin -+ -+ ldr w22, [x4], #4 -+ str w22, [x0], #2 -+ lsr w22, w22, #16 -+ str w22, [x2], #2 -+ -+ add w20, w20, #1 -+ b rem_pix_c16_loop -+rem_pix_c16_fin: -+ -+ add sp, sp, #256 -+ -+ ldp x23, x24, [sp, #32] -+ ldp x21, x22, [sp, #16] -+ ldp x19, x20, [sp], #48 -+ ret -+endfunc -+ -+ -+ -+//void ff_rpi_sand30_lines_to_planar_p010( -+// uint8_t * dest, -+// unsigned int dst_stride, -+// const uint8_t * src, -+// unsigned int src_stride1, -+// unsigned int src_stride2, -+// unsigned int _x, -+// unsigned int y, -+// unsigned int _w, -+// unsigned int h); -+ -diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h -new file mode 100644 -index 000000000000..b3aa481ea497 ---- /dev/null -+++ b/libavutil/aarch64/rpi_sand_neon.h -@@ -0,0 +1,55 @@ -+/* -+Copyright (c) 2021 Michael Eiler -+ 
-+Redistribution and use in source and binary forms, with or without -+modification, are permitted provided that the following conditions are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in the -+ documentation and/or other materials provided with the distribution. -+ * Neither the name of the copyright holder nor the -+ names of its contributors may be used to endorse or promote products -+ derived from this software without specific prior written permission. -+ -+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY -+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+ -+Authors: Michael Eiler -+*/ -+ -+#pragma once -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, -+ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, -+ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); -+ -+void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, -+ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); -+ -+void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride, -+ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, -+ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); -+ -+void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u, -+ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, -+ unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); -+ -+#ifdef __cplusplus -+} -+#endif -+ -diff --git a/libavutil/rpi_sand_fn_pw.h b/libavutil/rpi_sand_fn_pw.h -index 0324f6826dde..0d5d203dc3cd 100644 ---- a/libavutil/rpi_sand_fn_pw.h -+++ b/libavutil/rpi_sand_fn_pw.h -@@ -54,7 +54,7 @@ void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride, - const unsigned int w = _w; - const unsigned int mask = stride1 - 1; - --#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64) -+#if PW == 1 && HAVE_SAND_ASM - if (_x == 0) { - ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, - src, stride1, stride2, _x, y, _w, h); -@@ -106,7 +106,7 @@ void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_strid - const unsigned int w = _w * 2; - const unsigned int mask = stride1 - 1; - --#if PW == 1 && (HAVE_SAND_ASM || HAVE_SAND_ASM64) -+#if PW == 1 && HAVE_SAND_ASM - if (_x == 0) { - 
ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, - src, stride1, stride2, _x, y, _w, h); -diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c -index ed0261b02f07..1f543e935701 100644 ---- a/libavutil/rpi_sand_fns.c -+++ b/libavutil/rpi_sand_fns.c -@@ -37,6 +37,9 @@ Authors: John Cox - #if ARCH_ARM && HAVE_NEON - #include "arm/rpi_sand_neon.h" - #define HAVE_SAND_ASM 1 -+#elif ARCH_AARCH64 && HAVE_NEON -+#include "aarch64/rpi_sand_neon.h" -+#define HAVE_SAND_ASM 1 - #else - #define HAVE_SAND_ASM 0 - #endif - -From c45ddc15e96adf8d90eb0c849d60499849213a12 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 11:56:02 +0100 -Subject: [PATCH 004/186] Add raw encoding for sand - ---- - libavcodec/raw.c | 6 +++ - libavcodec/rawenc.c | 92 ++++++++++++++++++++++++++++++++++++++++++++- - 2 files changed, 96 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/raw.c b/libavcodec/raw.c -index 1e5b48d1e06c..1e689f9ee0b8 100644 ---- a/libavcodec/raw.c -+++ b/libavcodec/raw.c -@@ -295,6 +295,12 @@ static const PixelFormatTag raw_pix_fmt_tags[] = { - { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ - { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ - -+ /* RPI (Might as well define for everything) */ -+ { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') }, -+ { AV_PIX_FMT_RPI4_8, MKTAG('S', 'A', 'N', 'D') }, -+ { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') }, -+ { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') }, -+ - { AV_PIX_FMT_NONE, 0 }, - }; - -diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c -index 8c577006d922..594a77c42a64 100644 ---- a/libavcodec/rawenc.c -+++ b/libavcodec/rawenc.c -@@ -24,6 +24,7 @@ - * Raw Video Encoder - */ - -+#include "config.h" - #include "avcodec.h" - #include "codec_internal.h" - #include "encode.h" -@@ -33,6 +34,10 @@ - #include "libavutil/intreadwrite.h" - #include "libavutil/imgutils.h" - #include "libavutil/internal.h" -+#include 
"libavutil/avassert.h" -+#if CONFIG_SAND -+#include "libavutil/rpi_sand_fns.h" -+#endif - - static av_cold int raw_encode_init(AVCodecContext *avctx) - { -@@ -46,12 +51,95 @@ static av_cold int raw_encode_init(AVCodecContext *avctx) - return 0; - } - -+#if CONFIG_SAND -+static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, -+ const AVFrame *frame) -+{ -+ const int width = av_frame_cropped_width(frame); -+ const int height = av_frame_cropped_height(frame); -+ const int x0 = frame->crop_left; -+ const int y0 = frame->crop_top; -+ const int size = width * height * 3 / 2; -+ uint8_t * dst; -+ int ret; -+ -+ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) -+ return ret; -+ -+ dst = pkt->data; -+ -+ av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); -+ dst += width * height; -+ av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2, -+ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2); -+ return 0; -+} -+ -+static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, -+ const AVFrame *frame) -+{ -+ const int width = av_frame_cropped_width(frame); -+ const int height = av_frame_cropped_height(frame); -+ const int x0 = frame->crop_left; -+ const int y0 = frame->crop_top; -+ const int size = width * height * 3; -+ uint8_t * dst; -+ int ret; -+ -+ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) -+ return ret; -+ -+ dst = pkt->data; -+ -+ av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height); -+ dst += width * height * 2; -+ av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width, -+ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2); -+ return 0; -+} -+ -+static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, -+ const AVFrame *frame) 
-+{ -+ const int width = av_frame_cropped_width(frame); -+ const int height = av_frame_cropped_height(frame); -+ const int x0 = frame->crop_left; -+ const int y0 = frame->crop_top; -+ const int size = width * height * 3; -+ uint8_t * dst; -+ int ret; -+ -+ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) -+ return ret; -+ -+ dst = pkt->data; -+ -+ av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); -+ dst += width * height * 2; -+ av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width, -+ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2); -+ return 0; -+} -+#endif -+ -+ - static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, - const AVFrame *frame, int *got_packet) - { -- int ret = av_image_get_buffer_size(frame->format, -- frame->width, frame->height, 1); -+ int ret; - -+#if CONFIG_SAND -+ if (av_rpi_is_sand_frame(frame)) { -+ ret = av_rpi_is_sand8_frame(frame) ? raw_sand8_as_yuv420(avctx, pkt, frame) : -+ av_rpi_is_sand16_frame(frame) ? raw_sand16_as_yuv420(avctx, pkt, frame) : -+ av_rpi_is_sand30_frame(frame) ? 
raw_sand30_as_yuv420(avctx, pkt, frame) : -1; -+ *got_packet = (ret == 0); -+ return ret; -+ } -+#endif -+ -+ ret = av_image_get_buffer_size(frame->format, -+ frame->width, frame->height, 1); - if (ret < 0) - return ret; - - -From 3e02e6190c567a58c8153ba2627f61677b58d6fb Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 12:02:09 +0100 -Subject: [PATCH 005/186] Deal with the lack of trivial sand cropping - ---- - fftools/ffmpeg.c | 4 ++-- - fftools/ffmpeg_filter.c | 4 ++-- - libavutil/frame.c | 11 +++++++++++ - libavutil/frame.h | 10 ++++++++++ - 4 files changed, 25 insertions(+), 4 deletions(-) - -diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c -index c819d30ca523..ca5431aeb401 100644 ---- a/fftools/ffmpeg.c -+++ b/fftools/ffmpeg.c -@@ -1996,8 +1996,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref - av_channel_layout_compare(&ifilter->ch_layout, &frame->ch_layout); - break; - case AVMEDIA_TYPE_VIDEO: -- need_reinit |= ifilter->width != frame->width || -- ifilter->height != frame->height; -+ need_reinit |= ifilter->width != av_frame_cropped_width(frame) || -+ ifilter->height != av_frame_cropped_height(frame); - break; - } - -diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c -index 686a33c2bae7..cfe3351c522f 100644 ---- a/fftools/ffmpeg_filter.c -+++ b/fftools/ffmpeg_filter.c -@@ -1283,8 +1283,8 @@ int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame) - - ifilter->format = frame->format; - -- ifilter->width = frame->width; -- ifilter->height = frame->height; -+ ifilter->width = av_frame_cropped_width(frame); -+ ifilter->height = av_frame_cropped_height(frame); - ifilter->sample_aspect_ratio = frame->sample_aspect_ratio; - - ifilter->sample_rate = frame->sample_rate; -diff --git a/libavutil/frame.c b/libavutil/frame.c -index 9545477acc95..48621e40989f 100644 ---- a/libavutil/frame.c -+++ b/libavutil/frame.c -@@ -16,6 +16,8 @@ - * Foundation, Inc., 51 Franklin Street, Fifth Floor, 
Boston, MA 02110-1301 USA - */ - -+#include "config.h" -+ - #include "channel_layout.h" - #include "avassert.h" - #include "buffer.h" -@@ -27,6 +29,9 @@ - #include "mem.h" - #include "samplefmt.h" - #include "hwcontext.h" -+#if CONFIG_SAND -+#include "rpi_sand_fns.h" -+#endif - - #if FF_API_OLD_CHANNEL_LAYOUT - #define CHECK_CHANNELS_CONSISTENCY(frame) \ -@@ -874,6 +879,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags) - (frame->crop_top + frame->crop_bottom) >= frame->height) - return AVERROR(ERANGE); - -+#if CONFIG_SAND -+ // Sand cannot be cropped - do not try -+ if (av_rpi_is_sand_format(frame->format)) -+ return 0; -+#endif -+ - desc = av_pix_fmt_desc_get(frame->format); - if (!desc) - return AVERROR_BUG; -diff --git a/libavutil/frame.h b/libavutil/frame.h -index 25802695493a..3a9d323325a9 100644 ---- a/libavutil/frame.h -+++ b/libavutil/frame.h -@@ -957,6 +957,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags); - */ - const char *av_frame_side_data_name(enum AVFrameSideDataType type); - -+ -+static inline int av_frame_cropped_width(const AVFrame * const frame) -+{ -+ return frame->width - (frame->crop_left + frame->crop_right); -+} -+static inline int av_frame_cropped_height(const AVFrame * const frame) -+{ -+ return frame->height - (frame->crop_top + frame->crop_bottom); -+} -+ - /** - * @} - */ - -From c7d8474ffa5689abf99367c43ec2d39a1957f564 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 12:31:16 +0100 -Subject: [PATCH 006/186] Add an unsand filter - ---- - configure | 1 + - libavfilter/Makefile | 1 + - libavfilter/allfilters.c | 1 + - libavfilter/buffersrc.c | 2 +- - libavfilter/vf_unsand.c | 228 +++++++++++++++++++++++++++++++++++++++ - 5 files changed, 232 insertions(+), 1 deletion(-) - create mode 100644 libavfilter/vf_unsand.c - -diff --git a/configure b/configure -index 5a5ada20711f..986f51b75b78 100755 ---- a/configure -+++ b/configure -@@ -3754,6 +3754,7 @@ tonemap_opencl_filter_deps="opencl const_nan" - 
transpose_opencl_filter_deps="opencl" - transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" - transpose_vulkan_filter_deps="vulkan spirv_compiler" -+unsand_filter_select="sand" - unsharp_opencl_filter_deps="opencl" - uspp_filter_deps="gpl avcodec" - vaguedenoiser_filter_deps="gpl" -diff --git a/libavfilter/Makefile b/libavfilter/Makefile -index b3d3d981dd46..c14fc995a0b5 100644 ---- a/libavfilter/Makefile -+++ b/libavfilter/Makefile -@@ -518,6 +518,7 @@ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER) += vf_transpose_vaapi.o vaapi_vpp.o - OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o - OBJS-$(CONFIG_TRIM_FILTER) += trim.o - OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o -+OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o - OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o - OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ - opencl/unsharp.o -diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c -index d7db46c2af92..b990a001529b 100644 ---- a/libavfilter/allfilters.c -+++ b/libavfilter/allfilters.c -@@ -490,6 +490,7 @@ extern const AVFilter ff_vf_trim; - extern const AVFilter ff_vf_unpremultiply; - extern const AVFilter ff_vf_unsharp; - extern const AVFilter ff_vf_unsharp_opencl; -+extern const AVFilter ff_vf_unsand; - extern const AVFilter ff_vf_untile; - extern const AVFilter ff_vf_uspp; - extern const AVFilter ff_vf_v360; -diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c -index ba17450b9378..0dbe5d23355c 100644 ---- a/libavfilter/buffersrc.c -+++ b/libavfilter/buffersrc.c -@@ -201,7 +201,7 @@ FF_ENABLE_DEPRECATION_WARNINGS - - switch (ctx->outputs[0]->type) { - case AVMEDIA_TYPE_VIDEO: -- CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height, -+ CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame), - frame->format, frame->pts); - break; - case AVMEDIA_TYPE_AUDIO: -diff --git a/libavfilter/vf_unsand.c 
b/libavfilter/vf_unsand.c -new file mode 100644 -index 000000000000..7100f2fc9b1f ---- /dev/null -+++ b/libavfilter/vf_unsand.c -@@ -0,0 +1,228 @@ -+/* -+ * Copyright (c) 2007 Bobby Bingham -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/** -+ * @file -+ * format and noformat video filters -+ */ -+ -+#include -+ -+#include "libavutil/internal.h" -+#include "libavutil/mem.h" -+#include "libavutil/pixdesc.h" -+#include "libavutil/opt.h" -+#include "libavutil/rpi_sand_fns.h" -+ -+#include "avfilter.h" -+#include "formats.h" -+#include "internal.h" -+#include "video.h" -+ -+typedef struct UnsandContext { -+ const AVClass *class; -+} UnsandContext; -+ -+static av_cold void uninit(AVFilterContext *ctx) -+{ -+// UnsandContext *s = ctx->priv; -+} -+ -+static av_cold int init(AVFilterContext *ctx) -+{ -+// UnsandContext *s = ctx->priv; -+ -+ return 0; -+} -+ -+ -+static int filter_frame(AVFilterLink *link, AVFrame *in) -+{ -+ AVFilterLink * const outlink = link->dst->outputs[0]; -+ AVFrame *out = NULL; -+ int rv = 0; -+ -+ if (outlink->format == in->format) { -+ // If nothing to do then do nothing -+ out = in; -+ } -+ else -+ { -+ if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == 
NULL) -+ { -+ rv = AVERROR(ENOMEM); -+ goto fail; -+ } -+ if (av_rpi_sand_to_planar_frame(out, in) != 0) -+ { -+ rv = -1; -+ goto fail; -+ } -+ -+ av_frame_free(&in); -+ } -+ -+ return ff_filter_frame(outlink, out); -+ -+fail: -+ av_frame_free(&out); -+ av_frame_free(&in); -+ return rv; -+} -+ -+#if 0 -+static void dump_fmts(const AVFilterFormats * fmts) -+{ -+ int i; -+ if (fmts== NULL) { -+ printf("NULL\n"); -+ return; -+ } -+ for (i = 0; i < fmts->nb_formats; ++i) { -+ printf(" %d", fmts->formats[i]); -+ } -+ printf("\n"); -+} -+#endif -+ -+static int query_formats(AVFilterContext *ctx) -+{ -+// UnsandContext *s = ctx->priv; -+ int ret; -+ -+ // If we aren't connected at both ends then just do nothing -+ if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL) -+ return 0; -+ -+ // Our output formats depend on our input formats and we can't/don't -+ // want to convert between bit depths so we need to wait for the source -+ // to have an opinion before we do -+ if (ctx->inputs[0]->incfg.formats == NULL) -+ return AVERROR(EAGAIN); -+ -+ // Accept anything -+ if (ctx->inputs[0]->outcfg.formats == NULL && -+ (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0) -+ return ret; -+ -+ // Filter out sand formats -+ -+ // Generate a container if we don't already have one -+ if (ctx->outputs[0]->incfg.formats == NULL) -+ { -+ // Somewhat rubbish way of ensuring we have a good structure -+ const static enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; -+ AVFilterFormats *formats = ff_make_format_list(out_fmts); -+ -+ if (formats == NULL) -+ return AVERROR(ENOMEM); -+ if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0) -+ return ret; -+ } -+ -+ // Replace old format list with new filtered list derived from what our -+ // input says it can do -+ { -+ const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats; -+ AVFilterFormats * const dst_ff = 
ctx->outputs[0]->incfg.formats; -+ enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats); -+ int i; -+ int n = 0; -+ int seen_420p = 0; -+ int seen_420p10 = 0; -+ -+ for (i = 0; i < src_ff->nb_formats; ++i) { -+ const enum AVPixelFormat f = src_ff->formats[i]; -+ -+ switch (f){ -+ case AV_PIX_FMT_YUV420P: -+ case AV_PIX_FMT_SAND128: -+ case AV_PIX_FMT_RPI4_8: -+ if (!seen_420p) { -+ seen_420p = 1; -+ dst_fmts[n++] = AV_PIX_FMT_YUV420P; -+ } -+ break; -+ case AV_PIX_FMT_SAND64_10: -+ case AV_PIX_FMT_YUV420P10: -+ case AV_PIX_FMT_RPI4_10: -+ if (!seen_420p10) { -+ seen_420p10 = 1; -+ dst_fmts[n++] = AV_PIX_FMT_YUV420P10; -+ } -+ break; -+ default: -+ dst_fmts[n++] = f; -+ break; -+ } -+ } -+ -+ av_freep(&dst_ff->formats); -+ dst_ff->formats = dst_fmts; -+ dst_ff->nb_formats = n; -+ } -+ -+// printf("Unsand: %s calc: ", __func__); -+// dump_fmts(ctx->outputs[0]->incfg.formats); -+ -+ return 0; -+} -+ -+ -+#define OFFSET(x) offsetof(UnsandContext, x) -+static const AVOption unsand_options[] = { -+ { NULL } -+}; -+ -+ -+AVFILTER_DEFINE_CLASS(unsand); -+ -+static const AVFilterPad avfilter_vf_unsand_inputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO, -+ .filter_frame = filter_frame, -+ }, -+ { NULL } -+}; -+ -+static const AVFilterPad avfilter_vf_unsand_outputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO -+ }, -+}; -+ -+AVFilter ff_vf_unsand = { -+ .name = "unsand", -+ .description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"), -+ -+ .init = init, -+ .uninit = uninit, -+ -+ FILTER_QUERY_FUNC(query_formats), -+ -+ .priv_size = sizeof(UnsandContext), -+ .priv_class = &unsand_class, -+ -+ FILTER_INPUTS(avfilter_vf_unsand_inputs), -+ FILTER_OUTPUTS(avfilter_vf_unsand_outputs), -+}; -+ - -From d154e34686db628b84d74d0808b080c1d1ce5c41 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 12:37:07 +0100 -Subject: [PATCH 007/186] Reduce mmal compile warnings - ---- - libavcodec/mmaldec.c 
| 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c -index 3092f5851077..6f41b41ac4c0 100644 ---- a/libavcodec/mmaldec.c -+++ b/libavcodec/mmaldec.c -@@ -24,6 +24,9 @@ - * MMAL Video Decoder - */ - -+#pragma GCC diagnostic push -+// Many many redundant decls in the header files -+#pragma GCC diagnostic ignored "-Wredundant-decls" - #include - #include - #include -@@ -31,6 +34,7 @@ - #include - #include - #include -+#pragma GCC diagnostic pop - #include - - #include "avcodec.h" - -From 1a7988f63e4137ebcc345470dcde20b62e71bdec Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 17:56:16 +0100 -Subject: [PATCH 008/186] Add chroma location to hevc parse - ---- - libavcodec/hevc_parser.c | 13 +++++++++++++ - libavcodec/hevcdec.c | 13 +++++++++++++ - 2 files changed, 26 insertions(+) - -diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c -index 59f9a0ff3e57..4ae7222e8b8d 100644 ---- a/libavcodec/hevc_parser.c -+++ b/libavcodec/hevc_parser.c -@@ -97,6 +97,19 @@ static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal, - avctx->profile = ps->sps->ptl.general_ptl.profile_idc; - avctx->level = ps->sps->ptl.general_ptl.level_idc; - -+ if (ps->sps->chroma_format_idc == 1) { -+ avctx->chroma_sample_location = ps->sps->vui.common.chroma_loc_info_present_flag ? 
-+ ps->sps->vui.common.chroma_sample_loc_type_top_field + 1 : -+ AVCHROMA_LOC_LEFT; -+ } -+ else if (ps->sps->chroma_format_idc == 2 || -+ ps->sps->chroma_format_idc == 3) { -+ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; -+ } -+ else { -+ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; -+ } -+ - if (ps->vps->vps_timing_info_present_flag) { - num = ps->vps->vps_num_units_in_tick; - den = ps->vps->vps_time_scale; -diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c -index 0e2844f47cef..88482fd5215b 100644 ---- a/libavcodec/hevcdec.c -+++ b/libavcodec/hevcdec.c -@@ -347,6 +347,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps) - else - avctx->color_range = AVCOL_RANGE_MPEG; - -+ if (sps->chroma_format_idc == 1) { -+ avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ? -+ sps->vui.common.chroma_sample_loc_type_top_field + 1 : -+ AVCHROMA_LOC_LEFT; -+ } -+ else if (sps->chroma_format_idc == 2 || -+ sps->chroma_format_idc == 3) { -+ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; -+ } -+ else { -+ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; -+ } -+ - if (sps->vui.common.colour_description_present_flag) { - avctx->color_primaries = sps->vui.common.colour_primaries; - avctx->color_trc = sps->vui.common.transfer_characteristics; - -From 8e5f8555b5908ca720c4ffd8b3a784d956883317 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 26 Sep 2022 18:20:50 +0100 -Subject: [PATCH 009/186] hwaccel: Add .abort_frame & use in hevcdec - ---- - libavcodec/avcodec.h | 11 +++++++++++ - libavcodec/hevcdec.c | 7 ++++++- - 2 files changed, 17 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h -index 39881a1d2bcf..32bc78e2be25 100644 ---- a/libavcodec/avcodec.h -+++ b/libavcodec/avcodec.h -@@ -2221,6 +2221,17 @@ typedef struct AVHWAccel { - * that avctx->hwaccel_priv_data is invalid. 
- */ - int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); -+ -+ /** -+ * Called if parsing fails -+ * -+ * An error has occured, end_frame will not be called -+ * start_frame & decode_slice may or may not have been called -+ * Optional -+ * -+ * @param avctx the codec context -+ */ -+ void (*abort_frame)(AVCodecContext *avctx); - } AVHWAccel; - - /** -diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c -index 88482fd5215b..4ee564f3e028 100644 ---- a/libavcodec/hevcdec.c -+++ b/libavcodec/hevcdec.c -@@ -3378,8 +3378,13 @@ static int hevc_decode_frame(AVCodecContext *avctx, AVFrame *rframe, - - s->ref = NULL; - ret = decode_nal_units(s, avpkt->data, avpkt->size); -- if (ret < 0) -+ if (ret < 0) { -+ // Ensure that hwaccel knows this frame is over -+ if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame) -+ s->avctx->hwaccel->abort_frame(s->avctx); -+ - return ret; -+ } - - if (avctx->hwaccel) { - if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { - -From f09419a30e1beae74ae167d8b33de4214cae6f0a Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 26 Sep 2022 18:26:17 +0100 -Subject: [PATCH 010/186] hwaccel: Add CAP_MT_SAFE for accels that can use - multi-thread - ---- - libavcodec/hwconfig.h | 1 + - libavcodec/pthread_frame.c | 7 +++++-- - 2 files changed, 6 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h -index 721424912c46..c43ad55245ac 100644 ---- a/libavcodec/hwconfig.h -+++ b/libavcodec/hwconfig.h -@@ -24,6 +24,7 @@ - - - #define HWACCEL_CAP_ASYNC_SAFE (1 << 0) -+#define HWACCEL_CAP_MT_SAFE (1 << 1) - - - typedef struct AVCodecHWConfigInternal { -diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c -index d9d5afaa82d8..2cc89a41f55f 100644 ---- a/libavcodec/pthread_frame.c -+++ b/libavcodec/pthread_frame.c -@@ -204,7 +204,8 @@ static attribute_align_arg void *frame_worker_thread(void *arg) - - /* if the previous thread uses hwaccel then we take the lock to ensure - 
* the threads don't run concurrently */ -- if (avctx->hwaccel) { -+ if (avctx->hwaccel && -+ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { - pthread_mutex_lock(&p->parent->hwaccel_mutex); - p->hwaccel_serializing = 1; - } -@@ -590,7 +591,9 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { - - if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return; - -- if (avctx->hwaccel && !p->hwaccel_serializing) { -+ if (avctx->hwaccel && -+ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && -+ !p->hwaccel_serializing) { - pthread_mutex_lock(&p->parent->hwaccel_mutex); - p->hwaccel_serializing = 1; - } - -From 8142c4a1774c80ce059ddff3d6e4377803e3081d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 17:59:08 +0100 -Subject: [PATCH 011/186] Weak link utils - ---- - libavcodec/weak_link.c | 102 +++++++++++++++++++++++++++++++++++++++++ - libavcodec/weak_link.h | 23 ++++++++++ - 2 files changed, 125 insertions(+) - create mode 100644 libavcodec/weak_link.c - create mode 100644 libavcodec/weak_link.h - -diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c -new file mode 100644 -index 000000000000..f234a985b9c1 ---- /dev/null -+++ b/libavcodec/weak_link.c -@@ -0,0 +1,102 @@ -+#include -+#include -+#include -+#include "weak_link.h" -+ -+struct ff_weak_link_master { -+ atomic_int ref_count; /* 0 is single ref for easier atomics */ -+ pthread_rwlock_t lock; -+ void * ptr; -+}; -+ -+static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c) -+{ -+ return (struct ff_weak_link_master *)c; -+} -+ -+struct ff_weak_link_master * ff_weak_link_new(void * p) -+{ -+ struct ff_weak_link_master * w = malloc(sizeof(*w)); -+ if (!w) -+ return NULL; -+ w->ptr = p; -+ if (pthread_rwlock_init(&w->lock, NULL)) { -+ free(w); -+ return NULL; -+ } -+ return w; -+} -+ -+static void weak_link_do_unref(struct ff_weak_link_master * const w) -+{ -+ int n = atomic_fetch_sub(&w->ref_count, 1); -+ if (n) -+ return; -+ -+ 
pthread_rwlock_destroy(&w->lock); -+ free(w); -+} -+ -+// Unref & break link -+void ff_weak_link_break(struct ff_weak_link_master ** ppLink) -+{ -+ struct ff_weak_link_master * const w = *ppLink; -+ if (!w) -+ return; -+ -+ *ppLink = NULL; -+ pthread_rwlock_wrlock(&w->lock); -+ w->ptr = NULL; -+ pthread_rwlock_unlock(&w->lock); -+ -+ weak_link_do_unref(w); -+} -+ -+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w) -+{ -+ if (!w) -+ return NULL; -+ atomic_fetch_add(&w->ref_count, 1); -+ return (struct ff_weak_link_client*)w; -+} -+ -+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink) -+{ -+ struct ff_weak_link_master * const w = weak_link_x(*ppLink); -+ if (!w) -+ return; -+ -+ *ppLink = NULL; -+ weak_link_do_unref(w); -+} -+ -+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink) -+{ -+ struct ff_weak_link_master * const w = weak_link_x(*ppLink); -+ -+ if (!w) -+ return NULL; -+ -+ if (pthread_rwlock_rdlock(&w->lock)) -+ goto broken; -+ -+ if (w->ptr) -+ return w->ptr; -+ -+ pthread_rwlock_unlock(&w->lock); -+ -+broken: -+ *ppLink = NULL; -+ weak_link_do_unref(w); -+ return NULL; -+} -+ -+// Ignores a NULL c (so can be on the return path of both broken & live links) -+void ff_weak_link_unlock(struct ff_weak_link_client * c) -+{ -+ struct ff_weak_link_master * const w = weak_link_x(c); -+ if (w) -+ pthread_rwlock_unlock(&w->lock); -+} -+ -+ -diff --git a/libavcodec/weak_link.h b/libavcodec/weak_link.h -new file mode 100644 -index 000000000000..415b6a27a05c ---- /dev/null -+++ b/libavcodec/weak_link.h -@@ -0,0 +1,23 @@ -+struct ff_weak_link_master; -+struct ff_weak_link_client; -+ -+struct ff_weak_link_master * ff_weak_link_new(void * p); -+void ff_weak_link_break(struct ff_weak_link_master ** ppLink); -+ -+struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w); -+void ff_weak_link_unref(struct ff_weak_link_client ** ppLink); -+ -+// Returns NULL if link broken - in this case it will also zap -+// 
*ppLink and unref the weak_link. -+// Returns NULL if *ppLink is NULL (so a link once broken stays broken) -+// -+// The above does mean that there is a race if this is called simultainiously -+// by two threads using the same weak_link_client (so don't do that) -+void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink); -+void ff_weak_link_unlock(struct ff_weak_link_client * c); -+ -+ -+ -+ -+ -+ - -From 2610fffb3ca25f1a531876c80bf4c4b43c934386 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 19:23:26 +0100 -Subject: [PATCH 012/186] Add v4l2_req V4L2 request H265 drm_prime decode - -Has the abiliy to switch between kernel API versions at runtime. This -could be removed later once teher is no chance of usage on an old -kernel. ---- - configure | 14 + - libavcodec/Makefile | 4 + - libavcodec/hevc-ctrls-v1.h | 229 +++++ - libavcodec/hevc-ctrls-v2.h | 257 +++++ - libavcodec/hevcdec.c | 10 + - libavcodec/hwaccels.h | 1 + - libavcodec/hwconfig.h | 2 + - libavcodec/v4l2_req_decode_q.c | 84 ++ - libavcodec/v4l2_req_decode_q.h | 25 + - libavcodec/v4l2_req_devscan.c | 449 +++++++++ - libavcodec/v4l2_req_devscan.h | 23 + - libavcodec/v4l2_req_dmabufs.c | 266 ++++++ - libavcodec/v4l2_req_dmabufs.h | 40 + - libavcodec/v4l2_req_hevc_v1.c | 3 + - libavcodec/v4l2_req_hevc_v2.c | 3 + - libavcodec/v4l2_req_hevc_vx.c | 1213 +++++++++++++++++++++++ - libavcodec/v4l2_req_media.c | 1596 +++++++++++++++++++++++++++++++ - libavcodec/v4l2_req_media.h | 151 +++ - libavcodec/v4l2_req_pollqueue.c | 361 +++++++ - libavcodec/v4l2_req_pollqueue.h | 18 + - libavcodec/v4l2_req_utils.h | 27 + - libavcodec/v4l2_request_hevc.c | 297 ++++++ - libavcodec/v4l2_request_hevc.h | 102 ++ - 23 files changed, 5175 insertions(+) - create mode 100644 libavcodec/hevc-ctrls-v1.h - create mode 100644 libavcodec/hevc-ctrls-v2.h - create mode 100644 libavcodec/v4l2_req_decode_q.c - create mode 100644 libavcodec/v4l2_req_decode_q.h - create mode 100644 libavcodec/v4l2_req_devscan.c - create 
mode 100644 libavcodec/v4l2_req_devscan.h - create mode 100644 libavcodec/v4l2_req_dmabufs.c - create mode 100644 libavcodec/v4l2_req_dmabufs.h - create mode 100644 libavcodec/v4l2_req_hevc_v1.c - create mode 100644 libavcodec/v4l2_req_hevc_v2.c - create mode 100644 libavcodec/v4l2_req_hevc_vx.c - create mode 100644 libavcodec/v4l2_req_media.c - create mode 100644 libavcodec/v4l2_req_media.h - create mode 100644 libavcodec/v4l2_req_pollqueue.c - create mode 100644 libavcodec/v4l2_req_pollqueue.h - create mode 100644 libavcodec/v4l2_req_utils.h - create mode 100644 libavcodec/v4l2_request_hevc.c - create mode 100644 libavcodec/v4l2_request_hevc.h - -diff --git a/configure b/configure -index 986f51b75b78..c09144673050 100755 ---- a/configure -+++ b/configure -@@ -281,6 +281,7 @@ External library support: - if openssl, gnutls or mbedtls is not used [no] - --enable-libtwolame enable MP2 encoding via libtwolame [no] - --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] -+ --enable-libudev enable libudev [no] - --enable-libv4l2 enable libv4l2/v4l-utils [no] - --enable-libvidstab enable video stabilization using vid.stab [no] - --enable-libvmaf enable vmaf filter via libvmaf [no] -@@ -351,6 +352,7 @@ External library support: --enable-omx-rpi enable OpenMAX IL code for Raspberry Pi [no] --enable-rkmpp enable Rockchip Media Process Platform code [no] --disable-v4l2-m2m disable V4L2 mem2mem code [autodetect] @@ -5268,15 +35,25 @@ index 986f51b75b78..c09144673050 100755 --disable-vaapi disable Video Acceleration API (mainly Unix/Intel) code [autodetect] --disable-vdpau disable Nvidia Video Decode and Presentation API for Unix code [autodetect] --disable-videotoolbox disable VideoToolbox code [autodetect] -@@ -1858,6 +1860,7 @@ EXTERNAL_LIBRARY_LIST=" - libtheora - libtwolame - libuavs3d +@@ -1842,7 +1848,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST=" + avfoundation + bzlib + coreimage ++ epoxy + iconv + libudev - libv4l2 - libvmaf - libvorbis -@@ -1914,6 +1917,7 @@ 
HWACCEL_LIBRARY_LIST=" + libxcb + libxcb_shm + libxcb_shape +@@ -1916,6 +1924,7 @@ EXTERNAL_LIBRARY_LIST=" + libcodec2 + libdav1d + libdc1394 ++ libdrm + libflite + libfontconfig + libfreetype +@@ -2025,6 +2034,7 @@ HWACCEL_LIBRARY_LIST=" mmal omx opencl @@ -5284,15 +61,38 @@ index 986f51b75b78..c09144673050 100755 " DOCUMENT_LIST=" -@@ -3002,6 +3006,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder ID3D11VideoContext" - dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32" +@@ -2042,10 +2052,14 @@ FEATURE_LIST=" + omx_rpi + runtime_cpudetect + safe_bitstream_reader ++ sand + shared + small + static + swscale_alpha ++ v4l2_req_hevc_vx ++ vout_drm ++ vout_egl + " + + # this list should be kept in linking order +@@ -2621,6 +2635,7 @@ CONFIG_EXTRA=" + rtpdec + rtpenc_chain + rv34dsp ++ sand + scene_sad + sinewin + snappy +@@ -3146,6 +3161,7 @@ dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32" ffnvcodec_deps_any="libdl LoadLibrary" + mediacodec_deps="android mediandk" nvdec_deps="ffnvcodec" +v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev" vaapi_x11_deps="xlib_x11" videotoolbox_hwaccel_deps="videotoolbox pthreads" videotoolbox_hwaccel_extralibs="-framework QuartzCore" -@@ -3045,6 +3050,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC" +@@ -3200,6 +3216,8 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicParams_HEVC" hevc_dxva2_hwaccel_select="hevc_decoder" hevc_nvdec_hwaccel_deps="nvdec" hevc_nvdec_hwaccel_select="hevc_decoder" @@ -5301,56 +101,226 @@ index 986f51b75b78..c09144673050 100755 hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC" hevc_vaapi_hwaccel_select="hevc_decoder" hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC" -@@ -6696,6 +6703,7 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame - { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || - die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } - enabled 
libuavs3d && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uavs3d.h uavs3d_decode -+enabled libudev && require_pkg_config libudev libudev libudev.h udev_new - enabled libv4l2 && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl - enabled libvidstab && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit - enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init -@@ -6798,6 +6806,10 @@ enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/r +@@ -3746,8 +3764,11 @@ sndio_indev_deps="sndio" + sndio_outdev_deps="sndio" + v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h" + v4l2_indev_suggest="libv4l2" ++v4l2_outdev_deps="libdrm" + v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" + v4l2_outdev_suggest="libv4l2" ++vout_drm_outdev_deps="libdrm" ++vout_egl_outdev_deps="xlib epoxy" + vfwcap_indev_deps="vfw32 vfwcap_defines" + xcbgrab_indev_deps="libxcb" + xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" +@@ -3854,6 +3875,7 @@ cropdetect_filter_deps="gpl" + deinterlace_qsv_filter_deps="libmfx" + deinterlace_qsv_filter_select="qsvvpp" + deinterlace_vaapi_filter_deps="vaapi" ++deinterlace_v4l2m2m_filter_deps="libdrm v4l2_m2m" + delogo_filter_deps="gpl" + denoise_vaapi_filter_deps="vaapi" + derain_filter_select="dnn" +@@ -3962,6 +3984,7 @@ transpose_opencl_filter_deps="opencl" + transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags" + transpose_vt_filter_deps="videotoolbox VTPixelRotationSessionCreate" + transpose_vulkan_filter_deps="vulkan spirv_compiler" ++unsand_filter_select="sand" + unsharp_opencl_filter_deps="opencl" + uspp_filter_deps="gpl avcodec" + vaguedenoiser_filter_deps="gpl" +@@ -3973,6 +3996,7 @@ libvmaf_cuda_filter_deps="libvmaf libvmaf_cuda ffnvcodec" + zmq_filter_deps="libzmq" + zoompan_filter_deps="swscale" + zscale_filter_deps="libzimg const_nan" ++scale_v4l2m2m_filter_deps="libdrm v4l2_m2m" + scale_vaapi_filter_deps="vaapi" + 
scale_vt_filter_deps="videotoolbox VTPixelTransferSessionCreate" + scale_vulkan_filter_deps="vulkan spirv_compiler" +@@ -6623,6 +6647,12 @@ if enabled xlib; then + disable xlib + fi + ++enabled libudev && ++ check_pkg_config libudev libudev libudev.h udev_new ++ ++enabled epoxy && ++ check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version ++ + check_headers direct.h + check_headers dirent.h + check_headers dxgidebug.h +@@ -7113,8 +7143,16 @@ enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/r { enabled libdrm || die "ERROR: rkmpp requires --enable-libdrm"; } } +enabled v4l2_request && { enabled libdrm || + die "ERROR: v4l2-request requires --enable-libdrm"; } && + { enabled libudev || -+ die "ERROR: v4l2-request requires --enable-libudev"; } - enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init ++ die "ERROR: v4l2-request requires libudev"; } + enabled vapoursynth && require_headers "vapoursynth/VSScript4.h vapoursynth/VapourSynth4.h" ++enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; } ++ ++enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } && ++ { enabled xlib || die "ERROR: vout_egl requires xlib"; } -@@ -6880,6 +6892,8 @@ if enabled v4l2_m2m; then + if enabled gcrypt; then + GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" +@@ -7197,6 +7235,10 @@ if enabled v4l2_m2m; then check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;" fi +check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns +check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;" ++disable v4l2_req_hevc_vx ++ check_headers sys/videoio.h test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete +@@ -7709,6 +7751,9 @@ enabled threads || warn \ + "that the libraries from this build MUST NOT be used in a multi-threaded"\ + "environment." 
+ ++# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done ++enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx ++ + case $target_os in + haiku) + disable memalign +diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h +index 733d551fa419..b3a3b00580a2 100644 +--- a/fftools/ffmpeg.h ++++ b/fftools/ffmpeg.h +@@ -715,6 +715,8 @@ extern enum VideoSyncMethod video_sync_method; + extern float frame_drop_threshold; + extern int do_benchmark; + extern int do_benchmark_all; ++extern int no_cvt_hw; ++extern int do_deinterlace; + extern int do_hex_dump; + extern int do_pkt_dump; + extern int copy_ts; +diff --git a/fftools/ffmpeg_dec.c b/fftools/ffmpeg_dec.c +index 2723a0312e92..aee41267fb9c 100644 +--- a/fftools/ffmpeg_dec.c ++++ b/fftools/ffmpeg_dec.c +@@ -392,7 +392,7 @@ static int video_frame_process(DecoderPriv *dp, AVFrame *frame, + } + #endif + +- if (frame->format == dp->hwaccel_pix_fmt) { ++ if (!no_cvt_hw && frame->format == dp->hwaccel_pix_fmt) { + int err = hwaccel_retrieve_data(dp->dec_ctx, frame); + if (err < 0) + return err; +@@ -1333,12 +1333,15 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat + break; + + if (dp->hwaccel_id == HWACCEL_GENERIC || +- dp->hwaccel_id == HWACCEL_AUTO) { ++ dp->hwaccel_id == HWACCEL_AUTO || ++ no_cvt_hw) { + for (int i = 0;; i++) { + config = avcodec_get_hw_config(s->codec, i); + if (!config) + break; +- if (!(config->methods & ++ if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL)) ++ av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p); ++ else if (!(config->methods & + AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) + continue; + if (config->pix_fmt == *p) +diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c +index 7ec328e04eaa..de873732958b 100644 +--- a/fftools/ffmpeg_filter.c ++++ b/fftools/ffmpeg_filter.c +@@ -2782,8 +2782,8 @@ static int send_frame(FilterGraph *fg, 
FilterGraphThread *fgt, + break; + case AVMEDIA_TYPE_VIDEO: + if (ifp->format != frame->format || +- ifp->width != frame->width || +- ifp->height != frame->height || ++ ifp->width != av_frame_cropped_width(frame) || ++ ifp->height != av_frame_cropped_height(frame) || + ifp->color_space != frame->colorspace || + ifp->color_range != frame->color_range) + need_reinit |= VIDEO_CHANGED; +@@ -2804,6 +2804,9 @@ static int send_frame(FilterGraph *fg, FilterGraphThread *fgt, + (ifp->hw_frames_ctx && ifp->hw_frames_ctx->data != frame->hw_frames_ctx->data)) + need_reinit |= HWACCEL_CHANGED; + ++ if (no_cvt_hw && fgt->graph) ++ need_reinit = 0; ++ + if (need_reinit) { + ret = ifilter_parameters_from_frame(ifilter, frame); + if (ret < 0) +diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c +index 5d4c06c28e85..f39bd29b6e4a 100644 +--- a/fftools/ffmpeg_hw.c ++++ b/fftools/ffmpeg_hw.c +@@ -73,6 +73,8 @@ static char *hw_device_default_name(enum AVHWDeviceType type) + char *name; + size_t index_pos; + int index, index_limit = 1000; ++ if (!type_name) ++ return NULL; + index_pos = strlen(type_name); + name = av_malloc(index_pos + 4); + if (!name) +diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c +index f639a1cf0aab..9670585821e8 100644 +--- a/fftools/ffmpeg_opt.c ++++ b/fftools/ffmpeg_opt.c +@@ -62,6 +62,7 @@ enum VideoSyncMethod video_sync_method = VSYNC_AUTO; + float frame_drop_threshold = 0; + int do_benchmark = 0; + int do_benchmark_all = 0; ++int no_cvt_hw = 0; + int do_hex_dump = 0; + int do_pkt_dump = 0; + int copy_ts = 0; +@@ -1574,8 +1575,11 @@ const OptionDef options[] = { + { "benchmark_all", OPT_TYPE_BOOL, OPT_EXPERT, + { &do_benchmark_all }, + "add timings for each task" }, +- { "progress", OPT_TYPE_FUNC, OPT_FUNC_ARG | OPT_EXPERT, +- { .func_arg = opt_progress }, ++ { "no_cvt_hw", OPT_TYPE_BOOL, OPT_EXPERT, ++ { &no_cvt_hw }, ++ "do not auto-convert hw frames to sw" }, ++ { "progress", OPT_TYPE_FUNC, OPT_FUNC_ARG | OPT_EXPERT, ++ { .func_arg = opt_progress 
}, + "write program-readable progress information", "url" }, + { "stdin", OPT_TYPE_BOOL, OPT_EXPERT, + { &stdin_interaction }, diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 389253f5d08e..2d440b56486b 100644 +index a4fcce3b4215..2ce880c3db63 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile -@@ -170,6 +170,8 @@ OBJS-$(CONFIG_VP3DSP) += vp3dsp.o +@@ -175,7 +175,10 @@ OBJS-$(CONFIG_VIDEODSP) += videodsp.o + OBJS-$(CONFIG_VP3DSP) += vp3dsp.o OBJS-$(CONFIG_VP56DSP) += vp56dsp.o OBJS-$(CONFIG_VP8DSP) += vp8dsp.o - OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o +-OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o ++OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\ ++ weak_link.o v4l2_req_dmabufs.o +OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\ + v4l2_req_devscan.o weak_link.o OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o -@@ -996,6 +998,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o - OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o +@@ -1025,6 +1028,8 @@ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o + OBJS-$(CONFIG_HEVC_D3D12VA_HWACCEL) += dxva2_hevc.o d3d12va_hevc.o OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o -+OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o\ -+ v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o ++OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o ++OBJS-$(CONFIG_V4L2_REQ_HEVC_VX) += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o - OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o + OBJS-$(CONFIG_HEVC_VULKAN_HWACCEL) += vulkan_decode.o vulkan_hevc.o diff --git 
a/libavcodec/hevc-ctrls-v1.h b/libavcodec/hevc-ctrls-v1.h new file mode 100644 index 000000000000..72cbba0953dc @@ -5849,13997 +819,6 @@ index 000000000000..7cbbbf055f47 +}; + +#endif -diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c -index 4ee564f3e028..e892436f9405 100644 ---- a/libavcodec/hevcdec.c -+++ b/libavcodec/hevcdec.c -@@ -416,6 +416,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) - #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \ - CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ - CONFIG_HEVC_NVDEC_HWACCEL + \ -+ CONFIG_HEVC_V4L2REQUEST_HWACCEL + \ - CONFIG_HEVC_VAAPI_HWACCEL + \ - CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ - CONFIG_HEVC_VDPAU_HWACCEL) -@@ -442,6 +443,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) - #endif - #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL - *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX; -+#endif -+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL -+ *fmt++ = AV_PIX_FMT_DRM_PRIME; - #endif - break; - case AV_PIX_FMT_YUV420P10: -@@ -463,6 +467,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) - #endif - #if CONFIG_HEVC_NVDEC_HWACCEL - *fmt++ = AV_PIX_FMT_CUDA; -+#endif -+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL -+ *fmt++ = AV_PIX_FMT_DRM_PRIME; - #endif - break; - case AV_PIX_FMT_YUV444P: -@@ -3752,6 +3759,9 @@ const FFCodec ff_hevc_decoder = { - #if CONFIG_HEVC_NVDEC_HWACCEL - HWACCEL_NVDEC(hevc), - #endif -+#if CONFIG_HEVC_V4L2REQUEST_HWACCEL -+ HWACCEL_V4L2REQUEST(hevc), -+#endif - #if CONFIG_HEVC_VAAPI_HWACCEL - HWACCEL_VAAPI(hevc), - #endif -diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h -index aca55831f32f..f32d1c4ec4f8 100644 ---- a/libavcodec/hwaccels.h -+++ b/libavcodec/hwaccels.h -@@ -40,6 +40,7 @@ extern const AVHWAccel ff_hevc_d3d11va_hwaccel; - extern const AVHWAccel ff_hevc_d3d11va2_hwaccel; - extern const AVHWAccel ff_hevc_dxva2_hwaccel; - extern const AVHWAccel ff_hevc_nvdec_hwaccel; -+extern const AVHWAccel ff_hevc_v4l2request_hwaccel; - extern const 
AVHWAccel ff_hevc_vaapi_hwaccel; - extern const AVHWAccel ff_hevc_vdpau_hwaccel; - extern const AVHWAccel ff_hevc_videotoolbox_hwaccel; -diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h -index c43ad55245ac..b8aa38307169 100644 ---- a/libavcodec/hwconfig.h -+++ b/libavcodec/hwconfig.h -@@ -71,6 +71,8 @@ typedef struct AVCodecHWConfigInternal { - HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel) - #define HWACCEL_NVDEC(codec) \ - HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel) -+#define HWACCEL_V4L2REQUEST(codec) \ -+ HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, DRM, ff_ ## codec ## _v4l2request_hwaccel) - #define HWACCEL_VAAPI(codec) \ - HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel) - #define HWACCEL_VDPAU(codec) \ -diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c -new file mode 100644 -index 000000000000..5b3fb958fac3 ---- /dev/null -+++ b/libavcodec/v4l2_req_decode_q.c -@@ -0,0 +1,84 @@ -+#include -+#include -+#include -+ -+#include "v4l2_req_decode_q.h" -+ -+int decode_q_in_q(const req_decode_ent * const d) -+{ -+ return d->in_q; -+} -+ -+void decode_q_add(req_decode_q * const q, req_decode_ent * const d) -+{ -+ pthread_mutex_lock(&q->q_lock); -+ if (!q->head) { -+ q->head = d; -+ q->tail = d; -+ d->prev = NULL; -+ } -+ else { -+ q->tail->next = d; -+ d->prev = q->tail; -+ q->tail = d; -+ } -+ d->next = NULL; -+ d->in_q = 1; -+ pthread_mutex_unlock(&q->q_lock); -+} -+ -+// Remove entry from Q - if head wake-up anything that was waiting -+void decode_q_remove(req_decode_q * const q, req_decode_ent * const d) -+{ -+ int try_signal = 0; -+ -+ if (!d->in_q) -+ return; -+ -+ pthread_mutex_lock(&q->q_lock); -+ if (d->prev) -+ d->prev->next = d->next; -+ else { -+ try_signal = 1; // Only need to signal if we were head -+ q->head = d->next; -+ } -+ -+ if (d->next) -+ d->next->prev = d->prev; -+ else -+ q->tail = d->prev; -+ -+ // Not strictly needed but makes 
debug easier -+ d->next = NULL; -+ d->prev = NULL; -+ d->in_q = 0; -+ pthread_mutex_unlock(&q->q_lock); -+ -+ if (try_signal) -+ pthread_cond_broadcast(&q->q_cond); -+} -+ -+void decode_q_wait(req_decode_q * const q, req_decode_ent * const d) -+{ -+ pthread_mutex_lock(&q->q_lock); -+ -+ while (q->head != d) -+ pthread_cond_wait(&q->q_cond, &q->q_lock); -+ -+ pthread_mutex_unlock(&q->q_lock); -+} -+ -+void decode_q_uninit(req_decode_q * const q) -+{ -+ pthread_mutex_destroy(&q->q_lock); -+ pthread_cond_destroy(&q->q_cond); -+} -+ -+void decode_q_init(req_decode_q * const q) -+{ -+ memset(q, 0, sizeof(*q)); -+ pthread_mutex_init(&q->q_lock, NULL); -+ pthread_cond_init(&q->q_cond, NULL); -+} -+ -+ -diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h -new file mode 100644 -index 000000000000..af7bbe1de462 ---- /dev/null -+++ b/libavcodec/v4l2_req_decode_q.h -@@ -0,0 +1,25 @@ -+#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H -+#define AVCODEC_V4L2_REQ_DECODE_Q_H -+ -+typedef struct req_decode_ent { -+ struct req_decode_ent * next; -+ struct req_decode_ent * prev; -+ int in_q; -+} req_decode_ent; -+ -+typedef struct req_decode_q { -+ pthread_mutex_t q_lock; -+ pthread_cond_t q_cond; -+ req_decode_ent * head; -+ req_decode_ent * tail; -+} req_decode_q; -+ -+int decode_q_in_q(const req_decode_ent * const d); -+void decode_q_add(req_decode_q * const q, req_decode_ent * const d); -+void decode_q_remove(req_decode_q * const q, req_decode_ent * const d); -+void decode_q_wait(req_decode_q * const q, req_decode_ent * const d); -+void decode_q_uninit(req_decode_q * const q); -+void decode_q_init(req_decode_q * const q); -+ -+#endif -+ -diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c -new file mode 100644 -index 000000000000..cfa94d55c49b ---- /dev/null -+++ b/libavcodec/v4l2_req_devscan.c -@@ -0,0 +1,449 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+ -+#include 
"v4l2_req_devscan.h" -+#include "v4l2_req_utils.h" -+ -+struct decdev { -+ enum v4l2_buf_type src_type; -+ uint32_t src_fmt_v4l2; -+ const char * vname; -+ const char * mname; -+}; -+ -+struct devscan { -+ struct decdev env; -+ unsigned int dev_size; -+ unsigned int dev_count; -+ struct decdev *devs; -+}; -+ -+static int video_src_pixfmt_supported(uint32_t fmt) -+{ -+ return 1; -+} -+ -+static void v4l2_setup_format(struct v4l2_format *format, unsigned int type, -+ unsigned int width, unsigned int height, -+ unsigned int pixelformat) -+{ -+ unsigned int sizeimage; -+ -+ memset(format, 0, sizeof(*format)); -+ format->type = type; -+ -+ sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(type)) { -+ format->fmt.pix_mp.width = width; -+ format->fmt.pix_mp.height = height; -+ format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage; -+ format->fmt.pix_mp.pixelformat = pixelformat; -+ } else { -+ format->fmt.pix.width = width; -+ format->fmt.pix.height = height; -+ format->fmt.pix.sizeimage = sizeimage; -+ format->fmt.pix.pixelformat = pixelformat; -+ } -+} -+ -+static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat, -+ unsigned int width, unsigned int height) -+{ -+ struct v4l2_format format; -+ -+ v4l2_setup_format(&format, type, width, height, pixelformat); -+ -+ return ioctl(video_fd, VIDIOC_S_FMT, &format) ? 
-errno : 0; -+} -+ -+static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities) -+{ -+ struct v4l2_capability capability = { 0 }; -+ int rc; -+ -+ rc = ioctl(video_fd, VIDIOC_QUERYCAP, &capability); -+ if (rc < 0) -+ return -errno; -+ -+ if (capabilities != NULL) { -+ if ((capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0) -+ *capabilities = capability.device_caps; -+ else -+ *capabilities = capability.capabilities; -+ } -+ -+ return 0; -+} -+ -+static int devscan_add(struct devscan *const scan, -+ enum v4l2_buf_type src_type, -+ uint32_t src_fmt_v4l2, -+ const char * vname, -+ const char * mname) -+{ -+ struct decdev *d; -+ -+ if (scan->dev_size <= scan->dev_count) { -+ unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2; -+ d = realloc(scan->devs, n * sizeof(*d)); -+ if (!d) -+ return -ENOMEM; -+ scan->devs = d; -+ scan->dev_size = n; -+ } -+ -+ d = scan->devs + scan->dev_count; -+ d->src_type = src_type; -+ d->src_fmt_v4l2 = src_fmt_v4l2; -+ d->vname = strdup(vname); -+ if (!d->vname) -+ return -ENOMEM; -+ d->mname = strdup(mname); -+ if (!d->mname) { -+ free((char *)d->vname); -+ return -ENOMEM; -+ } -+ ++scan->dev_count; -+ return 0; -+} -+ -+void devscan_delete(struct devscan **const pScan) -+{ -+ unsigned int i; -+ struct devscan * const scan = *pScan; -+ -+ if (!scan) -+ return; -+ *pScan = NULL; -+ -+ for (i = 0; i < scan->dev_count; ++i) { -+ free((char*)scan->devs[i].mname); -+ free((char*)scan->devs[i].vname); -+ } -+ free(scan->devs); -+ free(scan); -+} -+ -+#define REQ_BUF_CAPS (\ -+ V4L2_BUF_CAP_SUPPORTS_DMABUF |\ -+ V4L2_BUF_CAP_SUPPORTS_REQUESTS |\ -+ V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) -+ -+static void probe_formats(void * const dc, -+ struct devscan *const scan, -+ const int fd, -+ const unsigned int type_v4l2, -+ const char *const mpath, -+ const char *const vpath) -+{ -+ unsigned int i; -+ for (i = 0;; ++i) { -+ struct v4l2_fmtdesc fmtdesc = { -+ .index = i, -+ .type = type_v4l2 -+ }; -+ struct 
v4l2_requestbuffers rbufs = { -+ .count = 0, -+ .type = type_v4l2, -+ .memory = V4L2_MEMORY_MMAP -+ }; -+ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { -+ if (errno == EINTR) -+ continue; -+ if (errno != EINVAL) -+ request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2); -+ return; -+ } -+ if (!video_src_pixfmt_supported(fmtdesc.pixelformat)) -+ continue; -+ -+ if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) { -+ request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat); -+ continue; -+ } -+ -+ while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) { -+ if (errno != EINTR) { -+ request_debug(dc, "%s: Reqbufs failed\n", vpath); -+ continue; -+ } -+ } -+ -+ if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) { -+ request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities); -+ continue; -+ } -+ -+ request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n", -+ mpath, vpath, fmtdesc.pixelformat, type_v4l2); -+ devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath); -+ } -+} -+ -+ -+static int probe_video_device(void * const dc, -+ struct udev_device *const device, -+ struct devscan *const scan, -+ const char *const mpath) -+{ -+ int ret; -+ unsigned int capabilities = 0; -+ int video_fd = -1; -+ -+ const char *path = udev_device_get_devnode(device); -+ if (!path) { -+ request_err(dc, "%s: get video device devnode failed\n", __func__); -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ video_fd = open(path, O_RDWR, 0); -+ if (video_fd == -1) { -+ ret = -errno; -+ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno); -+ goto fail; -+ } -+ -+ ret = v4l2_query_capabilities(video_fd, &capabilities); -+ if (ret < 0) { -+ request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret); -+ goto fail; -+ } -+ -+ request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities); -+ -+ if (!(capabilities & V4L2_CAP_STREAMING)) 
{ -+ request_debug(dc, "%s: missing required streaming capability\n", __func__); -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) { -+ request_debug(dc, "%s: missing required mem2mem capability\n", __func__); -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ /* Should check capture formats too... */ -+ if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0) -+ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path); -+ if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) -+ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path); -+ -+ close(video_fd); -+ return 0; -+ -+fail: -+ if (video_fd >= 0) -+ close(video_fd); -+ return ret; -+} -+ -+static int probe_media_device(void * const dc, -+ struct udev_device *const device, -+ struct devscan *const scan) -+{ -+ int ret; -+ int rv; -+ struct media_device_info device_info = { 0 }; -+ struct media_v2_topology topology = { 0 }; -+ struct media_v2_interface *interfaces = NULL; -+ struct udev *udev = udev_device_get_udev(device); -+ struct udev_device *video_device; -+ dev_t devnum; -+ int media_fd = -1; -+ -+ const char *path = udev_device_get_devnode(device); -+ if (!path) { -+ request_err(dc, "%s: get media device devnode failed\n", __func__); -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ media_fd = open(path, O_RDWR, 0); -+ if (media_fd < 0) { -+ ret = -errno; -+ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret); -+ goto fail; -+ } -+ -+ rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info); -+ if (rv < 0) { -+ ret = -errno; -+ request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret); -+ goto fail; -+ } -+ -+ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); -+ if (rv < 0) { -+ ret = -errno; -+ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); -+ goto fail; -+ } -+ -+ if (topology.num_interfaces <= 0) 
{ -+ request_err(dc, "%s: media device has no interfaces\n", __func__); -+ ret = -EINVAL; -+ goto fail; -+ } -+ -+ interfaces = calloc(topology.num_interfaces, sizeof(*interfaces)); -+ if (!interfaces) { -+ request_err(dc, "%s: allocating media interface struct failed\n", __func__); -+ ret = -ENOMEM; -+ goto fail; -+ } -+ -+ topology.ptr_interfaces = (__u64)(uintptr_t)interfaces; -+ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); -+ if (rv < 0) { -+ ret = -errno; -+ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); -+ goto fail; -+ } -+ -+ for (int i = 0; i < topology.num_interfaces; i++) { -+ if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO) -+ continue; -+ -+ devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor); -+ video_device = udev_device_new_from_devnum(udev, 'c', devnum); -+ if (!video_device) { -+ ret = -errno; -+ request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device); -+ continue; -+ } -+ -+ ret = probe_video_device(dc, video_device, scan, path); -+ udev_device_unref(video_device); -+ -+ if (ret != 0) -+ goto fail; -+ } -+ -+fail: -+ free(interfaces); -+ if (media_fd != -1) -+ close(media_fd); -+ return ret; -+} -+ -+const char *decdev_media_path(const struct decdev *const dev) -+{ -+ return !dev ? NULL : dev->mname; -+} -+ -+const char *decdev_video_path(const struct decdev *const dev) -+{ -+ return !dev ? NULL : dev->vname; -+} -+ -+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev) -+{ -+ return !dev ? 0 : dev->src_type; -+} -+ -+uint32_t decdev_src_pixelformat(const struct decdev *const dev) -+{ -+ return !dev ? 0 : dev->src_fmt_v4l2; -+} -+ -+ -+const struct decdev *devscan_find(struct devscan *const scan, -+ const uint32_t src_fmt_v4l2) -+{ -+ unsigned int i; -+ -+ if (scan->env.mname && scan->env.vname) -+ return &scan->env; -+ -+ if (!src_fmt_v4l2) -+ return scan->dev_count ? 
scan->devs + 0 : NULL; -+ -+ for (i = 0; i != scan->dev_count; ++i) { -+ if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2) -+ return scan->devs + i; -+ } -+ return NULL; -+} -+ -+int devscan_build(void * const dc, struct devscan **pscan) -+{ -+ int ret; -+ struct udev *udev; -+ struct udev_enumerate *enumerate; -+ struct udev_list_entry *devices; -+ struct udev_list_entry *entry; -+ struct udev_device *device; -+ struct devscan * scan; -+ -+ *pscan = NULL; -+ -+ scan = calloc(1, sizeof(*scan)); -+ if (!scan) { -+ ret = -ENOMEM; -+ goto fail; -+ } -+ -+ scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH"); -+ scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH"); -+ if (scan->env.mname && scan->env.vname) { -+ request_info(dc, "Media/video device env overrides found: %s,%s\n", -+ scan->env.mname, scan->env.vname); -+ *pscan = scan; -+ return 0; -+ } -+ -+ udev = udev_new(); -+ if (!udev) { -+ request_err(dc, "%s: allocating udev context failed\n", __func__); -+ ret = -ENOMEM; -+ goto fail; -+ } -+ -+ enumerate = udev_enumerate_new(udev); -+ if (!enumerate) { -+ request_err(dc, "%s: allocating udev enumerator failed\n", __func__); -+ ret = -ENOMEM; -+ goto fail; -+ } -+ -+ udev_enumerate_add_match_subsystem(enumerate, "media"); -+ udev_enumerate_scan_devices(enumerate); -+ -+ devices = udev_enumerate_get_list_entry(enumerate); -+ udev_list_entry_foreach(entry, devices) { -+ const char *path = udev_list_entry_get_name(entry); -+ if (!path) -+ continue; -+ -+ device = udev_device_new_from_syspath(udev, path); -+ if (!device) -+ continue; -+ -+ probe_media_device(dc, device, scan); -+ udev_device_unref(device); -+ } -+ -+ udev_enumerate_unref(enumerate); -+ -+ *pscan = scan; -+ return 0; -+ -+fail: -+ udev_unref(udev); -+ devscan_delete(&scan); -+ return ret; -+} -+ -diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h -new file mode 100644 -index 000000000000..956d9234f11f ---- /dev/null -+++ b/libavcodec/v4l2_req_devscan.h -@@ -0,0 +1,23 
@@ -+#ifndef _DEVSCAN_H_ -+#define _DEVSCAN_H_ -+ -+#include -+ -+struct devscan; -+struct decdev; -+enum v4l2_buf_type; -+ -+/* These return pointers to data in the devscan structure and so are vaild -+ * for the lifetime of that -+ */ -+const char *decdev_media_path(const struct decdev *const dev); -+const char *decdev_video_path(const struct decdev *const dev); -+enum v4l2_buf_type decdev_src_type(const struct decdev *const dev); -+uint32_t decdev_src_pixelformat(const struct decdev *const dev); -+ -+const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2); -+ -+int devscan_build(void * const dc, struct devscan **pscan); -+void devscan_delete(struct devscan **const pScan); -+ -+#endif -diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c -new file mode 100644 -index 000000000000..ae6c64836972 ---- /dev/null -+++ b/libavcodec/v4l2_req_dmabufs.c -@@ -0,0 +1,266 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "v4l2_req_dmabufs.h" -+#include "v4l2_req_utils.h" -+ -+#define DMABUF_NAME1 "/dev/dma_heap/linux,cma" -+#define DMABUF_NAME2 "/dev/dma_heap/reserved" -+ -+#define TRACE_ALLOC 0 -+ -+struct dmabufs_ctl { -+ int fd; -+ size_t page_size; -+}; -+ -+struct dmabuf_h { -+ int fd; -+ size_t size; -+ size_t len; -+ void * mapptr; -+}; -+ -+#if TRACE_ALLOC -+static unsigned int total_bufs = 0; -+static size_t total_size = 0; -+#endif -+ -+struct dmabuf_h * dmabuf_import(int fd, size_t size) -+{ -+ struct dmabuf_h *dh; -+ -+ fd = dup(fd); -+ if (fd < 0 || size == 0) -+ return NULL; -+ -+ dh = malloc(sizeof(*dh)); -+ if (!dh) { -+ close(fd); -+ return NULL; -+ } -+ -+ *dh = (struct dmabuf_h) { -+ .fd = fd, -+ .size = size, -+ .mapptr = MAP_FAILED -+ }; -+ -+#if TRACE_ALLOC -+ ++total_bufs; -+ total_size += dh->size; -+ request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); 
-+#endif -+ -+ return dh; -+} -+ -+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) -+{ -+ struct dmabuf_h * dh; -+ struct dma_heap_allocation_data data = { -+ .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), -+ .fd = 0, -+ .fd_flags = O_RDWR, -+ .heap_flags = 0 -+ }; -+ -+ if (old != NULL) { -+ if (old->size == data.len) { -+ return old; -+ } -+ dmabuf_free(old); -+ } -+ -+ if (size == 0 || -+ (dh = malloc(sizeof(*dh))) == NULL) -+ return NULL; -+ -+ while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { -+ int err = errno; -+ request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", -+ (uint64_t)data.len, -+ dbsc->fd, -+ err, -+ strerror(err)); -+ if (err == EINTR) -+ continue; -+ goto fail; -+ } -+ -+ *dh = (struct dmabuf_h){ -+ .fd = data.fd, -+ .size = (size_t)data.len, -+ .mapptr = MAP_FAILED -+ }; -+ -+#if TRACE_ALLOC -+ ++total_bufs; -+ total_size += dh->size; -+ request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); -+#endif -+ -+ return dh; -+ -+fail: -+ free(dh); -+ return NULL; -+} -+ -+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) -+{ -+ struct dma_buf_sync sync = { -+ .flags = flags -+ }; -+ while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { -+ const int err = errno; -+ if (errno == EINTR) -+ continue; -+ request_log("%s: ioctl failed: flags=%#x\n", __func__, flags); -+ return -err; -+ } -+ return 0; -+} -+ -+int dmabuf_write_start(struct dmabuf_h * const dh) -+{ -+ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE); -+} -+ -+int dmabuf_write_end(struct dmabuf_h * const dh) -+{ -+ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE); -+} -+ -+int dmabuf_read_start(struct dmabuf_h * const dh) -+{ -+ if (!dmabuf_map(dh)) -+ return -1; -+ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ); -+} -+ -+int dmabuf_read_end(struct dmabuf_h * const dh) -+{ -+ return dmabuf_sync(dh, 
DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ); -+} -+ -+ -+void * dmabuf_map(struct dmabuf_h * const dh) -+{ -+ if (!dh) -+ return NULL; -+ if (dh->mapptr != MAP_FAILED) -+ return dh->mapptr; -+ dh->mapptr = mmap(NULL, dh->size, -+ PROT_READ | PROT_WRITE, -+ MAP_SHARED | MAP_POPULATE, -+ dh->fd, 0); -+ if (dh->mapptr == MAP_FAILED) { -+ request_log("%s: Map failed\n", __func__); -+ return NULL; -+ } -+ return dh->mapptr; -+} -+ -+int dmabuf_fd(const struct dmabuf_h * const dh) -+{ -+ if (!dh) -+ return -1; -+ return dh->fd; -+} -+ -+size_t dmabuf_size(const struct dmabuf_h * const dh) -+{ -+ if (!dh) -+ return 0; -+ return dh->size; -+} -+ -+size_t dmabuf_len(const struct dmabuf_h * const dh) -+{ -+ if (!dh) -+ return 0; -+ return dh->len; -+} -+ -+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) -+{ -+ dh->len = len; -+} -+ -+ -+ -+void dmabuf_free(struct dmabuf_h * dh) -+{ -+ if (!dh) -+ return; -+ -+#if TRACE_ALLOC -+ --total_bufs; -+ total_size -= dh->size; -+ request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); -+#endif -+ -+ if (dh->mapptr != MAP_FAILED) -+ munmap(dh->mapptr, dh->size); -+ while (close(dh->fd) == -1 && errno == EINTR) -+ /* loop */; -+ free(dh); -+} -+ -+struct dmabufs_ctl * dmabufs_ctl_new(void) -+{ -+ struct dmabufs_ctl * dbsc = malloc(sizeof(*dbsc)); -+ -+ if (!dbsc) -+ return NULL; -+ -+ while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 && -+ errno == EINTR) -+ /* Loop */; -+ -+ if (dbsc->fd == -1) { -+ while ((dbsc->fd = open(DMABUF_NAME2, O_RDWR)) == -1 && -+ errno == EINTR) -+ /* Loop */; -+ if (dbsc->fd == -1) { -+ request_log("Unable to open either %s or %s\n", -+ DMABUF_NAME1, DMABUF_NAME2); -+ goto fail; -+ } -+ } -+ -+ dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); -+ -+ return dbsc; -+ -+fail: -+ free(dbsc); -+ return NULL; -+} -+ -+void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc) -+{ -+ struct dmabufs_ctl * const dbsc = *pDbsc; -+ -+ if (!dbsc) -+ return; -+ 
*pDbsc = NULL; -+ -+ while (close(dbsc->fd) == -1 && errno == EINTR) -+ /* loop */; -+ -+ free(dbsc); -+} -+ -+ -diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h -new file mode 100644 -index 000000000000..cfb17e801d59 ---- /dev/null -+++ b/libavcodec/v4l2_req_dmabufs.h -@@ -0,0 +1,40 @@ -+#ifndef DMABUFS_H -+#define DMABUFS_H -+ -+#include -+ -+struct dmabufs_ctl; -+struct dmabuf_h; -+ -+struct dmabufs_ctl * dmabufs_ctl_new(void); -+void dmabufs_ctl_delete(struct dmabufs_ctl ** const pdbsc); -+ -+// Need not preserve old contents -+// On NULL return old buffer is freed -+struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size); -+ -+static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) { -+ return dmabuf_realloc(dbsc, NULL, size); -+} -+/* Create from existing fd - dups(fd) */ -+struct dmabuf_h * dmabuf_import(int fd, size_t size); -+void * dmabuf_map(struct dmabuf_h * const dh); -+ -+/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ -+int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags); -+ -+int dmabuf_write_start(struct dmabuf_h * const dh); -+int dmabuf_write_end(struct dmabuf_h * const dh); -+int dmabuf_read_start(struct dmabuf_h * const dh); -+int dmabuf_read_end(struct dmabuf_h * const dh); -+ -+int dmabuf_fd(const struct dmabuf_h * const dh); -+/* Allocated size */ -+size_t dmabuf_size(const struct dmabuf_h * const dh); -+/* Bytes in use */ -+size_t dmabuf_len(const struct dmabuf_h * const dh); -+/* Set bytes in use */ -+void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len); -+void dmabuf_free(struct dmabuf_h * dh); -+ -+#endif -diff --git a/libavcodec/v4l2_req_hevc_v1.c b/libavcodec/v4l2_req_hevc_v1.c -new file mode 100644 -index 000000000000..169b532832e0 ---- /dev/null -+++ b/libavcodec/v4l2_req_hevc_v1.c -@@ -0,0 +1,3 @@ -+#define HEVC_CTRLS_VERSION 1 -+#include "v4l2_req_hevc_vx.c" -+ -diff --git a/libavcodec/v4l2_req_hevc_v2.c 
b/libavcodec/v4l2_req_hevc_v2.c -new file mode 100644 -index 000000000000..42af98e1569b ---- /dev/null -+++ b/libavcodec/v4l2_req_hevc_v2.c -@@ -0,0 +1,3 @@ -+#define HEVC_CTRLS_VERSION 2 -+#include "v4l2_req_hevc_vx.c" -+ -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -new file mode 100644 -index 000000000000..0ae03b10c4a8 ---- /dev/null -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -0,0 +1,1213 @@ -+// File included by v4l2_req_hevc_v* - not compiled on its own -+ -+#include "decode.h" -+#include "hevcdec.h" -+#include "hwconfig.h" -+#include "internal.h" -+#include "thread.h" -+ -+#include "v4l2_request_hevc.h" -+ -+#if HEVC_CTRLS_VERSION == 1 -+#include "hevc-ctrls-v1.h" -+ -+// Fixup renamed entries -+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT -+ -+#elif HEVC_CTRLS_VERSION == 2 -+#include "hevc-ctrls-v2.h" -+#else -+#error Unknown HEVC_CTRLS_VERSION -+#endif -+ -+#include "libavutil/hwcontext_drm.h" -+ -+#include -+#include -+ -+#include "v4l2_req_devscan.h" -+#include "v4l2_req_dmabufs.h" -+#include "v4l2_req_pollqueue.h" -+#include "v4l2_req_media.h" -+#include "v4l2_req_utils.h" -+ -+// Attached to buf[0] in frame -+// Pooled in hwcontext so generally create once - 1/frame -+typedef struct V4L2MediaReqDescriptor { -+ AVDRMFrameDescriptor drm; -+ -+ // Media -+ uint64_t timestamp; -+ struct qent_dst * qe_dst; -+ -+ // Decode only - should be NULL by the time we emit the frame -+ struct req_decode_ent decode_ent; -+ -+ struct media_request *req; -+ struct qent_src *qe_src; -+ -+#if HEVC_CTRLS_VERSION >= 2 -+ struct v4l2_ctrl_hevc_decode_params dec; -+#endif -+ -+ size_t num_slices; -+ size_t alloced_slices; -+ struct v4l2_ctrl_hevc_slice_params * slice_params; -+ struct slice_info * slices; -+ -+} V4L2MediaReqDescriptor; -+ -+struct slice_info { -+ const uint8_t * ptr; -+ size_t len; // bytes -+}; -+ -+// Handy container for accumulating controls before setting -+struct 
req_controls { -+ int has_scaling; -+ struct timeval tv; -+ struct v4l2_ctrl_hevc_sps sps; -+ struct v4l2_ctrl_hevc_pps pps; -+ struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; -+}; -+ -+//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; -+ -+ -+// Get an FFmpeg format from the v4l2 format -+static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format) -+{ -+ switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? -+ format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) { -+ case V4L2_PIX_FMT_YUV420: -+ return AV_PIX_FMT_YUV420P; -+ case V4L2_PIX_FMT_NV12: -+ return AV_PIX_FMT_NV12; -+#if CONFIG_SAND -+ case V4L2_PIX_FMT_NV12_COL128: -+ return AV_PIX_FMT_RPI4_8; -+ case V4L2_PIX_FMT_NV12_10_COL128: -+ return AV_PIX_FMT_RPI4_10; -+#endif -+ default: -+ break; -+ } -+ return AV_PIX_FMT_NONE; -+} -+ -+static inline uint64_t frame_capture_dpb(const AVFrame * const frame) -+{ -+ const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; -+ return rd->timestamp; -+} -+ -+static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp) -+{ -+ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; -+ rd->timestamp = dpb_stamp; -+} -+ -+static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table) -+{ -+ int32_t luma_weight_denom, chroma_weight_denom; -+ const SliceHeader *sh = &h->sh; -+ -+ if (sh->slice_type == HEVC_SLICE_I || -+ (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) || -+ (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag)) -+ return; -+ -+ table->luma_log2_weight_denom = sh->luma_log2_weight_denom; -+ -+ if (h->ps.sps->chroma_format_idc) -+ table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom; -+ -+ luma_weight_denom = (1 << sh->luma_log2_weight_denom); -+ chroma_weight_denom = (1 << sh->chroma_log2_weight_denom); -+ -+ for (int i = 
0; i < 15 && i < sh->nb_refs[L0]; i++) { -+ table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom; -+ table->luma_offset_l0[i] = sh->luma_offset_l0[i]; -+ table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom; -+ table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom; -+ table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0]; -+ table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1]; -+ } -+ -+ if (sh->slice_type != HEVC_SLICE_B) -+ return; -+ -+ for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) { -+ table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom; -+ table->luma_offset_l1[i] = sh->luma_offset_l1[i]; -+ table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom; -+ table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom; -+ table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0]; -+ table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1]; -+ } -+} -+ -+static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) -+{ -+ const HEVCFrame *frame; -+ int i; -+ -+ for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { -+ frame = h->rps[ST_CURR_BEF].ref[i]; -+ if (frame && timestamp == frame_capture_dpb(frame->frame)) -+ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE; -+ } -+ -+ for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { -+ frame = h->rps[ST_CURR_AFT].ref[i]; -+ if (frame && timestamp == frame_capture_dpb(frame->frame)) -+ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER; -+ } -+ -+ for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) { -+ frame = h->rps[LT_CURR].ref[i]; -+ if (frame && timestamp == frame_capture_dpb(frame->frame)) -+ return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR; -+ } -+ -+ return 0; -+} -+ -+static unsigned int -+get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame, -+ const struct v4l2_hevc_dpb_entry * const entries, -+ const unsigned int num_entries) -+{ -+ 
uint64_t timestamp; -+ -+ if (!frame) -+ return 0; -+ -+ timestamp = frame_capture_dpb(frame->frame); -+ -+ for (unsigned int i = 0; i < num_entries; i++) { -+ if (entries[i].timestamp == timestamp) -+ return i; -+ } -+ -+ return 0; -+} -+ -+static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx) -+{ -+ unsigned int z = 0; -+ while (idx--) { -+ if (*b++ == 0) { -+ ++z; -+ if (z >= 2 && *b == 3) { -+ ++b; -+ z = 0; -+ } -+ } -+ else { -+ z = 0; -+ } -+ } -+ return b; -+} -+ -+static int slice_add(V4L2MediaReqDescriptor * const rd) -+{ -+ if (rd->num_slices >= rd->alloced_slices) { -+ struct v4l2_ctrl_hevc_slice_params * p2; -+ struct slice_info * s2; -+ size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2; -+ -+ p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2)); -+ if (p2 == NULL) -+ return AVERROR(ENOMEM); -+ rd->slice_params = p2; -+ -+ s2 = av_realloc_array(rd->slices, n2, sizeof(*s2)); -+ if (s2 == NULL) -+ return AVERROR(ENOMEM); -+ rd->slices = s2; -+ -+ rd->alloced_slices = n2; -+ } -+ ++rd->num_slices; -+ return 0; -+} -+ -+static unsigned int -+fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) -+{ -+ unsigned int i; -+ unsigned int n = 0; -+ const HEVCFrame * const pic = h->ref; -+ -+ for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) { -+ const HEVCFrame * const frame = &h->DPB[i]; -+ if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { -+ struct v4l2_hevc_dpb_entry * const entry = entries + n++; -+ -+ entry->timestamp = frame_capture_dpb(frame->frame); -+ entry->rps = find_frame_rps_type(h, entry->timestamp); -+ entry->field_pic = frame->frame->interlaced_frame; -+ -+ /* TODO: Interleaved: Get the POC for each field. 
*/ -+ entry->pic_order_cnt[0] = frame->poc; -+ entry->pic_order_cnt[1] = frame->poc; -+ } -+ } -+ return n; -+} -+ -+static void fill_slice_params(const HEVCContext * const h, -+#if HEVC_CTRLS_VERSION >= 2 -+ const struct v4l2_ctrl_hevc_decode_params * const dec, -+#endif -+ struct v4l2_ctrl_hevc_slice_params *slice_params, -+ uint32_t bit_size, uint32_t bit_offset) -+{ -+ const SliceHeader * const sh = &h->sh; -+#if HEVC_CTRLS_VERSION >= 2 -+ const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb; -+ const unsigned int dpb_n = dec->num_active_dpb_entries; -+#else -+ struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb; -+ unsigned int dpb_n; -+#endif -+ unsigned int i; -+ RefPicList *rpl; -+ -+ *slice_params = (struct v4l2_ctrl_hevc_slice_params) { -+ .bit_size = bit_size, -+ .data_bit_offset = bit_offset, -+ -+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ -+ .slice_segment_addr = sh->slice_segment_addr, -+ -+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ -+ .nal_unit_type = h->nal_unit_type, -+ .nuh_temporal_id_plus1 = h->temporal_id + 1, -+ -+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ -+ .slice_type = sh->slice_type, -+ .colour_plane_id = sh->colour_plane_id, -+ .slice_pic_order_cnt = h->ref->poc, -+ .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, -+ .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0, -+ .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0, -+ .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 
0 : 5 - sh->max_num_merge_cand, -+ .slice_qp_delta = sh->slice_qp_delta, -+ .slice_cb_qp_offset = sh->slice_cb_qp_offset, -+ .slice_cr_qp_offset = sh->slice_cr_qp_offset, -+ .slice_act_y_qp_offset = 0, -+ .slice_act_cb_qp_offset = 0, -+ .slice_act_cr_qp_offset = 0, -+ .slice_beta_offset_div2 = sh->beta_offset / 2, -+ .slice_tc_offset_div2 = sh->tc_offset / 2, -+ -+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ -+ .pic_struct = h->sei.picture_timing.picture_struct, -+ -+#if HEVC_CTRLS_VERSION < 2 -+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ -+ .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, -+ .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, -+ .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs, -+#endif -+ }; -+ -+ if (sh->slice_sample_adaptive_offset_flag[0]) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA; -+ -+ if (sh->slice_sample_adaptive_offset_flag[1]) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA; -+ -+ if (sh->slice_temporal_mvp_enabled_flag) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED; -+ -+ if (sh->mvd_l1_zero_flag) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO; -+ -+ if (sh->cabac_init_flag) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT; -+ -+ if (sh->collocated_list == L0) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0; -+ -+ if (sh->disable_deblocking_filter_flag) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED; -+ -+ if (sh->slice_loop_filter_across_slices_enabled_flag) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED; -+ -+ if (sh->dependent_slice_segment_flag) -+ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT; -+ -+#if HEVC_CTRLS_VERSION < 2 -+ dpb_n = fill_dpb_entries(h, dpb); -+ slice_params->num_active_dpb_entries = dpb_n; 
-+#endif -+ -+ if (sh->slice_type != HEVC_SLICE_I) { -+ rpl = &h->ref->refPicList[0]; -+ for (i = 0; i < rpl->nb_refs; i++) -+ slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); -+ } -+ -+ if (sh->slice_type == HEVC_SLICE_B) { -+ rpl = &h->ref->refPicList[1]; -+ for (i = 0; i < rpl->nb_refs; i++) -+ slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); -+ } -+ -+ fill_pred_table(h, &slice_params->pred_weight_table); -+ -+ slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; -+ if (slice_params->num_entry_point_offsets > 256) { -+ slice_params->num_entry_point_offsets = 256; -+ av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); -+ } -+ -+ for (i = 0; i < slice_params->num_entry_point_offsets; i++) -+ slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; -+} -+ -+#if HEVC_CTRLS_VERSION >= 2 -+static void -+fill_decode_params(const HEVCContext * const h, -+ struct v4l2_ctrl_hevc_decode_params * const dec) -+{ -+ unsigned int i; -+ -+ *dec = (struct v4l2_ctrl_hevc_decode_params){ -+ .pic_order_cnt_val = h->poc, -+ .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, -+ .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, -+ .num_poc_lt_curr = h->rps[LT_CURR].nb_refs, -+ }; -+ -+ dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb); -+ -+ // The docn does seem to ask that we fit our 32 bit signed POC into -+ // a U8 so... 
(To be fair 16 bits would be enough) -+ // Luckily we (Pi) don't use these fields -+ for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i) -+ dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc; -+ for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i) -+ dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc; -+ for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i) -+ dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc; -+ -+ if (IS_IRAP(h)) -+ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC; -+ if (IS_IDR(h)) -+ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC; -+ if (h->sh.no_output_of_prior_pics_flag) -+ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR; -+ -+} -+#endif -+ -+static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps) -+{ -+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ -+ *ctrl = (struct v4l2_ctrl_hevc_sps) { -+ .chroma_format_idc = sps->chroma_format_idc, -+ .pic_width_in_luma_samples = sps->width, -+ .pic_height_in_luma_samples = sps->height, -+ .bit_depth_luma_minus8 = sps->bit_depth - 8, -+ .bit_depth_chroma_minus8 = sps->bit_depth - 8, -+ .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, -+ .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, -+ .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics, -+ .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1, -+ .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, -+ .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, -+ .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, -+ .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size, -+ .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, -+ .max_transform_hierarchy_depth_intra = 
sps->max_transform_hierarchy_depth_intra, -+ .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, -+ .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, -+ .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, -+ .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, -+ .num_short_term_ref_pic_sets = sps->nb_st_rps, -+ .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, -+ .chroma_format_idc = sps->chroma_format_idc, -+ .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, -+ }; -+ -+ if (sps->separate_colour_plane_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE; -+ -+ if (sps->scaling_list_enable_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED; -+ -+ if (sps->amp_enabled_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED; -+ -+ if (sps->sao_enabled) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET; -+ -+ if (sps->pcm_enabled_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED; -+ -+ if (sps->pcm.loop_filter_disable_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED; -+ -+ if (sps->long_term_ref_pics_present_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT; -+ -+ if (sps->sps_temporal_mvp_enabled_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED; -+ -+ if (sps->sps_strong_intra_smoothing_enable_flag) -+ ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED; -+} -+ -+static void fill_scaling_matrix(const ScalingList * const sl, -+ struct v4l2_ctrl_hevc_scaling_matrix * const sm) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < 6; i++) { -+ unsigned int j; -+ -+ for (j = 0; j < 16; j++) -+ sm->scaling_list_4x4[i][j] = sl->sl[0][i][j]; -+ for (j = 0; j < 64; j++) { -+ sm->scaling_list_8x8[i][j] = sl->sl[1][i][j]; -+ sm->scaling_list_16x16[i][j] = sl->sl[2][i][j]; -+ if (i < 2) -+ sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j]; -+ } -+ 
sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i]; -+ if (i < 2) -+ sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3]; -+ } -+} -+ -+static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps) -+{ -+ uint64_t flags = 0; -+ -+ if (pps->dependent_slice_segments_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED; -+ -+ if (pps->output_flag_present_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT; -+ -+ if (pps->sign_data_hiding_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED; -+ -+ if (pps->cabac_init_present_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT; -+ -+ if (pps->constrained_intra_pred_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED; -+ -+ if (pps->transform_skip_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED; -+ -+ if (pps->cu_qp_delta_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED; -+ -+ if (pps->pic_slice_level_chroma_qp_offsets_present_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT; -+ -+ if (pps->weighted_pred_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED; -+ -+ if (pps->weighted_bipred_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED; -+ -+ if (pps->transquant_bypass_enable_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED; -+ -+ if (pps->tiles_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED; -+ -+ if (pps->entropy_coding_sync_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED; -+ -+ if (pps->loop_filter_across_tiles_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; -+ -+ if (pps->seq_loop_filter_across_slices_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED; -+ -+ if (pps->deblocking_filter_override_enabled_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED; -+ -+ if (pps->disable_dbf) -+ flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER; -+ 
-+ if (pps->lists_modification_present_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT; -+ -+ if (pps->slice_header_extension_present_flag) -+ flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT; -+ -+ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ -+ *ctrl = (struct v4l2_ctrl_hevc_pps) { -+ .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, -+ .init_qp_minus26 = pps->pic_init_qp_minus26, -+ .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, -+ .pps_cb_qp_offset = pps->cb_qp_offset, -+ .pps_cr_qp_offset = pps->cr_qp_offset, -+ .pps_beta_offset_div2 = pps->beta_offset / 2, -+ .pps_tc_offset_div2 = pps->tc_offset / 2, -+ .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, -+ .flags = flags -+ }; -+ -+ -+ if (pps->tiles_enabled_flag) { -+ ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1; -+ ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1; -+ -+ for (int i = 0; i < pps->num_tile_columns; i++) -+ ctrl->column_width_minus1[i] = pps->column_width[i] - 1; -+ -+ for (int i = 0; i < pps->num_tile_rows; i++) -+ ctrl->row_height_minus1[i] = pps->row_height[i] - 1; -+ } -+} -+ -+// Called before finally returning the frame to the user -+// Set corrupt flag here as this is actually the frame structure that -+// is going to the user (in MT land each thread has its own pool) -+static int frame_post_process(void *logctx, AVFrame *frame) -+{ -+ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0]; -+ -+// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); -+ frame->flags &= ~AV_FRAME_FLAG_CORRUPT; -+ if (rd->qe_dst) { -+ MediaBufsStatus stat = qent_dst_wait(rd->qe_dst); -+ if (stat != MEDIABUFS_STATUS_SUCCESS) { -+ av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__); -+ frame->flags |= AV_FRAME_FLAG_CORRUPT; -+ } -+ } -+ -+ return 0; -+} -+ -+static inline struct timeval cvt_dpb_to_tv(uint64_t t) -+{ -+ t /= 1000; -+ return (struct timeval){ -+ .tv_usec = t % 1000000, 
-+ .tv_sec = t / 1000000 -+ }; -+} -+ -+static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t) -+{ -+ return (uint64_t)t * 1000; -+} -+ -+static int v4l2_request_hevc_start_frame(AVCodecContext *avctx, -+ av_unused const uint8_t *buffer, -+ av_unused uint32_t size) -+{ -+ const HEVCContext *h = avctx->priv_data; -+ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ -+// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); -+ decode_q_add(&ctx->decode_q, &rd->decode_ent); -+ -+ rd->num_slices = 0; -+ ctx->timestamp++; -+ rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp); -+ -+ { -+ FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data; -+ fdd->post_process = frame_post_process; -+ } -+ -+ // qe_dst needs to be bound to the data buffer and only returned when that is -+ if (!rd->qe_dst) -+ { -+ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); -+ return AVERROR(ENOMEM); -+ } -+ } -+ -+ ff_thread_finish_setup(avctx); // Allow next thread to enter rpi_hevc_start_frame -+ -+ return 0; -+} -+ -+// Object fd & size will be zapped by this & need setting later -+static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format) -+{ -+ AVDRMLayerDescriptor *layer = &desc->layers[0]; -+ unsigned int width; -+ unsigned int height; -+ unsigned int bpl; -+ uint32_t pixelformat; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { -+ width = format->fmt.pix_mp.width; -+ height = format->fmt.pix_mp.height; -+ pixelformat = format->fmt.pix_mp.pixelformat; -+ bpl = format->fmt.pix_mp.plane_fmt[0].bytesperline; -+ } -+ else { -+ width = format->fmt.pix.width; -+ height = format->fmt.pix.height; -+ pixelformat = format->fmt.pix.pixelformat; -+ bpl = format->fmt.pix.bytesperline; -+ } -+ -+ switch 
(pixelformat) { -+ case V4L2_PIX_FMT_NV12: -+ layer->format = DRM_FORMAT_NV12; -+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ break; -+#if CONFIG_SAND -+ case V4L2_PIX_FMT_NV12_COL128: -+ layer->format = DRM_FORMAT_NV12; -+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); -+ break; -+ case V4L2_PIX_FMT_NV12_10_COL128: -+ layer->format = DRM_FORMAT_P030; -+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); -+ break; -+#endif -+#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED -+ case V4L2_PIX_FMT_SUNXI_TILED_NV12: -+ layer->format = DRM_FORMAT_NV12; -+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED; -+ break; -+#endif -+#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15) -+ case V4L2_PIX_FMT_NV15: -+ layer->format = DRM_FORMAT_NV15; -+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ break; -+#endif -+ case V4L2_PIX_FMT_NV16: -+ layer->format = DRM_FORMAT_NV16; -+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ break; -+#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20) -+ case V4L2_PIX_FMT_NV20: -+ layer->format = DRM_FORMAT_NV20; -+ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ break; -+#endif -+ default: -+ return -1; -+ } -+ -+ desc->nb_objects = 1; -+ desc->objects[0].fd = -1; -+ desc->objects[0].size = 0; -+ -+ desc->nb_layers = 1; -+ layer->nb_planes = 2; -+ -+ layer->planes[0].object_index = 0; -+ layer->planes[0].offset = 0; -+ layer->planes[0].pitch = bpl; -+#if CONFIG_SAND -+ if (pixelformat == V4L2_PIX_FMT_NV12_COL128) { -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = height * 128; -+ layer->planes[0].pitch = width; -+ layer->planes[1].pitch = width; -+ } -+ else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = height * 128; -+ layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy -+ 
layer->planes[1].pitch = width * 2; -+ } -+ else -+#endif -+ { -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = layer->planes[0].pitch * height; -+ layer->planes[1].pitch = layer->planes[0].pitch; -+ } -+ -+ return 0; -+} -+ -+static int -+set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, -+ struct req_controls *const controls, -+#if HEVC_CTRLS_VERSION >= 2 -+ struct v4l2_ctrl_hevc_decode_params * const dec, -+#endif -+ struct v4l2_ctrl_hevc_slice_params * const slices, -+ const unsigned int slice_no, -+ const unsigned int slice_count) -+{ -+ int rv; -+ -+ struct v4l2_ext_control control[] = { -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS, -+ .ptr = &controls->sps, -+ .size = sizeof(controls->sps), -+ }, -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS, -+ .ptr = &controls->pps, -+ .size = sizeof(controls->pps), -+ }, -+#if HEVC_CTRLS_VERSION >= 2 -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS, -+ .ptr = dec, -+ .size = sizeof(*dec), -+ }, -+#endif -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, -+ .ptr = slices + slice_no, -+ .size = sizeof(*slices) * slice_count, -+ }, -+ // Optional -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX, -+ .ptr = &controls->scaling_matrix, -+ .size = sizeof(controls->scaling_matrix), -+ }, -+ }; -+ -+ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, -+ controls->has_scaling ? 
-+ FF_ARRAY_ELEMS(control) : -+ FF_ARRAY_ELEMS(control) - 1); -+ -+ return rv; -+} -+ -+static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) -+{ -+ const HEVCContext * const h = avctx->priv_data; -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; -+ int bcount = get_bits_count(&h->HEVClc->gb); -+ uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; -+ -+ int rv; -+ struct slice_info * si; -+ -+ if ((rv = slice_add(rd)) != 0) -+ return rv; -+ -+ si = rd->slices + rd->num_slices - 1; -+ si->ptr = buffer; -+ si->len = size; -+ -+ if (ctx->multi_slice && rd->num_slices > 1) { -+ struct slice_info *const si0 = rd->slices; -+ const size_t offset = (buffer - si0->ptr); -+ boff += offset * 8; -+ size += offset; -+ si0->len = si->len + offset; -+ } -+ -+#if HEVC_CTRLS_VERSION >= 2 -+ if (rd->num_slices == 1) -+ fill_decode_params(h, &rd->dec); -+ fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff); -+#else -+ fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff); -+#endif -+ -+ return 0; -+} -+ -+static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx) -+{ -+ const HEVCContext * const h = avctx->priv_data; -+ if (h->ref != NULL) { -+ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0]; -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ -+ media_request_abort(&rd->req); -+ mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src); -+ -+ decode_q_remove(&ctx->decode_q, &rd->decode_ent); -+ } -+} -+ -+static int send_slice(AVCodecContext * const avctx, -+ V4L2MediaReqDescriptor * const rd, -+ struct req_controls *const controls, -+ const unsigned int i, const unsigned int j) -+{ -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; 
-+ -+ struct slice_info *const si = rd->slices + i; -+ struct media_request * req = NULL; -+ struct qent_src * src = NULL; -+ MediaBufsStatus stat; -+ -+ if ((req = media_request_get(ctx->mpool)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); -+ return AVERROR(ENOMEM); -+ } -+ -+ if (set_req_ctls(ctx, req, -+ controls, -+#if HEVC_CTRLS_VERSION >= 2 -+ &rd->dec, -+#endif -+ rd->slice_params, -+ i, j - i)) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); -+ goto fail1; -+ } -+ -+ if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__); -+ goto fail1; -+ } -+ -+ if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__); -+ goto fail2; -+ } -+ -+ if (qent_src_params_set(src, &controls->tv)) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__); -+ goto fail2; -+ } -+ -+#warning ANNEX_B start code -+// if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { -+// } -+ -+ stat = mediabufs_start_request(ctx->mbufs, &req, &src, -+ i == 0 ? rd->qe_dst : NULL, -+ j == rd->num_slices); -+ -+ if (stat != MEDIABUFS_STATUS_SUCCESS) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); -+ return AVERROR_UNKNOWN; -+ } -+ return 0; -+ -+fail2: -+ mediabufs_src_qent_abort(ctx->mbufs, &src); -+fail1: -+ media_request_abort(&req); -+ return AVERROR_UNKNOWN; -+} -+ -+static int v4l2_request_hevc_end_frame(AVCodecContext *avctx) -+{ -+ const HEVCContext * const h = avctx->priv_data; -+ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0]; -+ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; -+ struct req_controls rc; -+ unsigned int i; -+ int rv; -+ -+ // It is possible, though maybe a bug, to get an end_frame without -+ // a previous start_frame. If we do then give up. 
-+ if (!decode_q_in_q(&rd->decode_ent)) { -+ av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__); -+ return AVERROR_INVALIDDATA; -+ } -+ -+ { -+ const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ? -+ &h->ps.pps->scaling_list : -+ h->ps.sps->scaling_list_enable_flag ? -+ &h->ps.sps->scaling_list : NULL; -+ -+ -+ memset(&rc, 0, sizeof(rc)); -+ rc.tv = cvt_dpb_to_tv(rd->timestamp); -+ fill_sps(&rc.sps, h->ps.sps); -+ fill_pps(&rc.pps, h->ps.pps); -+ if (sl) { -+ rc.has_scaling = 1; -+ fill_scaling_matrix(sl, &rc.scaling_matrix); -+ } -+ } -+ -+ decode_q_wait(&ctx->decode_q, &rd->decode_ent); -+ -+ // qe_dst needs to be bound to the data buffer and only returned when that is -+ // Alloc almost certainly wants to be serialised if there is any chance of blocking -+ // so we get the next frame to be free in the thread that needs it for decode first. -+ // -+ // In our current world this probably isn't a concern but put it here anyway -+ if (!rd->qe_dst) -+ { -+ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); -+ rv = AVERROR(ENOMEM); -+ goto fail; -+ } -+ } -+ -+ // Send as slices -+ if (ctx->multi_slice) -+ { -+ if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0) -+ goto fail; -+ } -+ else -+ { -+ for (i = 0; i != rd->num_slices; ++i) { -+ if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0) -+ goto fail; -+ } -+ } -+ -+ // Set the drm_prime desriptor -+ drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); -+ rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0)); -+ rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0)); -+ -+ decode_q_remove(&ctx->decode_q, &rd->decode_ent); -+ return 0; -+ -+fail: -+ decode_q_remove(&ctx->decode_q, &rd->decode_ent); -+ return rv; -+} -+ -+// Initial check & init -+static int -+probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) -+{ -+ 
const HEVCContext *h = avctx->priv_data; -+ const HEVCSPS * const sps = h->ps.sps; -+ struct v4l2_ctrl_hevc_sps ctrl_sps; -+ unsigned int i; -+ -+ // Check for var slice array -+ struct v4l2_query_ext_ctrl qc[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX }, -+#if HEVC_CTRLS_VERSION >= 2 -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS }, -+#endif -+ }; -+ // Order & size must match! -+ static const size_t ctrl_sizes[] = { -+ sizeof(struct v4l2_ctrl_hevc_slice_params), -+ sizeof(struct v4l2_ctrl_hevc_sps), -+ sizeof(struct v4l2_ctrl_hevc_pps), -+ sizeof(struct v4l2_ctrl_hevc_scaling_matrix), -+#if HEVC_CTRLS_VERSION >= 2 -+ sizeof(struct v4l2_ctrl_hevc_decode_params), -+#endif -+ }; -+ const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc); -+ -+ if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) { -+ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION); -+ return AVERROR(EINVAL); -+ } -+ for (i = 0; i != noof_ctrls; ++i) { -+ if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { -+ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", -+ HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); -+ return AVERROR(EINVAL); -+ } -+ } -+ -+ fill_sps(&ctrl_sps, sps); -+ -+ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0; -+ return 0; -+} -+ -+// Final init -+static int -+set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) -+{ -+ int ret; -+ -+ struct v4l2_query_ext_ctrl querys[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, -+ { .id = 
V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, }, -+ }; -+ -+ struct v4l2_ext_control ctrls[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, -+ }; -+ -+ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); -+ -+ ctx->decode_mode = querys[0].default_value; -+ -+ if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED && -+ ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode); -+ return AVERROR(EINVAL); -+ } -+ -+ ctx->start_code = querys[1].default_value; -+ if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE && -+ ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code); -+ return AVERROR(EINVAL); -+ } -+ -+ ctx->max_slices = querys[2].elems; -+ if (ctx->max_slices > MAX_SLICES) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices); -+ return AVERROR(EINVAL); -+ } -+ -+ ctrls[0].value = ctx->decode_mode; -+ ctrls[1].value = ctx->start_code; -+ -+ ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls)); -+ return !ret ? 
0 : AVERROR(-ret); -+} -+ -+static void v4l2_req_frame_free(void *opaque, uint8_t *data) -+{ -+ AVCodecContext *avctx = opaque; -+ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data; -+ -+ av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data); -+ -+ qent_dst_unref(&rd->qe_dst); -+ -+ // We don't expect req or qe_src to be set -+ if (rd->req || rd->qe_src) -+ av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src); -+ -+ av_freep(&rd->slices); -+ av_freep(&rd->slice_params); -+ -+ av_free(rd); -+} -+ -+static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size) -+{ -+ AVCodecContext *avctx = opaque; -+// V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; -+// V4L2MediaReqDescriptor *req; -+ AVBufferRef *ref; -+ uint8_t *data; -+// int ret; -+ -+ data = av_mallocz(size); -+ if (!data) -+ return NULL; -+ -+ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data); -+ ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0); -+ if (!ref) { -+ av_freep(&data); -+ return NULL; -+ } -+ return ref; -+} -+ -+#if 0 -+static void v4l2_req_pool_free(void *opaque) -+{ -+ av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque); -+} -+ -+static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc) -+{ -+ av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool); -+ -+ av_buffer_pool_uninit(&hwfc->pool); -+} -+#endif -+ -+static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) -+{ -+ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; -+ AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data; -+ const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs); -+ -+ hwfc->format = AV_PIX_FMT_DRM_PRIME; -+ hwfc->sw_format = pixel_format_from_format(vfmt); -+ if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) { -+ hwfc->width = vfmt->fmt.pix_mp.width; -+ hwfc->height = 
vfmt->fmt.pix_mp.height; -+ } else { -+ hwfc->width = vfmt->fmt.pix.width; -+ hwfc->height = vfmt->fmt.pix.height; -+ } -+#if 0 -+ hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free); -+ if (!hwfc->pool) -+ return AVERROR(ENOMEM); -+ -+ hwfc->free = v4l2_req_hwframe_ctx_free; -+ -+ hwfc->initial_pool_size = 1; -+ -+ switch (avctx->codec_id) { -+ case AV_CODEC_ID_VP9: -+ hwfc->initial_pool_size += 8; -+ break; -+ case AV_CODEC_ID_VP8: -+ hwfc->initial_pool_size += 3; -+ break; -+ default: -+ hwfc->initial_pool_size += 2; -+ } -+#endif -+ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size); -+ -+ return 0; -+} -+ -+static int alloc_frame(AVCodecContext * avctx, AVFrame *frame) -+{ -+ int rv; -+ -+ frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor)); -+ if (!frame->buf[0]) -+ return AVERROR(ENOMEM); -+ -+ frame->data[0] = frame->buf[0]->data; -+ -+ frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx); -+ -+ if ((rv = ff_attach_decode_data(frame)) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n"); -+ av_frame_unref(frame); -+ return rv; -+ } -+ -+ return 0; -+} -+ -+const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = { -+ .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE, -+ .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION), -+ .probe = probe, -+ .set_controls = set_controls, -+ -+ .start_frame = v4l2_request_hevc_start_frame, -+ .decode_slice = v4l2_request_hevc_decode_slice, -+ .end_frame = v4l2_request_hevc_end_frame, -+ .abort_frame = v4l2_request_hevc_abort_frame, -+ .frame_params = frame_params, -+ .alloc_frame = alloc_frame, -+}; -+ -diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c -new file mode 100644 -index 000000000000..eb00ecb40623 ---- 
/dev/null -+++ b/libavcodec/v4l2_req_media.c -@@ -0,0 +1,1596 @@ -+/* -+ * Copyright (C) 2018 Paul Kocialkowski -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sub license, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial portions -+ * of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. -+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR -+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "v4l2_req_dmabufs.h" -+#include "v4l2_req_media.h" -+#include "v4l2_req_pollqueue.h" -+#include "v4l2_req_utils.h" -+#include "weak_link.h" -+ -+ -+/* floor(log2(x)) */ -+static unsigned int log2_size(size_t x) -+{ -+ unsigned int n = 0; -+ -+ if (x & ~0xffff) { -+ n += 16; -+ x >>= 16; -+ } -+ if (x & ~0xff) { -+ n += 8; -+ x >>= 8; -+ } -+ if (x & ~0xf) { -+ n += 4; -+ x >>= 4; -+ } -+ if (x & ~3) { -+ n += 2; -+ x >>= 2; -+ } -+ return (x & ~1) ? 
n + 1 : n; -+} -+ -+static size_t round_up_size(const size_t x) -+{ -+ /* Admit no size < 256 */ -+ const unsigned int n = x < 256 ? 8 : log2_size(x) - 1; -+ -+ return x >= (3 << n) ? 4 << n : (3 << n); -+} -+ -+struct media_request; -+ -+struct media_pool { -+ int fd; -+ sem_t sem; -+ pthread_mutex_t lock; -+ struct media_request * free_reqs; -+ struct pollqueue * pq; -+}; -+ -+struct media_request { -+ struct media_request * next; -+ struct media_pool * mp; -+ int fd; -+ struct polltask * pt; -+}; -+ -+ -+static inline int do_trywait(sem_t *const sem) -+{ -+ while (sem_trywait(sem)) { -+ if (errno != EINTR) -+ return -errno; -+ } -+ return 0; -+} -+ -+static inline int do_wait(sem_t *const sem) -+{ -+ while (sem_wait(sem)) { -+ if (errno != EINTR) -+ return -errno; -+ } -+ return 0; -+} -+ -+static int request_buffers(int video_fd, unsigned int type, -+ enum v4l2_memory memory, unsigned int buffers_count) -+{ -+ struct v4l2_requestbuffers buffers; -+ int rc; -+ -+ memset(&buffers, 0, sizeof(buffers)); -+ buffers.type = type; -+ buffers.memory = memory; -+ buffers.count = buffers_count; -+ -+ rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers); -+ if (rc < 0) { -+ rc = -errno; -+ request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc)); -+ return rc; -+ } -+ -+ return 0; -+} -+ -+ -+static int set_stream(int video_fd, unsigned int type, bool enable) -+{ -+ enum v4l2_buf_type buf_type = type; -+ int rc; -+ -+ rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF, -+ &buf_type); -+ if (rc < 0) { -+ rc = -errno; -+ request_log("Unable to %sable stream: %s\n", -+ enable ? 
"en" : "dis", strerror(-rc)); -+ return rc; -+ } -+ -+ return 0; -+} -+ -+ -+ -+struct media_request * media_request_get(struct media_pool * const mp) -+{ -+ struct media_request *req = NULL; -+ -+ /* Timeout handled by poll code */ -+ if (do_wait(&mp->sem)) -+ return NULL; -+ -+ pthread_mutex_lock(&mp->lock); -+ req = mp->free_reqs; -+ if (req) { -+ mp->free_reqs = req->next; -+ req->next = NULL; -+ } -+ pthread_mutex_unlock(&mp->lock); -+ return req; -+} -+ -+int media_request_fd(const struct media_request * const req) -+{ -+ return req->fd; -+} -+ -+int media_request_start(struct media_request * const req) -+{ -+ while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1) -+ { -+ const int err = errno; -+ if (err == EINTR) -+ continue; -+ request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err)); -+ return -err; -+ } -+ -+ pollqueue_add_task(req->pt, 2000); -+ return 0; -+} -+ -+static void media_request_done(void *v, short revents) -+{ -+ struct media_request *const req = v; -+ struct media_pool *const mp = req->mp; -+ -+ /* ** Not sure what to do about timeout */ -+ -+ if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0) -+ request_log("Unable to reinit media request: %s\n", -+ strerror(errno)); -+ -+ pthread_mutex_lock(&mp->lock); -+ req->next = mp->free_reqs; -+ mp->free_reqs = req; -+ pthread_mutex_unlock(&mp->lock); -+ sem_post(&mp->sem); -+} -+ -+int media_request_abort(struct media_request ** const preq) -+{ -+ struct media_request * const req = *preq; -+ -+ if (req == NULL) -+ return 0; -+ *preq = NULL; -+ -+ media_request_done(req, 0); -+ return 0; -+} -+ -+static void delete_req_chain(struct media_request * const chain) -+{ -+ struct media_request * next = chain; -+ while (next) { -+ struct media_request * const req = next; -+ next = req->next; -+ if (req->pt) -+ polltask_delete(&req->pt); -+ if (req->fd != -1) -+ close(req->fd); -+ free(req); -+ } -+} -+ -+struct media_pool * media_pool_new(const char * const media_path, -+ 
struct pollqueue * const pq, -+ const unsigned int n) -+{ -+ struct media_pool * const mp = calloc(1, sizeof(*mp)); -+ unsigned int i; -+ -+ if (!mp) -+ goto fail0; -+ -+ mp->pq = pq; -+ pthread_mutex_init(&mp->lock, NULL); -+ mp->fd = open(media_path, O_RDWR | O_NONBLOCK); -+ if (mp->fd == -1) { -+ request_log("Failed to open '%s': %s\n", media_path, strerror(errno)); -+ goto fail1; -+ } -+ -+ for (i = 0; i != n; ++i) { -+ struct media_request * req = malloc(sizeof(*req)); -+ if (!req) -+ goto fail4; -+ -+ *req = (struct media_request){ -+ .next = mp->free_reqs, -+ .mp = mp, -+ .fd = -1 -+ }; -+ mp->free_reqs = req; -+ -+ if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) { -+ request_log("Failed to alloc request %d: %s\n", i, strerror(errno)); -+ goto fail4; -+ } -+ -+ req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req); -+ if (!req->pt) -+ goto fail4; -+ } -+ -+ sem_init(&mp->sem, 0, n); -+ -+ return mp; -+ -+fail4: -+ delete_req_chain(mp->free_reqs); -+ close(mp->fd); -+ pthread_mutex_destroy(&mp->lock); -+fail1: -+ free(mp); -+fail0: -+ return NULL; -+} -+ -+void media_pool_delete(struct media_pool ** pMp) -+{ -+ struct media_pool * const mp = *pMp; -+ -+ if (!mp) -+ return; -+ *pMp = NULL; -+ -+ delete_req_chain(mp->free_reqs); -+ close(mp->fd); -+ sem_destroy(&mp->sem); -+ pthread_mutex_destroy(&mp->lock); -+ free(mp); -+} -+ -+ -+#define INDEX_UNSET (~(uint32_t)0) -+ -+enum qent_status { -+ QENT_NEW = 0, // Initial state - shouldn't last -+ QENT_FREE, // On free chain -+ QENT_PENDING, // User has ent -+ QENT_WAITING, // On inuse -+ QENT_DONE, // Frame rx -+ QENT_ERROR, // Error -+ QENT_IMPORT -+}; -+ -+struct qent_base { -+ atomic_int ref_count; -+ struct qent_base *next; -+ struct qent_base *prev; -+ enum qent_status status; -+ uint32_t index; -+ struct dmabuf_h *dh[VIDEO_MAX_PLANES]; -+ struct timeval timestamp; -+}; -+ -+struct qent_src { -+ struct qent_base base; -+ int fixed_size; -+}; -+ -+struct qent_dst { -+ struct 
qent_base base; -+ bool waiting; -+ pthread_mutex_t lock; -+ pthread_cond_t cond; -+ struct ff_weak_link_client * mbc_wl; -+}; -+ -+struct qe_list_head { -+ struct qent_base *head; -+ struct qent_base *tail; -+}; -+ -+struct buf_pool { -+ pthread_mutex_t lock; -+ sem_t free_sem; -+ enum v4l2_buf_type buf_type; -+ struct qe_list_head free; -+ struct qe_list_head inuse; -+}; -+ -+ -+static inline struct qent_dst *base_to_dst(struct qent_base *be) -+{ -+ return (struct qent_dst *)be; -+} -+ -+static inline struct qent_src *base_to_src(struct qent_base *be) -+{ -+ return (struct qent_src *)be; -+} -+ -+ -+#define QENT_BASE_INITIALIZER {\ -+ .ref_count = ATOMIC_VAR_INIT(0),\ -+ .status = QENT_NEW,\ -+ .index = INDEX_UNSET\ -+} -+ -+static void qe_base_uninit(struct qent_base *const be) -+{ -+ unsigned int i; -+ for (i = 0; i != VIDEO_MAX_PLANES; ++i) { -+ dmabuf_free(be->dh[i]); -+ be->dh[i] = NULL; -+ } -+} -+ -+static void qe_src_free(struct qent_src *const be_src) -+{ -+ if (!be_src) -+ return; -+ qe_base_uninit(&be_src->base); -+ free(be_src); -+} -+ -+static struct qent_src * qe_src_new(void) -+{ -+ struct qent_src *const be_src = malloc(sizeof(*be_src)); -+ if (!be_src) -+ return NULL; -+ *be_src = (struct qent_src){ -+ .base = QENT_BASE_INITIALIZER -+ }; -+ return be_src; -+} -+ -+static void qe_dst_free(struct qent_dst *const be_dst) -+{ -+ if (!be_dst) -+ return; -+ -+ ff_weak_link_unref(&be_dst->mbc_wl); -+ pthread_cond_destroy(&be_dst->cond); -+ pthread_mutex_destroy(&be_dst->lock); -+ qe_base_uninit(&be_dst->base); -+ free(be_dst); -+} -+ -+static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl) -+{ -+ struct qent_dst *const be_dst = malloc(sizeof(*be_dst)); -+ if (!be_dst) -+ return NULL; -+ *be_dst = (struct qent_dst){ -+ .base = QENT_BASE_INITIALIZER, -+ .lock = PTHREAD_MUTEX_INITIALIZER, -+ .cond = PTHREAD_COND_INITIALIZER, -+ .mbc_wl = ff_weak_link_ref(wl) -+ }; -+ return be_dst; -+} -+ -+static void ql_add_tail(struct qe_list_head * 
const ql, struct qent_base * be) -+{ -+ if (ql->tail) -+ ql->tail->next = be; -+ else -+ ql->head = be; -+ be->prev = ql->tail; -+ be->next = NULL; -+ ql->tail = be; -+} -+ -+static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be) -+{ -+ if (!be) -+ return NULL; -+ -+ if (be->next) -+ be->next->prev = be->prev; -+ else -+ ql->tail = be->prev; -+ if (be->prev) -+ be->prev->next = be->next; -+ else -+ ql->head = be->next; -+ be->next = NULL; -+ be->prev = NULL; -+ return be; -+} -+ -+ -+static void bq_put_free(struct buf_pool *const bp, struct qent_base * be) -+{ -+ ql_add_tail(&bp->free, be); -+} -+ -+static struct qent_base * bq_get_free(struct buf_pool *const bp) -+{ -+ return ql_extract(&bp->free, bp->free.head); -+} -+ -+static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be) -+{ -+ return ql_extract(&bp->inuse, be); -+} -+ -+static struct qent_base * bq_get_inuse(struct buf_pool *const bp) -+{ -+ return ql_extract(&bp->inuse, bp->inuse.head); -+} -+ -+static void bq_free_all_free_src(struct buf_pool *const bp) -+{ -+ struct qent_base *be; -+ while ((be = bq_get_free(bp)) != NULL) -+ qe_src_free(base_to_src(be)); -+} -+ -+static void bq_free_all_inuse_src(struct buf_pool *const bp) -+{ -+ struct qent_base *be; -+ while ((be = bq_get_inuse(bp)) != NULL) -+ qe_src_free(base_to_src(be)); -+} -+ -+static void bq_free_all_free_dst(struct buf_pool *const bp) -+{ -+ struct qent_base *be; -+ while ((be = bq_get_free(bp)) != NULL) -+ qe_dst_free(base_to_dst(be)); -+} -+ -+static void queue_put_free(struct buf_pool *const bp, struct qent_base *be) -+{ -+ unsigned int i; -+ -+ pthread_mutex_lock(&bp->lock); -+ /* Clear out state vars */ -+ be->timestamp.tv_sec = 0; -+ be->timestamp.tv_usec = 0; -+ be->status = QENT_FREE; -+ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) -+ dmabuf_len_set(be->dh[i], 0); -+ bq_put_free(bp, be); -+ pthread_mutex_unlock(&bp->lock); -+ sem_post(&bp->free_sem); 
-+} -+ -+static bool queue_is_inuse(const struct buf_pool *const bp) -+{ -+ return bp->inuse.tail != NULL; -+} -+ -+static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be) -+{ -+ if (!be) -+ return; -+ pthread_mutex_lock(&bp->lock); -+ ql_add_tail(&bp->inuse, be); -+ be->status = QENT_WAITING; -+ pthread_mutex_unlock(&bp->lock); -+} -+ -+static struct qent_base *queue_get_free(struct buf_pool *const bp) -+{ -+ struct qent_base *buf; -+ -+ if (do_wait(&bp->free_sem)) -+ return NULL; -+ pthread_mutex_lock(&bp->lock); -+ buf = bq_get_free(bp); -+ pthread_mutex_unlock(&bp->lock); -+ return buf; -+} -+ -+static struct qent_base *queue_tryget_free(struct buf_pool *const bp) -+{ -+ struct qent_base *buf; -+ -+ if (do_trywait(&bp->free_sem)) -+ return NULL; -+ pthread_mutex_lock(&bp->lock); -+ buf = bq_get_free(bp); -+ pthread_mutex_unlock(&bp->lock); -+ return buf; -+} -+ -+static struct qent_base * queue_find_extract_fd(struct buf_pool *const bp, const int fd) -+{ -+ struct qent_base *be; -+ -+ pthread_mutex_lock(&bp->lock); -+ /* Expect 1st in Q, but allow anywhere */ -+ for (be = bp->inuse.head; be; be = be->next) { -+ if (dmabuf_fd(be->dh[0]) == fd) { -+ bq_extract_inuse(bp, be); -+ break; -+ } -+ } -+ pthread_mutex_unlock(&bp->lock); -+ -+ return be; -+} -+ -+static void queue_delete(struct buf_pool *const bp) -+{ -+ sem_destroy(&bp->free_sem); -+ pthread_mutex_destroy(&bp->lock); -+ free(bp); -+} -+ -+static struct buf_pool* queue_new(const int vfd) -+{ -+ struct buf_pool *bp = calloc(1, sizeof(*bp)); -+ if (!bp) -+ return NULL; -+ pthread_mutex_init(&bp->lock, NULL); -+ sem_init(&bp->free_sem, 0, 0); -+ return bp; -+} -+ -+ -+struct mediabufs_ctl { -+ atomic_int ref_count; /* 0 is single ref for easier atomics */ -+ void * dc; -+ int vfd; -+ bool stream_on; -+ bool polling; -+ bool dst_fixed; // Dst Q is fixed size -+ pthread_mutex_t lock; -+ struct buf_pool * src; -+ struct buf_pool * dst; -+ struct polltask * pt; -+ struct pollqueue * pq; -+ 
struct ff_weak_link_master * this_wlm; -+ -+ struct v4l2_format src_fmt; -+ struct v4l2_format dst_fmt; -+}; -+ -+static int qe_v4l2_queue(struct qent_base *const be, -+ const int vfd, struct media_request *const mreq, -+ const struct v4l2_format *const fmt, -+ const bool is_dst, const bool hold_flag) -+{ -+ struct v4l2_buffer buffer = { -+ .type = fmt->type, -+ .memory = V4L2_MEMORY_DMABUF, -+ .index = be->index -+ }; -+ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ unsigned int i; -+ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) { -+ if (is_dst) -+ dmabuf_len_set(be->dh[i], 0); -+ -+ /* *** Really need a pixdesc rather than a format so we can fill in data_offset */ -+ planes[i].length = dmabuf_size(be->dh[i]); -+ planes[i].bytesused = dmabuf_len(be->dh[i]); -+ planes[i].m.fd = dmabuf_fd(be->dh[i]); -+ } -+ buffer.m.planes = planes; -+ buffer.length = i; -+ } -+ else { -+ if (is_dst) -+ dmabuf_len_set(be->dh[0], 0); -+ -+ buffer.bytesused = dmabuf_len(be->dh[0]); -+ buffer.length = dmabuf_size(be->dh[0]); -+ buffer.m.fd = dmabuf_fd(be->dh[0]); -+ } -+ -+ if (!is_dst && mreq) { -+ buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD; -+ buffer.request_fd = media_request_fd(mreq); -+ if (hold_flag) -+ buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; -+ } -+ -+ if (is_dst) -+ be->timestamp = (struct timeval){0,0}; -+ -+ buffer.timestamp = be->timestamp; -+ -+ while (ioctl(vfd, VIDIOC_QBUF, &buffer)) { -+ const int err = errno; -+ if (err != EINTR) { -+ request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err)); -+ return -err; -+ } -+ } -+ return 0; -+} -+ -+static struct qent_base * qe_dequeue(struct buf_pool *const bp, -+ const int vfd, -+ const struct v4l2_format * const f) -+{ -+ int fd; -+ struct qent_base *be; -+ int rc; -+ const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type); -+ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; -+ struct v4l2_buffer buffer = { -+ .type = f->type, -+ 
.memory = V4L2_MEMORY_DMABUF -+ }; -+ if (mp) { -+ buffer.length = f->fmt.pix_mp.num_planes; -+ buffer.m.planes = planes; -+ } -+ -+ while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 && -+ errno == EINTR) -+ /* Loop */; -+ if (rc) { -+ request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno)); -+ return NULL; -+ } -+ -+ fd = mp ? planes[0].m.fd : buffer.m.fd; -+ be = queue_find_extract_fd(bp, fd); -+ if (!be) { -+ request_log("Failed to find fd %d in Q\n", fd); -+ return NULL; -+ } -+ -+ be->timestamp = buffer.timestamp; -+ be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE; -+ return be; -+} -+ -+static void qe_dst_done(struct qent_dst * dst_be) -+{ -+ pthread_mutex_lock(&dst_be->lock); -+ dst_be->waiting = false; -+ pthread_cond_broadcast(&dst_be->cond); -+ pthread_mutex_unlock(&dst_be->lock); -+ -+ qent_dst_unref(&dst_be); -+} -+ -+static bool qe_dst_waiting(struct qent_dst *const dst_be) -+{ -+ bool waiting; -+ pthread_mutex_lock(&dst_be->lock); -+ waiting = dst_be->waiting; -+ dst_be->waiting = true; -+ pthread_mutex_unlock(&dst_be->lock); -+ return waiting; -+} -+ -+ -+static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc) -+{ -+ return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst); -+} -+ -+static void mediabufs_poll_cb(void * v, short revents) -+{ -+ struct mediabufs_ctl *mbc = v; -+ struct qent_src *src_be = NULL; -+ struct qent_dst *dst_be = NULL; -+ -+ if (!revents) -+ request_err(mbc->dc, "%s: Timeout\n", __func__); -+ -+ pthread_mutex_lock(&mbc->lock); -+ mbc->polling = false; -+ -+ if ((revents & POLLOUT) != 0) -+ src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt)); -+ if ((revents & POLLIN) != 0) -+ dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt)); -+ -+ /* Reschedule */ -+ if (mediabufs_wants_poll(mbc)) { -+ mbc->polling = true; -+ pollqueue_add_task(mbc->pt, 2000); -+ } -+ pthread_mutex_unlock(&mbc->lock); -+ -+ if (src_be) -+ 
queue_put_free(mbc->src, &src_be->base); -+ if (dst_be) -+ qe_dst_done(dst_be); -+} -+ -+int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp) -+{ -+ struct qent_base *const be = &be_src->base; -+ -+ be->timestamp = *timestamp; -+ return 0; -+} -+ -+struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst) -+{ -+ return be_dst->base.timestamp; -+} -+ -+static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc) -+{ -+ if (!be->dh[0] || len > dmabuf_size(be->dh[0])) { -+ size_t newsize = round_up_size(len); -+ request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize); -+ if (!dbsc) { -+ request_log("%s: No dmbabuf_ctrl for realloc\n", __func__); -+ return -ENOMEM; -+ } -+ if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) { -+ request_log("%s: Realloc %zd failed\n", __func__, newsize); -+ return -ENOMEM; -+ } -+ } -+ return 0; -+} -+ -+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc) -+{ -+ struct qent_base *const be = &be_src->base; -+ return qent_base_realloc(be, len, dbsc); -+} -+ -+ -+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc) -+{ -+ void * dst; -+ struct qent_base *const be = &be_src->base; -+ int rv; -+ -+ // Realloc doesn't copy so don't alloc if offset != 0 -+ if ((rv = qent_base_realloc(be, offset + len, -+ be_src->fixed_size || offset ? 
NULL : dbsc)) != 0) -+ return rv; -+ -+ dmabuf_write_start(be->dh[0]); -+ dst = dmabuf_map(be->dh[0]); -+ if (!dst) -+ return -1; -+ memcpy((char*)dst + offset, src, len); -+ dmabuf_len_set(be->dh[0], len); -+ dmabuf_write_end(be->dh[0]); -+ return 0; -+} -+ -+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane) -+{ -+ const struct qent_base *const be = &be_dst->base; -+ -+ return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? NULL : be->dh[plane]; -+} -+ -+int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane) -+{ -+ return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane))); -+} -+ -+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, -+ struct media_request **const pmreq, -+ struct qent_src **const psrc_be, -+ struct qent_dst *const dst_be, -+ const bool is_final) -+{ -+ struct media_request * mreq = *pmreq; -+ struct qent_src *const src_be = *psrc_be; -+ -+ // Req & src are always both "consumed" -+ *pmreq = NULL; -+ *psrc_be = NULL; -+ -+ pthread_mutex_lock(&mbc->lock); -+ -+ if (!src_be) -+ goto fail1; -+ -+ if (dst_be) { -+ if (qe_dst_waiting(dst_be)) { -+ request_info(mbc->dc, "Request buffer already waiting on start\n"); -+ goto fail1; -+ } -+ dst_be->base.timestamp = (struct timeval){0,0}; -+ if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false)) -+ goto fail1; -+ -+ qent_dst_ref(dst_be); -+ queue_put_inuse(mbc->dst, &dst_be->base); -+ } -+ -+ if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final)) -+ goto fail1; -+ queue_put_inuse(mbc->src, &src_be->base); -+ -+ if (!mbc->polling && mediabufs_wants_poll(mbc)) { -+ mbc->polling = true; -+ pollqueue_add_task(mbc->pt, 2000); -+ } -+ pthread_mutex_unlock(&mbc->lock); -+ -+ if (media_request_start(mreq)) -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+ -+ return MEDIABUFS_STATUS_SUCCESS; -+ -+fail1: -+ media_request_abort(&mreq); -+ if (src_be) -+ queue_put_free(mbc->src, 
&src_be->base); -+ -+// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q -+ if (dst_be) { -+ dst_be->base.status = QENT_ERROR; -+ qe_dst_done(dst_be); -+ } -+ pthread_mutex_unlock(&mbc->lock); -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+} -+ -+ -+static int qe_alloc_from_fmt(struct qent_base *const be, -+ struct dmabufs_ctl *const dbsc, -+ const struct v4l2_format *const fmt) -+{ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ unsigned int i; -+ for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) { -+ be->dh[i] = dmabuf_realloc(dbsc, be->dh[i], -+ fmt->fmt.pix_mp.plane_fmt[i].sizeimage); -+ /* On failure tidy up and die */ -+ if (!be->dh[i]) { -+ while (i--) { -+ dmabuf_free(be->dh[i]); -+ be->dh[i] = NULL; -+ } -+ return -1; -+ } -+ } -+ } -+ else { -+// be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage); -+ size_t size = fmt->fmt.pix.sizeimage; -+ be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size); -+ if (!be->dh[0]) -+ return -1; -+ } -+ return 0; -+} -+ -+static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd, -+ const enum v4l2_buf_type buftype, -+ uint32_t pixfmt, -+ const unsigned int width, const unsigned int height, -+ const size_t bufsize) -+{ -+ *fmt = (struct v4l2_format){.type = buftype}; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { -+ fmt->fmt.pix_mp.width = width; -+ fmt->fmt.pix_mp.height = height; -+ fmt->fmt.pix_mp.pixelformat = pixfmt; -+ if (bufsize) { -+ fmt->fmt.pix_mp.num_planes = 1; -+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize; -+ } -+ } -+ else { -+ fmt->fmt.pix.width = width; -+ fmt->fmt.pix.height = height; -+ fmt->fmt.pix.pixelformat = pixfmt; -+ fmt->fmt.pix.sizeimage = bufsize; -+ } -+ -+ while (ioctl(fd, VIDIOC_S_FMT, fmt)) -+ if (errno != EINTR) -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+ -+ // Treat anything where we don't get at least what we asked for as a fail -+ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { -+ if (fmt->fmt.pix_mp.width < width || -+ 
fmt->fmt.pix_mp.height < height || -+ fmt->fmt.pix_mp.pixelformat != pixfmt) { -+ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; -+ } -+ } -+ else { -+ if (fmt->fmt.pix.width < width || -+ fmt->fmt.pix.height < height || -+ fmt->fmt.pix.pixelformat != pixfmt) { -+ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; -+ } -+ } -+ -+ return MEDIABUFS_STATUS_SUCCESS; -+} -+ -+static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt, -+ const int fd, -+ const unsigned int type_v4l2, -+ const uint32_t flags_must, -+ const uint32_t flags_not, -+ const unsigned int width, -+ const unsigned int height, -+ mediabufs_dst_fmt_accept_fn *const accept_fn, -+ void *const accept_v) -+{ -+ unsigned int i; -+ -+ for (i = 0;; ++i) { -+ struct v4l2_fmtdesc fmtdesc = { -+ .index = i, -+ .type = type_v4l2 -+ }; -+ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { -+ if (errno != EINTR) -+ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; -+ } -+ if ((fmtdesc.flags & flags_must) != flags_must || -+ (fmtdesc.flags & flags_not)) -+ continue; -+ if (!accept_fn(accept_v, &fmtdesc)) -+ continue; -+ -+ if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat, -+ width, height, 0) == MEDIABUFS_STATUS_SUCCESS) -+ return MEDIABUFS_STATUS_SUCCESS; -+ } -+ return 0; -+} -+ -+ -+/* Wait for qent done */ -+ -+MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst) -+{ -+ struct qent_base *const be = &be_dst->base; -+ enum qent_status estat; -+ -+ pthread_mutex_lock(&be_dst->lock); -+ while (be_dst->waiting && -+ !pthread_cond_wait(&be_dst->cond, &be_dst->lock)) -+ /* Loop */; -+ estat = be->status; -+ pthread_mutex_unlock(&be_dst->lock); -+ -+ return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS : -+ estat == QENT_ERROR ? 
MEDIABUFS_ERROR_DECODING_ERROR : -+ MEDIABUFS_ERROR_OPERATION_FAILED; -+} -+ -+const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no) -+{ -+ struct qent_base *const be = &be_dst->base; -+ return dmabuf_map(be->dh[buf_no]); -+} -+ -+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst) -+{ -+ struct qent_base *const be = &be_dst->base; -+ unsigned int i; -+ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { -+ if (dmabuf_read_start(be->dh[i])) { -+ while (i--) -+ dmabuf_read_end(be->dh[i]); -+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; -+ } -+ } -+ return MEDIABUFS_STATUS_SUCCESS; -+} -+ -+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst) -+{ -+ struct qent_base *const be = &be_dst->base; -+ unsigned int i; -+ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; -+ -+ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { -+ if (dmabuf_read_end(be->dh[i])) -+ status = MEDIABUFS_ERROR_OPERATION_FAILED; -+ } -+ return status; -+} -+ -+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst) -+{ -+ if (be_dst) -+ atomic_fetch_add(&be_dst->base.ref_count, 1); -+ return be_dst; -+} -+ -+void qent_dst_unref(struct qent_dst ** const pbe_dst) -+{ -+ struct qent_dst * const be_dst = *pbe_dst; -+ struct mediabufs_ctl * mbc; -+ if (!be_dst) -+ return; -+ *pbe_dst = NULL; -+ -+ if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0) -+ return; -+ -+ if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) { -+ queue_put_free(mbc->dst, &be_dst->base); -+ ff_weak_link_unlock(be_dst->mbc_wl); -+ } -+ else { -+ qe_dst_free(be_dst); -+ } -+} -+ -+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, -+ unsigned int plane, -+ int fd, size_t size) -+{ -+ struct qent_base *const be = &be_dst->base; -+ struct dmabuf_h * dh; -+ -+ if (be->status != QENT_IMPORT || be->dh[plane]) -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+ -+ dh = dmabuf_import(fd, size); -+ if (!dh) -+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; -+ 
-+ be->dh[plane] = dh; -+ return MEDIABUFS_STATUS_SUCCESS; -+} -+ -+// Returns noof buffers created, -ve for error -+static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[]) -+{ -+ unsigned int i; -+ -+ struct v4l2_create_buffers cbuf = { -+ .count = n, -+ .memory = V4L2_MEMORY_DMABUF, -+ .format = mbc->dst_fmt, -+ }; -+ -+ while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) { -+ const int err = -errno; -+ if (err != EINTR) { -+ request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__); -+ return -err; -+ } -+ } -+ -+ if (cbuf.count != n) -+ request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n); -+ -+ for (i = 0; i != cbuf.count; ++i) -+ qes[i]->base.index = cbuf.index + i; -+ -+ return cbuf.count; -+} -+ -+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc) -+{ -+ struct qent_dst * be_dst; -+ -+ if (mbc == NULL) { -+ be_dst = qe_dst_new(NULL); -+ if (be_dst) -+ be_dst->base.status = QENT_IMPORT; -+ return be_dst; -+ } -+ -+ if (mbc->dst_fixed) { -+ be_dst = base_to_dst(queue_get_free(mbc->dst)); -+ if (!be_dst) -+ return NULL; -+ } -+ else { -+ be_dst = base_to_dst(queue_tryget_free(mbc->dst)); -+ if (!be_dst) { -+ be_dst = qe_dst_new(mbc->this_wlm); -+ if (!be_dst) -+ return NULL; -+ -+ if (create_dst_bufs(mbc, 1, &be_dst) != 1) { -+ qe_dst_free(be_dst); -+ return NULL; -+ } -+ } -+ } -+ -+ if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) { -+ /* Given how create buf works we can't uncreate it on alloc failure -+ * all we can do is put it on the free Q -+ */ -+ queue_put_free(mbc->dst, &be_dst->base); -+ return NULL; -+ } -+ -+ be_dst->base.status = QENT_PENDING; -+ atomic_store(&be_dst->base.ref_count, 0); -+ return be_dst; -+} -+ -+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc) -+{ -+ return &mbc->dst_fmt; -+} -+ -+MediaBufsStatus mediabufs_dst_fmt_set(struct 
mediabufs_ctl *const mbc, -+ const unsigned int width, -+ const unsigned int height, -+ mediabufs_dst_fmt_accept_fn *const accept_fn, -+ void *const accept_v) -+{ -+ MediaBufsStatus status; -+ unsigned int i; -+ const enum v4l2_buf_type buf_type = mbc->dst_fmt.type; -+ static const struct { -+ unsigned int flags_must; -+ unsigned int flags_not; -+ } trys[] = { -+ {0, V4L2_FMT_FLAG_EMULATED}, -+ {V4L2_FMT_FLAG_EMULATED, 0}, -+ }; -+ for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) { -+ status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, -+ buf_type, -+ trys[i].flags_must, -+ trys[i].flags_not, -+ width, height, accept_fn, accept_v); -+ if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE) -+ return status; -+ } -+ -+ if (status != MEDIABUFS_STATUS_SUCCESS) -+ return status; -+ -+ /* Try to create a buffer - don't alloc */ -+ return status; -+} -+ -+// ** This is a mess if we get partial alloc but without any way to remove -+// individual V4L2 Q members we are somewhat stuffed -+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed) -+{ -+ unsigned int i; -+ int a = 0; -+ unsigned int qc; -+ struct qent_dst * qes[32]; -+ -+ if (n > 32) -+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; -+ -+ // Create qents first as it is hard to get rid of the V4L2 buffers on error -+ for (qc = 0; qc != n; ++qc) -+ { -+ if ((qes[qc] = qe_dst_new(mbc->this_wlm)) == NULL) -+ goto fail; -+ } -+ -+ if ((a = create_dst_bufs(mbc, n, qes)) < 0) -+ goto fail; -+ -+ for (i = 0; i != a; ++i) -+ queue_put_free(mbc->dst, &qes[i]->base); -+ -+ if (a != n) -+ goto fail; -+ -+ mbc->dst_fixed = fixed; -+ return MEDIABUFS_STATUS_SUCCESS; -+ -+fail: -+ for (i = (a < 0 ? 
0 : a); i != qc; ++i) -+ qe_dst_free(qes[i]); -+ -+ return MEDIABUFS_ERROR_ALLOCATION_FAILED; -+} -+ -+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc) -+{ -+ struct qent_base * buf = queue_get_free(mbc->src); -+ buf->status = QENT_PENDING; -+ return base_to_src(buf); -+} -+ -+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src) -+{ -+ struct qent_src *const qe_src = *pqe_src; -+ if (!qe_src) -+ return; -+ *pqe_src = NULL; -+ queue_put_free(mbc->src, &qe_src->base); -+} -+ -+/* src format must have been set up before this */ -+MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, -+ struct dmabufs_ctl * const dbsc, -+ unsigned int n) -+{ -+ unsigned int i; -+ struct v4l2_requestbuffers req = { -+ .count = n, -+ .type = mbc->src_fmt.type, -+ .memory = V4L2_MEMORY_DMABUF -+ }; -+ -+ bq_free_all_free_src(mbc->src); -+ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) { -+ if (errno != EINTR) { -+ request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__); -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+ } -+ } -+ -+ if (n > req.count) { -+ request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n); -+ n = req.count; -+ } -+ -+ for (i = 0; i != n; ++i) { -+ struct qent_src *const be_src = qe_src_new(); -+ if (!be_src) { -+ request_err(mbc->dc, "Failed to create src be %d\n", i); -+ goto fail; -+ } -+ if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) { -+ qe_src_free(be_src); -+ goto fail; -+ } -+ be_src->base.index = i; -+ be_src->fixed_size = !mediabufs_src_resizable(mbc); -+ -+ queue_put_free(mbc->src, &be_src->base); -+ } -+ -+ return MEDIABUFS_STATUS_SUCCESS; -+ -+fail: -+ bq_free_all_free_src(mbc->src); -+ req.count = 0; -+ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 && -+ errno == EINTR) -+ /* Loop */; -+ -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+} -+ -+ -+ -+/* -+ * Set stuff order: -+ * Set src fmt -+ * Set parameters 
(sps) on vfd -+ * Negotiate dst format (dst_fmt_set) -+ * Create src buffers -+ * Alloc a dst buffer or Create dst slots -+*/ -+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc) -+{ -+ if (mbc->stream_on) -+ return MEDIABUFS_STATUS_SUCCESS; -+ -+ if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) { -+ request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type); -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+ } -+ -+ if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) { -+ request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type); -+ set_stream(mbc->vfd, mbc->src_fmt.type, false); -+ return MEDIABUFS_ERROR_OPERATION_FAILED; -+ } -+ -+ mbc->stream_on = true; -+ return MEDIABUFS_STATUS_SUCCESS; -+} -+ -+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc) -+{ -+ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; -+ -+ if (!mbc->stream_on) -+ return MEDIABUFS_STATUS_SUCCESS; -+ -+ if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) { -+ request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type); -+ status = MEDIABUFS_ERROR_OPERATION_FAILED; -+ } -+ -+ if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) { -+ request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type); -+ status = MEDIABUFS_ERROR_OPERATION_FAILED; -+ } -+ -+ mbc->stream_on = false; -+ return status; -+} -+ -+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n) -+{ -+ struct v4l2_ext_controls controls = { -+ .controls = control_array, -+ .count = n -+ }; -+ -+ if (mreq) { -+ controls.which = V4L2_CTRL_WHICH_REQUEST_VAL; -+ controls.request_fd = media_request_fd(mreq); -+ } -+ -+ while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls)) -+ { -+ const int err = errno; -+ if (err != EINTR) { -+ request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err)); -+ return -err; -+ } -+ } -+ -+ return 0; -+} -+ 
-+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, -+ struct media_request * const mreq, -+ unsigned int id, void *data, -+ unsigned int size) -+{ -+ struct v4l2_ext_control control = { -+ .id = id, -+ .ptr = data, -+ .size = size -+ }; -+ -+ int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1); -+ return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED; -+} -+ -+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, -+ enum v4l2_buf_type buf_type, -+ const uint32_t pixfmt, -+ const uint32_t width, const uint32_t height, -+ const size_t bufsize) -+{ -+ MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize); -+ if (rv != MEDIABUFS_STATUS_SUCCESS) -+ request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height); -+ -+ return rv; -+} -+ -+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n) -+{ -+ int rv = 0; -+ while (n--) { -+ while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) { -+ const int err = errno; -+ if (err != EINTR) { -+ // Often used for probing - errors are to be expected -+ request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err); -+ ctrls->type = 0; // 0 is invalid -+ rv = -err; -+ break; -+ } -+ } -+ ++ctrls; -+ } -+ return rv; -+} -+ -+int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc) -+{ -+ // Single planar OUTPUT can only take exact size buffers -+ // Multiplanar will take larger than negotiated -+ return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type); -+} -+ -+static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc) -+{ -+ if (!mbc) -+ return; -+ -+ // Break the weak link first -+ ff_weak_link_break(&mbc->this_wlm); -+ -+ polltask_delete(&mbc->pt); -+ -+ mediabufs_stream_off(mbc); -+ -+ // Empty v4l2 buffer stash -+ request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0); -+ 
request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0); -+ -+ bq_free_all_free_src(mbc->src); -+ bq_free_all_inuse_src(mbc->src); -+ bq_free_all_free_dst(mbc->dst); -+ -+ { -+ struct qent_dst *dst_be; -+ while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) { -+ dst_be->base.timestamp = (struct timeval){0}; -+ dst_be->base.status = QENT_ERROR; -+ qe_dst_done(dst_be); -+ } -+ } -+ -+ queue_delete(mbc->dst); -+ queue_delete(mbc->src); -+ close(mbc->vfd); -+ pthread_mutex_destroy(&mbc->lock); -+ -+ free(mbc); -+} -+ -+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc) -+{ -+ atomic_fetch_add(&mbc->ref_count, 1); -+ return mbc; -+} -+ -+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc) -+{ -+ struct mediabufs_ctl *const mbc = *pmbc; -+ int n; -+ -+ if (!mbc) -+ return; -+ *pmbc = NULL; -+ n = atomic_fetch_sub(&mbc->ref_count, 1); -+ if (n) -+ return; -+ mediabufs_ctl_delete(mbc); -+} -+ -+static int set_capabilities(struct mediabufs_ctl *const mbc) -+{ -+ struct v4l2_capability capability = { 0 }; -+ uint32_t caps; -+ -+ if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &capability)) { -+ int err = errno; -+ request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err)); -+ return -err; -+ } -+ -+ caps = (capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? 
-+ capability.device_caps : -+ capability.capabilities; -+ -+ if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) { -+ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; -+ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; -+ } -+ else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) { -+ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ } -+ else { -+ request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+/* One of these per context */ -+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq) -+{ -+ struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); -+ -+ if (!mbc) -+ return NULL; -+ -+ mbc->dc = dc; -+ // Default mono planar -+ mbc->pq = pq; -+ pthread_mutex_init(&mbc->lock, NULL); -+ -+ /* Pick a default - could we scan for this? */ -+ if (vpath == NULL) -+ vpath = "/dev/media0"; -+ -+ while ((mbc->vfd = open(vpath, O_RDWR)) == -1) -+ { -+ const int err = errno; -+ if (err != EINTR) { -+ request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err)); -+ goto fail0; -+ } -+ } -+ -+ if (set_capabilities(mbc)) { -+ request_err(dc, "Bad capabilities for video dev '%s'\n", vpath); -+ goto fail1; -+ } -+ -+ mbc->src = queue_new(mbc->vfd); -+ if (!mbc->src) -+ goto fail1; -+ mbc->dst = queue_new(mbc->vfd); -+ if (!mbc->dst) -+ goto fail2; -+ mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc); -+ if (!mbc->pt) -+ goto fail3; -+ mbc->this_wlm = ff_weak_link_new(mbc); -+ if (!mbc->this_wlm) -+ goto fail4; -+ -+ /* Cannot add polltask now - polling with nothing pending -+ * generates infinite error polls -+ */ -+ return mbc; -+ -+fail4: -+ polltask_delete(&mbc->pt); -+fail3: -+ queue_delete(mbc->dst); -+fail2: -+ queue_delete(mbc->src); -+fail1: -+ close(mbc->vfd); -+fail0: -+ free(mbc); -+ request_info(dc, "%s: FAILED\n", __func__); -+ return NULL; -+} -+ -+ -+ -diff --git 
a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h -new file mode 100644 -index 000000000000..2f826cfb14e7 ---- /dev/null -+++ b/libavcodec/v4l2_req_media.h -@@ -0,0 +1,151 @@ -+/* -+e.h -+* -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the -+ * "Software"), to deal in the Software without restriction, including -+ * without limitation the rights to use, copy, modify, merge, publish, -+ * distribute, sub license, and/or sell copies of the Software, and to -+ * permit persons to whom the Software is furnished to do so, subject to -+ * the following conditions: -+ * -+ * The above copyright notice and this permission notice (including the -+ * next paragraph) shall be included in all copies or substantial portions -+ * of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. -+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR -+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-+ */ -+ -+#ifndef _MEDIA_H_ -+#define _MEDIA_H_ -+ -+#include -+#include -+ -+struct v4l2_format; -+struct v4l2_fmtdesc; -+struct v4l2_query_ext_ctrl; -+ -+struct pollqueue; -+struct media_request; -+struct media_pool; -+ -+typedef enum media_buf_status { -+ MEDIABUFS_STATUS_SUCCESS = 0, -+ MEDIABUFS_ERROR_OPERATION_FAILED, -+ MEDIABUFS_ERROR_DECODING_ERROR, -+ MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, -+ MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT, -+ MEDIABUFS_ERROR_ALLOCATION_FAILED, -+} MediaBufsStatus; -+ -+struct media_pool * media_pool_new(const char * const media_path, -+ struct pollqueue * const pq, -+ const unsigned int n); -+void media_pool_delete(struct media_pool ** pmp); -+ -+// Obtain a media request -+// Will block if none availible - has a 2sec timeout -+struct media_request * media_request_get(struct media_pool * const mp); -+int media_request_fd(const struct media_request * const req); -+ -+// Start this request -+// Request structure is returned to pool once done -+int media_request_start(struct media_request * const req); -+ -+// Return an *unstarted* media_request to the pool -+// May later be upgraded to allow for aborting a started req -+int media_request_abort(struct media_request ** const preq); -+ -+ -+struct mediabufs_ctl; -+struct qent_src; -+struct qent_dst; -+struct dmabuf_h; -+struct dmabufs_ctl; -+ -+int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp); -+struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst); -+ -+// prealloc -+int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc); -+// dbsc may be NULL if realloc not required -+int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc); -+const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane); -+int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane); 
-+MediaBufsStatus qent_dst_wait(struct qent_dst *const be); -+void qent_dst_delete(struct qent_dst *const be); -+// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead -+void qent_dst_unref(struct qent_dst ** const pbe_dst); -+struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst); -+ -+const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no); -+MediaBufsStatus qent_dst_read_start(struct qent_dst *const be); -+MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be); -+/* Import an fd unattached to any mediabuf */ -+MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, -+ unsigned int plane, -+ int fd, size_t size); -+ -+MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, -+ struct media_request **const pmreq, -+ struct qent_src **const psrc_be, -+ struct qent_dst *const dst_be, -+ const bool is_final); -+// Get / alloc a dst buffer & associate with a slot -+// If the dst pool is empty then behaviour depends on the fixed flag passed to -+// dst_slots_create. 
Default is !fixed = unlimited alloc -+struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, -+ struct dmabufs_ctl *const dbsc); -+// Create dst slots without alloc -+// If fixed true then qent_alloc will only get slots from this pool and will -+// block until a qent has been unrefed -+MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed); -+ -+MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc); -+MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc); -+const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc); -+ -+typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc); -+ -+MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, -+ const unsigned int width, -+ const unsigned int height, -+ mediabufs_dst_fmt_accept_fn *const accept_fn, -+ void *const accept_v); -+struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc); -+void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src); -+ -+int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, -+ struct v4l2_ext_control control_array[], unsigned int n); -+MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, -+ struct media_request * const mreq, -+ unsigned int id, void *data, -+ unsigned int size); -+int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n); -+ -+int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc); -+ -+MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, -+ enum v4l2_buf_type buf_type, -+ const uint32_t pixfmt, -+ const uint32_t width, const uint32_t height, -+ const size_t bufsize); -+ -+MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, -+ struct dmabufs_ctl * const dbsc, -+ unsigned int n); -+ 
-+struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, -+ const char *vpath, struct pollqueue *const pq); -+void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc); -+struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc); -+ -+ -+#endif -diff --git a/libavcodec/v4l2_req_pollqueue.c b/libavcodec/v4l2_req_pollqueue.c -new file mode 100644 -index 000000000000..cc8a5d400120 ---- /dev/null -+++ b/libavcodec/v4l2_req_pollqueue.c -@@ -0,0 +1,361 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "v4l2_req_pollqueue.h" -+#include "v4l2_req_utils.h" -+ -+ -+struct pollqueue; -+ -+enum polltask_state { -+ POLLTASK_UNQUEUED = 0, -+ POLLTASK_QUEUED, -+ POLLTASK_RUNNING, -+ POLLTASK_Q_KILL, -+ POLLTASK_RUN_KILL, -+}; -+ -+struct polltask { -+ struct polltask *next; -+ struct polltask *prev; -+ struct pollqueue *q; -+ enum polltask_state state; -+ -+ int fd; -+ short events; -+ -+ void (*fn)(void *v, short revents); -+ void * v; -+ -+ uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */ -+ sem_t kill_sem; -+}; -+ -+struct pollqueue { -+ atomic_int ref_count; -+ pthread_mutex_t lock; -+ -+ struct polltask *head; -+ struct polltask *tail; -+ -+ bool kill; -+ bool no_prod; -+ int prod_fd; -+ struct polltask *prod_pt; -+ pthread_t worker; -+}; -+ -+struct polltask *polltask_new(struct pollqueue *const pq, -+ const int fd, const short events, -+ void (*const fn)(void *v, short revents), -+ void *const v) -+{ -+ struct polltask *pt; -+ -+ if (!events) -+ return NULL; -+ -+ pt = malloc(sizeof(*pt)); -+ if (!pt) -+ return NULL; -+ -+ *pt = (struct polltask){ -+ .next = NULL, -+ .prev = NULL, -+ .q = pollqueue_ref(pq), -+ .fd = fd, -+ .events = events, -+ .fn = fn, -+ .v = v -+ }; -+ -+ sem_init(&pt->kill_sem, 0, 0); -+ -+ return pt; -+} -+ -+static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt) -+{ -+ if (pt->prev) 
-+ pt->prev->next = pt->next; -+ else -+ pq->head = pt->next; -+ if (pt->next) -+ pt->next->prev = pt->prev; -+ else -+ pq->tail = pt->prev; -+ pt->next = NULL; -+ pt->prev = NULL; -+} -+ -+static void polltask_free(struct polltask * const pt) -+{ -+ sem_destroy(&pt->kill_sem); -+ free(pt); -+} -+ -+static int pollqueue_prod(const struct pollqueue *const pq) -+{ -+ static const uint64_t one = 1; -+ return write(pq->prod_fd, &one, sizeof(one)); -+} -+ -+void polltask_delete(struct polltask **const ppt) -+{ -+ struct polltask *const pt = *ppt; -+ struct pollqueue * pq; -+ enum polltask_state state; -+ bool prodme; -+ -+ if (!pt) -+ return; -+ -+ pq = pt->q; -+ pthread_mutex_lock(&pq->lock); -+ state = pt->state; -+ pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL; -+ prodme = !pq->no_prod; -+ pthread_mutex_unlock(&pq->lock); -+ -+ if (state != POLLTASK_UNQUEUED) { -+ if (prodme) -+ pollqueue_prod(pq); -+ while (sem_wait(&pt->kill_sem) && errno == EINTR) -+ /* loop */; -+ } -+ -+ // Leave zapping the ref until we have DQed the PT as might well be -+ // legitimately used in it -+ *ppt = NULL; -+ polltask_free(pt); -+ pollqueue_unref(&pq); -+} -+ -+static uint64_t pollqueue_now(int timeout) -+{ -+ struct timespec now; -+ uint64_t now_ms; -+ -+ if (clock_gettime(CLOCK_MONOTONIC, &now)) -+ return 0; -+ now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout; -+ return now_ms ? now_ms : (uint64_t)1; -+} -+ -+void pollqueue_add_task(struct polltask *const pt, const int timeout) -+{ -+ bool prodme = false; -+ struct pollqueue * const pq = pt->q; -+ -+ pthread_mutex_lock(&pq->lock); -+ if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) { -+ if (pq->tail) -+ pq->tail->next = pt; -+ else -+ pq->head = pt; -+ pt->prev = pq->tail; -+ pt->next = NULL; -+ pt->state = POLLTASK_QUEUED; -+ pt->timeout = timeout < 0 ? 
0 : pollqueue_now(timeout); -+ pq->tail = pt; -+ prodme = !pq->no_prod; -+ } -+ pthread_mutex_unlock(&pq->lock); -+ if (prodme) -+ pollqueue_prod(pq); -+} -+ -+static void *poll_thread(void *v) -+{ -+ struct pollqueue *const pq = v; -+ struct pollfd *a = NULL; -+ size_t asize = 0; -+ -+ pthread_mutex_lock(&pq->lock); -+ do { -+ unsigned int i; -+ unsigned int n = 0; -+ struct polltask *pt; -+ struct polltask *pt_next; -+ uint64_t now = pollqueue_now(0); -+ int timeout = -1; -+ int rv; -+ -+ for (pt = pq->head; pt; pt = pt_next) { -+ int64_t t; -+ -+ pt_next = pt->next; -+ -+ if (pt->state == POLLTASK_Q_KILL) { -+ pollqueue_rem_task(pq, pt); -+ sem_post(&pt->kill_sem); -+ continue; -+ } -+ -+ if (n >= asize) { -+ asize = asize ? asize * 2 : 4; -+ a = realloc(a, asize * sizeof(*a)); -+ if (!a) { -+ request_log("Failed to realloc poll array to %zd\n", asize); -+ goto fail_locked; -+ } -+ } -+ -+ a[n++] = (struct pollfd){ -+ .fd = pt->fd, -+ .events = pt->events -+ }; -+ -+ t = (int64_t)(pt->timeout - now); -+ if (pt->timeout && t < INT_MAX && -+ (timeout < 0 || (int)t < timeout)) -+ timeout = (t < 0) ? 0 : (int)t; -+ } -+ pthread_mutex_unlock(&pq->lock); -+ -+ if ((rv = poll(a, n, timeout)) == -1) { -+ if (errno != EINTR) { -+ request_log("Poll error: %s\n", strerror(errno)); -+ goto fail_unlocked; -+ } -+ } -+ -+ pthread_mutex_lock(&pq->lock); -+ now = pollqueue_now(0); -+ -+ /* Prodding in this loop is pointless and might lead to -+ * infinite looping -+ */ -+ pq->no_prod = true; -+ for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) { -+ pt_next = pt->next; -+ -+ /* Pending? 
*/ -+ if (a[i].revents || -+ (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) { -+ pollqueue_rem_task(pq, pt); -+ if (pt->state == POLLTASK_QUEUED) -+ pt->state = POLLTASK_RUNNING; -+ if (pt->state == POLLTASK_Q_KILL) -+ pt->state = POLLTASK_RUN_KILL; -+ pthread_mutex_unlock(&pq->lock); -+ -+ /* This can add new entries to the Q but as -+ * those are added to the tail our existing -+ * chain remains intact -+ */ -+ pt->fn(pt->v, a[i].revents); -+ -+ pthread_mutex_lock(&pq->lock); -+ if (pt->state == POLLTASK_RUNNING) -+ pt->state = POLLTASK_UNQUEUED; -+ if (pt->state == POLLTASK_RUN_KILL) -+ sem_post(&pt->kill_sem); -+ } -+ } -+ pq->no_prod = false; -+ -+ } while (!pq->kill); -+ -+fail_locked: -+ pthread_mutex_unlock(&pq->lock); -+fail_unlocked: -+ free(a); -+ return NULL; -+} -+ -+static void prod_fn(void *v, short revents) -+{ -+ struct pollqueue *const pq = v; -+ char buf[8]; -+ if (revents) -+ read(pq->prod_fd, buf, 8); -+ if (!pq->kill) -+ pollqueue_add_task(pq->prod_pt, -1); -+} -+ -+struct pollqueue * pollqueue_new(void) -+{ -+ struct pollqueue *pq = malloc(sizeof(*pq)); -+ if (!pq) -+ return NULL; -+ *pq = (struct pollqueue){ -+ .ref_count = ATOMIC_VAR_INIT(0), -+ .lock = PTHREAD_MUTEX_INITIALIZER, -+ .head = NULL, -+ .tail = NULL, -+ .kill = false, -+ .prod_fd = -1 -+ }; -+ -+ pq->prod_fd = eventfd(0, EFD_NONBLOCK); -+ if (pq->prod_fd == 1) -+ goto fail1; -+ pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq); -+ if (!pq->prod_pt) -+ goto fail2; -+ pollqueue_add_task(pq->prod_pt, -1); -+ if (pthread_create(&pq->worker, NULL, poll_thread, pq)) -+ goto fail3; -+ // Reset ref count which will have been inced by the add_task -+ atomic_store(&pq->ref_count, 0); -+ return pq; -+ -+fail3: -+ polltask_free(pq->prod_pt); -+fail2: -+ close(pq->prod_fd); -+fail1: -+ free(pq); -+ return NULL; -+} -+ -+static void pollqueue_free(struct pollqueue *const pq) -+{ -+ void *rv; -+ -+ pthread_mutex_lock(&pq->lock); -+ pq->kill = true; -+ pollqueue_prod(pq); 
-+ pthread_mutex_unlock(&pq->lock); -+ -+ pthread_join(pq->worker, &rv); -+ polltask_free(pq->prod_pt); -+ pthread_mutex_destroy(&pq->lock); -+ close(pq->prod_fd); -+ free(pq); -+} -+ -+struct pollqueue * pollqueue_ref(struct pollqueue *const pq) -+{ -+ atomic_fetch_add(&pq->ref_count, 1); -+ return pq; -+} -+ -+void pollqueue_unref(struct pollqueue **const ppq) -+{ -+ struct pollqueue * const pq = *ppq; -+ -+ if (!pq) -+ return; -+ *ppq = NULL; -+ -+ if (atomic_fetch_sub(&pq->ref_count, 1) != 0) -+ return; -+ -+ pollqueue_free(pq); -+} -+ -+ -+ -diff --git a/libavcodec/v4l2_req_pollqueue.h b/libavcodec/v4l2_req_pollqueue.h -new file mode 100644 -index 000000000000..e1182cb2fc92 ---- /dev/null -+++ b/libavcodec/v4l2_req_pollqueue.h -@@ -0,0 +1,18 @@ -+#ifndef POLLQUEUE_H_ -+#define POLLQUEUE_H_ -+ -+struct polltask; -+struct pollqueue; -+ -+struct polltask *polltask_new(struct pollqueue *const pq, -+ const int fd, const short events, -+ void (*const fn)(void *v, short revents), -+ void *const v); -+void polltask_delete(struct polltask **const ppt); -+ -+void pollqueue_add_task(struct polltask *const pt, const int timeout); -+struct pollqueue * pollqueue_new(void); -+void pollqueue_unref(struct pollqueue **const ppq); -+struct pollqueue * pollqueue_ref(struct pollqueue *const pq); -+ -+#endif /* POLLQUEUE_H_ */ -diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h -new file mode 100644 -index 000000000000..a31cc1f4ec2a ---- /dev/null -+++ b/libavcodec/v4l2_req_utils.h -@@ -0,0 +1,27 @@ -+#ifndef AVCODEC_V4L2_REQ_UTILS_H -+#define AVCODEC_V4L2_REQ_UTILS_H -+ -+#include -+#include "libavutil/log.h" -+ -+#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) -+ -+#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__) -+#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__) -+#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__) -+#define request_debug(_ctx, ...) 
av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__) -+ -+static inline char safechar(char c) { -+ return c > 0x20 && c < 0x7f ? c : '.'; -+} -+ -+static inline const char * strfourcc(char tbuf[5], uint32_t fcc) { -+ tbuf[0] = safechar((fcc >> 0) & 0xff); -+ tbuf[1] = safechar((fcc >> 8) & 0xff); -+ tbuf[2] = safechar((fcc >> 16) & 0xff); -+ tbuf[3] = safechar((fcc >> 24) & 0xff); -+ tbuf[4] = '\0'; -+ return tbuf; -+} -+ -+#endif -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -new file mode 100644 -index 000000000000..b0a5930844a8 ---- /dev/null -+++ b/libavcodec/v4l2_request_hevc.c -@@ -0,0 +1,297 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+ -+ -+#include "decode.h" -+#include "hevcdec.h" -+#include "hwconfig.h" -+#include "internal.h" -+ -+#include "v4l2_request_hevc.h" -+ -+#include "libavutil/hwcontext_drm.h" -+ -+#include "v4l2_req_devscan.h" -+#include "v4l2_req_dmabufs.h" -+#include "v4l2_req_pollqueue.h" -+#include "v4l2_req_media.h" -+#include "v4l2_req_utils.h" -+ -+static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8) -+{ -+ const size_t wxh = w * h; -+ size_t bits_alloc; -+ -+ /* Annex A gives a min compression of 2 @ lvl 3.1 -+ * (wxh <= 983040) and min 4 thereafter but avoid -+ * the odity of 983041 having a lower limit than -+ * 983040. -+ * Multiply by 3/2 for 4:2:0 -+ */ -+ bits_alloc = wxh < 983040 ? wxh * 3 / 4 : -+ wxh < 983040 * 2 ? 983040 * 3 / 4 : -+ wxh * 3 / 8; -+ /* Allow for bit depth */ -+ bits_alloc += (bits_alloc * bits_minus8) / 8; -+ /* Add a few bytes (16k) for overhead */ -+ bits_alloc += 0x4000; -+ return bits_alloc; -+} -+ -+static int v4l2_req_hevc_start_frame(AVCodecContext *avctx, -+ av_unused const uint8_t *buffer, -+ av_unused uint32_t size) -+{ -+ const V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ return ctx->fns->start_frame(avctx, buffer, size); -+} -+ -+static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) -+{ -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ return ctx->fns->decode_slice(avctx, buffer, size); -+} -+ -+static int v4l2_req_hevc_end_frame(AVCodecContext *avctx) -+{ -+ V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; -+ return ctx->fns->end_frame(avctx); -+} -+ -+static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx) -+{ -+ V4L2RequestContextHEVC * 
const ctx = avctx->internal->hwaccel_priv_data; -+ ctx->fns->abort_frame(avctx); -+} -+ -+static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) -+{ -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ return ctx->fns->frame_params(avctx, hw_frames_ctx); -+} -+ -+static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame) -+{ -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ return ctx->fns->alloc_frame(avctx, frame); -+} -+ -+ -+static int v4l2_request_hevc_uninit(AVCodecContext *avctx) -+{ -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ -+ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); -+ -+ decode_q_wait(&ctx->decode_q, NULL); // Wait for all other threads to be out of decode -+ -+ mediabufs_ctl_unref(&ctx->mbufs); -+ media_pool_delete(&ctx->mpool); -+ pollqueue_unref(&ctx->pq); -+ dmabufs_ctl_delete(&ctx->dbufs); -+ devscan_delete(&ctx->devscan); -+ -+ decode_q_uninit(&ctx->decode_q); -+ -+// if (avctx->hw_frames_ctx) { -+// AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data; -+// av_buffer_pool_flush(hwfc->pool); -+// } -+ return 0; -+} -+ -+static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc) -+{ -+ AVCodecContext *const avctx = v; -+ const HEVCContext *const h = avctx->priv_data; -+ -+ if (h->ps.sps->bit_depth == 8) { -+ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 || -+ fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) { -+ return 1; -+ } -+ } -+ else if (h->ps.sps->bit_depth == 10) { -+ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { -+ return 1; -+ } -+ } -+ return 0; -+} -+ -+static int v4l2_request_hevc_init(AVCodecContext *avctx) -+{ -+ const HEVCContext *h = avctx->priv_data; -+ V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; -+ const HEVCSPS * const sps = h->ps.sps; -+ int ret; -+ const struct decdev * decdev; -+ const uint32_t 
src_pix_fmt = V2(ff_v4l2_req_hevc, 1).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes -+ size_t src_size; -+ -+ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); -+ -+ if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) { -+ av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n"); -+ return (AVERROR(-ret)); -+ } -+ ret = AVERROR(ENOMEM); // Assume mem fail by default for these -+ -+ if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL) -+ { -+ av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n"); -+ ret = AVERROR(ENODEV); -+ goto fail0; -+ } -+ av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n", -+ decdev_media_path(decdev), decdev_video_path(decdev)); -+ -+ if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "Unable to open dmabufs\n"); -+ goto fail0; -+ } -+ -+ if ((ctx->pq = pollqueue_new()) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n"); -+ goto fail1; -+ } -+ -+ if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n"); -+ goto fail2; -+ } -+ -+ if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n"); -+ goto fail3; -+ } -+ -+ // Ask for an initial bitbuf size of max size / 4 -+ // We will realloc if we need more -+ // Must use sps->h/w as avctx contains cropped size -+ src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8); -+ if (mediabufs_src_resizable(ctx->mbufs)) -+ src_size /= 4; -+ // Kludge for conformance tests which break Annex A limits -+ else if (src_size < 0x40000) -+ src_size = 0x40000; -+ -+ if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt, -+ sps->width, sps->height, src_size)) { -+ char tbuf1[5]; -+ av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", 
strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); -+ goto fail4; -+ } -+ -+ if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) { -+ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n"); -+ ctx->fns = &V2(ff_v4l2_req_hevc, 2); -+ } -+ else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) { -+ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n"); -+ ctx->fns = &V2(ff_v4l2_req_hevc, 1); -+ } -+ else { -+ av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n"); -+ ret = AVERROR(EINVAL); -+ goto fail4; -+ } -+ -+ if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) { -+ char tbuf1[5]; -+ av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); -+ goto fail4; -+ } -+ -+ if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6)) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n"); -+ goto fail4; -+ } -+ -+ { -+ unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + -+ avctx->thread_count + (avctx->extra_hw_frames > 0 ? 
avctx->extra_hw_frames : 6); -+ av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots, -+ sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering, -+ avctx->thread_count, avctx->extra_hw_frames); -+ -+ // extra_hw_frames is -1 if unset -+ if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0))) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n"); -+ goto fail4; -+ } -+ } -+ -+ if (mediabufs_stream_on(ctx->mbufs)) { -+ av_log(avctx, AV_LOG_ERROR, "Failed stream on\n"); -+ goto fail4; -+ } -+ -+ if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n"); -+ goto fail4; -+ } -+ -+ if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed set controls\n"); -+ goto fail5; -+ } -+ -+ decode_q_init(&ctx->decode_q); -+ -+ // Set our s/w format -+ avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format; -+ -+ av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s\n", -+ ctx->fns->name, -+ decdev_media_path(decdev), decdev_video_path(decdev)); -+ -+ return 0; -+ -+fail5: -+ av_buffer_unref(&avctx->hw_frames_ctx); -+fail4: -+ mediabufs_ctl_unref(&ctx->mbufs); -+fail3: -+ media_pool_delete(&ctx->mpool); -+fail2: -+ pollqueue_unref(&ctx->pq); -+fail1: -+ dmabufs_ctl_delete(&ctx->dbufs); -+fail0: -+ devscan_delete(&ctx->devscan); -+ return ret; -+} -+ -+const AVHWAccel ff_hevc_v4l2request_hwaccel = { -+ .name = "hevc_v4l2request", -+ .type = AVMEDIA_TYPE_VIDEO, -+ .id = AV_CODEC_ID_HEVC, -+ .pix_fmt = AV_PIX_FMT_DRM_PRIME, -+ .alloc_frame = v4l2_req_hevc_alloc_frame, -+ .start_frame = v4l2_req_hevc_start_frame, -+ .decode_slice = v4l2_req_hevc_decode_slice, -+ .end_frame = v4l2_req_hevc_end_frame, -+ .abort_frame = v4l2_req_hevc_abort_frame, -+ .init = v4l2_request_hevc_init, -+ .uninit = v4l2_request_hevc_uninit, -+ .priv_data_size = 
sizeof(V4L2RequestContextHEVC), -+ .frame_params = v4l2_req_hevc_frame_params, -+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE, -+}; -diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h -new file mode 100644 -index 000000000000..f14f594564d7 ---- /dev/null -+++ b/libavcodec/v4l2_request_hevc.h -@@ -0,0 +1,102 @@ -+#ifndef AVCODEC_V4L2_REQUEST_HEVC_H -+#define AVCODEC_V4L2_REQUEST_HEVC_H -+ -+#include -+#include -+#include "v4l2_req_decode_q.h" -+ -+#ifndef DRM_FORMAT_NV15 -+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') -+#endif -+ -+#ifndef DRM_FORMAT_NV20 -+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') -+#endif -+ -+// P030 should be defined in drm_fourcc.h and hopefully will be sometime -+// in the future but until then... -+#ifndef DRM_FORMAT_P030 -+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') -+#endif -+ -+#ifndef DRM_FORMAT_NV15 -+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') -+#endif -+ -+#ifndef DRM_FORMAT_NV20 -+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') -+#endif -+ -+#include -+#ifndef V4L2_CID_CODEC_BASE -+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE -+#endif -+ -+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined -+// in drm_fourcc.h hopefully will be sometime in the future but until then... 
-+#ifndef V4L2_PIX_FMT_NV12_10_COL128 -+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') -+#endif -+ -+#ifndef V4L2_PIX_FMT_NV12_COL128 -+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ -+#endif -+ -+#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY -+#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 -+#endif -+ -+#define MAX_SLICES 128 -+ -+#define VCAT(name, version) name##_v##version -+#define V2(n,v) VCAT(n, v) -+#define V(n) V2(n, HEVC_CTRLS_VERSION) -+ -+#define S2(x) #x -+#define STR(x) S2(x) -+ -+// 1 per decoder -+struct v4l2_req_decode_fns; -+ -+typedef struct V4L2RequestContextHEVC { -+// V4L2RequestContext base; -+ const struct v4l2_req_decode_fns * fns; -+ -+ unsigned int timestamp; // ?? maybe uint64_t -+ -+ int multi_slice; -+ int decode_mode; -+ int start_code; -+ int max_slices; -+ -+ req_decode_q decode_q; -+ -+ struct devscan *devscan; -+ struct dmabufs_ctl *dbufs; -+ struct pollqueue *pq; -+ struct media_pool * mpool; -+ struct mediabufs_ctl *mbufs; -+} V4L2RequestContextHEVC; -+ -+typedef struct v4l2_req_decode_fns { -+ int src_pix_fmt_v4l2; -+ const char * name; -+ -+ // Init setup -+ int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); -+ int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); -+ -+ // Passthrough of hwaccel fns -+ int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); -+ int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size); -+ int (*end_frame)(AVCodecContext *avctx); -+ void (*abort_frame)(AVCodecContext *avctx); -+ int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx); -+ int (*alloc_frame)(AVCodecContext * avctx, AVFrame *frame); -+} v4l2_req_decode_fns; -+ -+ -+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); -+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); -+ -+#endif - -From 
43c7c3d42888304f2f5ca39739bf88baa3c1861e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Apr 2021 19:30:36 +0100 -Subject: [PATCH 013/186] Add no_cvt_hw option to ffmpeg - ---- - fftools/ffmpeg.c | 6 ++++-- - fftools/ffmpeg.h | 2 ++ - fftools/ffmpeg_opt.c | 3 +++ - 3 files changed, 9 insertions(+), 2 deletions(-) - -diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c -index ca5431aeb401..719463016216 100644 ---- a/fftools/ffmpeg.c -+++ b/fftools/ffmpeg.c -@@ -2008,6 +2008,9 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame, int keep_ref - (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data)) - need_reinit = 1; - -+ if (no_cvt_hw && fg->graph) -+ need_reinit = 0; -+ - if (sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX)) { - if (!ifilter->displaymatrix || memcmp(sd->data, ifilter->displaymatrix, sizeof(int32_t) * 9)) - need_reinit = 1; -@@ -2277,8 +2280,7 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_ - decoded_frame->top_field_first = ist->top_field_first; - - ist->frames_decoded++; -- -- if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { -+ if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) { - err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame); - if (err < 0) - goto fail; -diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h -index f1412f6446b8..8f478619b305 100644 ---- a/fftools/ffmpeg.h -+++ b/fftools/ffmpeg.h -@@ -729,6 +729,8 @@ extern enum VideoSyncMethod video_sync_method; - extern float frame_drop_threshold; - extern int do_benchmark; - extern int do_benchmark_all; -+extern int no_cvt_hw; -+extern int do_deinterlace; - extern int do_hex_dump; - extern int do_pkt_dump; - extern int copy_ts; -diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c -index 055275d81394..761db365883b 100644 ---- a/fftools/ffmpeg_opt.c -+++ b/fftools/ffmpeg_opt.c -@@ -71,6 +71,7 @@ enum 
VideoSyncMethod video_sync_method = VSYNC_AUTO; - float frame_drop_threshold = 0; - int do_benchmark = 0; - int do_benchmark_all = 0; -+int no_cvt_hw = 0; - int do_hex_dump = 0; - int do_pkt_dump = 0; - int copy_ts = 0; -@@ -1427,6 +1428,8 @@ const OptionDef options[] = { - "add timings for benchmarking" }, - { "benchmark_all", OPT_BOOL | OPT_EXPERT, { &do_benchmark_all }, - "add timings for each task" }, -+ { "no_cvt_hw", OPT_BOOL | OPT_EXPERT, { &no_cvt_hw }, -+ "do not auto-convert hw frames to sw" }, - { "progress", HAS_ARG | OPT_EXPERT, { .func_arg = opt_progress }, - "write program-readable progress information", "url" }, - { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction }, - -From 0b46976bcb9c699235dd9a3ff6528c2dfcdba4ec Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 28 Apr 2021 10:16:39 +0100 -Subject: [PATCH 014/186] Add vout_drm - ---- - configure | 4 + - libavdevice/Makefile | 1 + - libavdevice/alldevices.c | 1 + - libavdevice/drm_vout.c | 638 +++++++++++++++++++++++++++++++++++++++ - 4 files changed, 644 insertions(+) - create mode 100644 libavdevice/drm_vout.c - -diff --git a/configure b/configure -index c09144673050..fb72aa89a60c 100755 ---- a/configure -+++ b/configure -@@ -346,6 +346,7 @@ External library support: - --enable-libnpp enable Nvidia Performance Primitives-based code [no] - --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] - --enable-sand enable sand video formats [rpi] -+ --enable-vout-drm enable the vout_drm module - for internal testing only [no] - --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] - --disable-nvenc disable Nvidia video encoding code [autodetect] - --enable-omx enable OpenMAX IL code [no] -@@ -1940,6 +1941,7 @@ FEATURE_LIST=" - small - static - swscale_alpha -+ vout_drm - " - - # this list should be kept in linking order -@@ -3559,8 +3561,10 @@ sndio_indev_deps="sndio" - sndio_outdev_deps="sndio" - 
v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h" - v4l2_indev_suggest="libv4l2" -+v4l2_outdev_deps="libdrm" - v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" - v4l2_outdev_suggest="libv4l2" -+vout_drm_outdev_deps="libdrm vout_drm" - vfwcap_indev_deps="vfw32 vfwcap_defines" - xcbgrab_indev_deps="libxcb" - xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" -diff --git a/libavdevice/Makefile b/libavdevice/Makefile -index 8a62822b69ec..36aac301861a 100644 ---- a/libavdevice/Makefile -+++ b/libavdevice/Makefile -@@ -48,6 +48,7 @@ OBJS-$(CONFIG_SNDIO_OUTDEV) += sndio_enc.o sndio.o - OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o - OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o - OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o -+OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o - OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o - OBJS-$(CONFIG_XV_OUTDEV) += xv.o - -diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c -index 8a90fcb5d782..e2a8669f2712 100644 ---- a/libavdevice/alldevices.c -+++ b/libavdevice/alldevices.c -@@ -52,6 +52,7 @@ extern const FFOutputFormat ff_sndio_muxer; - extern const AVInputFormat ff_v4l2_demuxer; - extern const FFOutputFormat ff_v4l2_muxer; - extern const AVInputFormat ff_vfwcap_demuxer; -+extern const FFOutputFormat ff_vout_drm_muxer; - extern const AVInputFormat ff_xcbgrab_demuxer; - extern const FFOutputFormat ff_xv_muxer; - -diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c -new file mode 100644 -index 000000000000..cfb33ce7c319 ---- /dev/null -+++ b/libavdevice/drm_vout.c -@@ -0,0 +1,638 @@ -+/* -+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. 
-+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+ -+// *** This module is a work in progress and its utility is strictly -+// limited to testing. -+ -+#include "libavutil/opt.h" -+#include "libavutil/pixdesc.h" -+#include "libavutil/hwcontext_drm.h" -+#include "libavformat/mux.h" -+#include "avdevice.h" -+ -+#include "pthread.h" -+#include -+#include -+ -+#include -+#include -+ -+#define TRACE_ALL 0 -+ -+#define DRM_MODULE "vc4" -+ -+#define ERRSTR strerror(errno) -+ -+struct drm_setup { -+ int conId; -+ uint32_t crtcId; -+ int crtcIdx; -+ uint32_t planeId; -+ unsigned int out_fourcc; -+ struct { -+ int x, y, width, height; -+ } compose; -+}; -+ -+typedef struct drm_aux_s { -+ unsigned int fb_handle; -+ uint32_t bo_handles[AV_DRM_MAX_PLANES]; -+ AVFrame * frame; -+} drm_aux_t; -+ -+// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS -+// we get initial flicker probably due to dodgy drm timing -+#define AUX_SIZE 3 -+typedef struct drm_display_env_s -+{ -+ AVClass *class; -+ -+ int drm_fd; -+ uint32_t con_id; -+ struct drm_setup setup; -+ enum AVPixelFormat avfmt; -+ int show_all; -+ -+ unsigned int ano; -+ drm_aux_t aux[AUX_SIZE]; -+ -+ pthread_t q_thread; -+ sem_t q_sem_in; -+ sem_t q_sem_out; -+ int q_terminate; -+ AVFrame * q_next; -+ -+} drm_display_env_t; -+ -+ -+static int drm_vout_write_trailer(AVFormatContext *s) -+{ -+#if TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); -+#endif -+ -+ return 0; -+} -+ -+static int drm_vout_write_header(AVFormatContext *s) -+{ -+ const AVCodecParameters * 
const par = s->streams[0]->codecpar; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); -+#endif -+ if ( s->nb_streams > 1 -+ || par->codec_type != AVMEDIA_TYPE_VIDEO -+ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { -+ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ return 0; -+} -+ -+static int find_plane(struct AVFormatContext * const avctx, -+ const int drmfd, const int crtcidx, const uint32_t format, -+ uint32_t * const pplane_id) -+{ -+ drmModePlaneResPtr planes; -+ drmModePlanePtr plane; -+ unsigned int i; -+ unsigned int j; -+ int ret = 0; -+ -+ planes = drmModeGetPlaneResources(drmfd); -+ if (!planes) -+ { -+ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR); -+ return -1; -+ } -+ -+ for (i = 0; i < planes->count_planes; ++i) { -+ plane = drmModeGetPlane(drmfd, planes->planes[i]); -+ if (!planes) -+ { -+ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR); -+ break; -+ } -+ -+ if (!(plane->possible_crtcs & (1 << crtcidx))) { -+ drmModeFreePlane(plane); -+ continue; -+ } -+ -+ for (j = 0; j < plane->count_formats; ++j) { -+ if (plane->formats[j] == format) -+ break; -+ } -+ -+ if (j == plane->count_formats) { -+ drmModeFreePlane(plane); -+ continue; -+ } -+ -+ *pplane_id = plane->plane_id; -+ drmModeFreePlane(plane); -+ break; -+ } -+ -+ if (i == planes->count_planes) -+ ret = -1; -+ -+ drmModeFreePlaneResources(planes); -+ return ret; -+} -+ -+static void da_uninit(drm_display_env_t * const de, drm_aux_t * da) -+{ -+ if (da->fb_handle != 0) { -+ drmModeRmFB(de->drm_fd, da->fb_handle); -+ da->fb_handle = 0; -+ } -+ -+ for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) { -+ if (da->bo_handles[i]) { -+ struct drm_gem_close gem_close = {.handle = da->bo_handles[i]}; -+ drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close); -+ da->bo_handles[i] = 0; -+ } -+ } -+ av_frame_free(&da->frame); -+} -+ -+static int do_display(AVFormatContext 
* const s, drm_display_env_t * const de, AVFrame * frame) -+{ -+ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; -+ drm_aux_t * da = de->aux + de->ano; -+ const uint32_t format = desc->layers[0].format; -+ int ret = 0; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd); -+#endif -+ -+ if (de->setup.out_fourcc != format) { -+ if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) { -+ av_frame_free(&frame); -+ av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format); -+ return -1; -+ } -+ de->setup.out_fourcc = format; -+ } -+ -+ { -+ drmVBlank vbl = { -+ .request = { -+ .type = DRM_VBLANK_RELATIVE, -+ .sequence = 0 -+ } -+ }; -+ -+ while (drmWaitVBlank(de->drm_fd, &vbl)) { -+ if (errno != EINTR) { -+// av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR); -+ break; -+ } -+ } -+ } -+ -+ da_uninit(de, da); -+ -+ { -+ uint32_t pitches[4] = {0}; -+ uint32_t offsets[4] = {0}; -+ uint64_t modifiers[4] = {0}; -+ uint32_t bo_handles[4] = {0}; -+ int i, j, n; -+ -+ da->frame = frame; -+ -+ for (i = 0; i < desc->nb_objects; ++i) { -+ if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) { -+ av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR); -+ return -1; -+ } -+ } -+ -+ n = 0; -+ for (i = 0; i < desc->nb_layers; ++i) { -+ for (j = 0; j < desc->layers[i].nb_planes; ++j) { -+ const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; -+ const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; -+ pitches[n] = p->pitch; -+ offsets[n] = p->offset; -+ modifiers[n] = obj->format_modifier; -+ bo_handles[n] = da->bo_handles[p->object_index]; -+ ++n; -+ } -+ } -+ -+#if 1 && TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," -+ " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", -+ av_frame_cropped_width(frame), -+ 
av_frame_cropped_height(frame), -+ desc->layers[0].format, -+ bo_handles[0], -+ bo_handles[1], -+ bo_handles[2], -+ bo_handles[3], -+ pitches[0], -+ pitches[1], -+ pitches[2], -+ pitches[3], -+ offsets[0], -+ offsets[1], -+ offsets[2], -+ offsets[3], -+ (long long)modifiers[0], -+ (long long)modifiers[1], -+ (long long)modifiers[2], -+ (long long)modifiers[3] -+ ); -+#endif -+ -+ if (drmModeAddFB2WithModifiers(de->drm_fd, -+ av_frame_cropped_width(frame), -+ av_frame_cropped_height(frame), -+ desc->layers[0].format, bo_handles, -+ pitches, offsets, modifiers, -+ &da->fb_handle, DRM_MODE_FB_MODIFIERS /** 0 if no mods */) != 0) { -+ av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR); -+ return -1; -+ } -+ } -+ -+ ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId, -+ da->fb_handle, 0, -+ de->setup.compose.x, de->setup.compose.y, -+ de->setup.compose.width, -+ de->setup.compose.height, -+ 0, 0, -+ av_frame_cropped_width(frame) << 16, -+ av_frame_cropped_height(frame) << 16); -+ -+ if (ret != 0) { -+ av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR); -+ } -+ -+ de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1; -+ -+ return ret; -+} -+ -+static int do_sem_wait(sem_t * const sem, const int nowait) -+{ -+ while (nowait ? 
sem_trywait(sem) : sem_wait(sem)) { -+ if (errno != EINTR) -+ return -errno; -+ } -+ return 0; -+} -+ -+static void * display_thread(void * v) -+{ -+ AVFormatContext * const s = v; -+ drm_display_env_t * const de = s->priv_data; -+ int i; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); -+#endif -+ -+ sem_post(&de->q_sem_out); -+ -+ for (;;) { -+ AVFrame * frame; -+ -+ do_sem_wait(&de->q_sem_in, 0); -+ -+ if (de->q_terminate) -+ break; -+ -+ frame = de->q_next; -+ de->q_next = NULL; -+ sem_post(&de->q_sem_out); -+ -+ do_display(s, de, frame); -+ } -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); -+#endif -+ -+ for (i = 0; i != AUX_SIZE; ++i) -+ da_uninit(de, de->aux + i); -+ -+ av_frame_free(&de->q_next); -+ -+ return NULL; -+} -+ -+static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt) -+{ -+ const AVFrame * const src_frame = (AVFrame *)pkt->data; -+ AVFrame * frame; -+ drm_display_env_t * const de = s->priv_data; -+ int ret; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); -+#endif -+ -+ if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) { -+ av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts); -+ return 0; -+ } -+ -+ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { -+ frame = av_frame_alloc(); -+ av_frame_ref(frame, src_frame); -+ } -+ else if (src_frame->format == AV_PIX_FMT_VAAPI) { -+ frame = av_frame_alloc(); -+ frame->format = AV_PIX_FMT_DRM_PRIME; -+ if (av_hwframe_map(frame, src_frame, 0) != 0) -+ { -+ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); -+ av_frame_free(&frame); -+ return AVERROR(EINVAL); -+ } -+ } -+ else { -+ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); -+ return AVERROR(EINVAL); -+ } -+ -+ ret = do_sem_wait(&de->q_sem_out, !de->show_all); -+ if (ret) { -+ av_frame_free(&frame); -+ } -+ else { -+ de->q_next = frame; -+ 
sem_post(&de->q_sem_in); -+ } -+ -+ return 0; -+} -+ -+static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, -+ unsigned flags) -+{ -+ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); -+ return AVERROR_PATCHWELCOME; -+} -+ -+static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) -+{ -+#if TRACE_ALL -+ av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type); -+#endif -+ switch(type) { -+ case AV_APP_TO_DEV_WINDOW_REPAINT: -+ return 0; -+ default: -+ break; -+ } -+ return AVERROR(ENOSYS); -+} -+ -+static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId) -+{ -+ int ret = -1; -+ int i; -+ drmModeRes *res = drmModeGetResources(drmfd); -+ drmModeConnector *c; -+ -+ if(!res) -+ { -+ printf( "drmModeGetResources failed: %s\n", ERRSTR); -+ return -1; -+ } -+ -+ if (res->count_crtcs <= 0) -+ { -+ printf( "drm: no crts\n"); -+ goto fail_res; -+ } -+ -+ if (!s->conId) { -+ fprintf(stderr, -+ "No connector ID specified. Choosing default from list:\n"); -+ -+ for (i = 0; i < res->count_connectors; i++) { -+ drmModeConnector *con = -+ drmModeGetConnector(drmfd, res->connectors[i]); -+ drmModeEncoder *enc = NULL; -+ drmModeCrtc *crtc = NULL; -+ -+ if (con->encoder_id) { -+ enc = drmModeGetEncoder(drmfd, con->encoder_id); -+ if (enc->crtc_id) { -+ crtc = drmModeGetCrtc(drmfd, enc->crtc_id); -+ } -+ } -+ -+ if (!s->conId && crtc) { -+ s->conId = con->connector_id; -+ s->crtcId = crtc->crtc_id; -+ } -+ -+ av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n", -+ con->connector_id, -+ crtc ? crtc->crtc_id : 0, -+ con->connector_type, -+ crtc ? crtc->width : 0, -+ crtc ? crtc->height : 0, -+ (s->conId == (int)con->connector_id ? 
-+ " (chosen)" : "")); -+ } -+ -+ if (!s->conId) { -+ av_log(avctx, AV_LOG_ERROR, -+ "No suitable enabled connector found.\n"); -+ return -1;; -+ } -+ } -+ -+ s->crtcIdx = -1; -+ -+ for (i = 0; i < res->count_crtcs; ++i) { -+ if (s->crtcId == res->crtcs[i]) { -+ s->crtcIdx = i; -+ break; -+ } -+ } -+ -+ if (s->crtcIdx == -1) -+ { -+ av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId); -+ goto fail_res; -+ } -+ -+ if (res->count_connectors <= 0) -+ { -+ av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n"); -+ goto fail_res; -+ } -+ -+ c = drmModeGetConnector(drmfd, s->conId); -+ if (!c) -+ { -+ av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR); -+ goto fail_res; -+ } -+ -+ if (!c->count_modes) -+ { -+ av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n"); -+ goto fail_conn; -+ } -+ -+ { -+ drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId); -+ s->compose.x = crtc->x; -+ s->compose.y = crtc->y; -+ s->compose.width = crtc->width; -+ s->compose.height = crtc->height; -+ drmModeFreeCrtc(crtc); -+ } -+ -+ if (pConId) -+ *pConId = c->connector_id; -+ ret = 0; -+ -+fail_conn: -+ drmModeFreeConnector(c); -+ -+fail_res: -+ drmModeFreeResources(res); -+ -+ return ret; -+} -+ -+// deinit is called if init fails so no need to clean up explicity here -+static int drm_vout_init(struct AVFormatContext * s) -+{ -+ drm_display_env_t * const de = s->priv_data; -+ int rv; -+ const char * drm_module = DRM_MODULE; -+ -+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); -+ -+ de->drm_fd = -1; -+ de->con_id = 0; -+ de->setup = (struct drm_setup){0}; -+ de->q_terminate = 0; -+ -+ if ((de->drm_fd = drmOpen(drm_module, NULL)) < 0) -+ { -+ rv = AVERROR(errno); -+ av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", drm_module, av_err2str(rv)); -+ return rv; -+ } -+ -+ if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0) -+ { -+ av_log(s, AV_LOG_ERROR, "failed to find valid mode\n"); -+ rv = AVERROR(EINVAL); -+ goto fail_close; -+ } -+ 
-+ sem_init(&de->q_sem_in, 0, 0); -+ sem_init(&de->q_sem_out, 0, 0); -+ if (pthread_create(&de->q_thread, NULL, display_thread, s)) { -+ rv = AVERROR(errno); -+ av_log(s, AV_LOG_ERROR, "Failed to creatye display thread: %s\n", av_err2str(rv)); -+ goto fail_close; -+ } -+ -+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); -+ -+ return 0; -+ -+fail_close: -+ close(de->drm_fd); -+ de->drm_fd = -1; -+ av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__); -+ -+ return rv; -+} -+ -+static void drm_vout_deinit(struct AVFormatContext * s) -+{ -+ drm_display_env_t * const de = s->priv_data; -+ -+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); -+ -+ de->q_terminate = 1; -+ sem_post(&de->q_sem_in); -+ pthread_join(de->q_thread, NULL); -+ sem_destroy(&de->q_sem_in); -+ sem_destroy(&de->q_sem_out); -+ -+ for (unsigned int i = 0; i != AUX_SIZE; ++i) -+ da_uninit(de, de->aux + i); -+ -+ av_frame_free(&de->q_next); -+ -+ if (de->drm_fd >= 0) { -+ close(de->drm_fd); -+ de->drm_fd = -1; -+ } -+ -+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); -+} -+ -+ -+#define OFFSET(x) offsetof(drm_display_env_t, x) -+static const AVOption options[] = { -+ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, -+ { NULL } -+}; -+ -+static const AVClass drm_vout_class = { -+ .class_name = "drm vid outdev", -+ .item_name = av_default_item_name, -+ .option = options, -+ .version = LIBAVUTIL_VERSION_INT, -+ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, -+}; -+ -+FFOutputFormat ff_vout_drm_muxer = { -+ .p = { -+ .name = "vout_drm", -+ .long_name = NULL_IF_CONFIG_SMALL("Drm video output device"), -+ .audio_codec = AV_CODEC_ID_NONE, -+ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, -+ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, -+ .priv_class = &drm_vout_class, -+ }, -+ .priv_data_size = sizeof(drm_display_env_t), -+ .write_header = drm_vout_write_header, -+ .write_packet = drm_vout_write_packet, -+ 
.write_uncoded_frame = drm_vout_write_frame, -+ .write_trailer = drm_vout_write_trailer, -+ .control_message = drm_vout_control_message, -+ .init = drm_vout_init, -+ .deinit = drm_vout_deinit, -+}; -+ - -From bfdfdd9ad94d8a9773c9069b930725ad44e9ddb3 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 28 Apr 2021 11:34:18 +0100 -Subject: [PATCH 015/186] Add vout_egl - ---- - configure | 6 + - libavdevice/Makefile | 1 + - libavdevice/alldevices.c | 1 + - libavdevice/egl_vout.c | 811 +++++++++++++++++++++++++++++++++++++++ - 4 files changed, 819 insertions(+) - create mode 100644 libavdevice/egl_vout.c - -diff --git a/configure b/configure -index fb72aa89a60c..a4ffd8797690 100755 ---- a/configure -+++ b/configure -@@ -347,6 +347,7 @@ External library support: - --enable-mmal enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no] - --enable-sand enable sand video formats [rpi] - --enable-vout-drm enable the vout_drm module - for internal testing only [no] -+ --enable-vout-egl enable the vout_egl module - for internal testing only [no] - --disable-nvdec disable Nvidia video decoding acceleration (via hwaccel) [autodetect] - --disable-nvenc disable Nvidia video encoding code [autodetect] - --enable-omx enable OpenMAX IL code [no] -@@ -1818,6 +1819,7 @@ EXTERNAL_LIBRARY_LIST=" - libdav1d - libdc1394 - libdrm -+ epoxy - libflite - libfontconfig - libfreetype -@@ -1942,6 +1944,7 @@ FEATURE_LIST=" - static - swscale_alpha - vout_drm -+ vout_egl - " - - # this list should be kept in linking order -@@ -3565,6 +3568,8 @@ v4l2_outdev_deps="libdrm" - v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" - v4l2_outdev_suggest="libv4l2" - vout_drm_outdev_deps="libdrm vout_drm" -+vout_egl_outdev_deps="xlib" -+vout_egl_outdev_select="epoxy" - vfwcap_indev_deps="vfw32 vfwcap_defines" - xcbgrab_indev_deps="libxcb" - xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" -@@ -6596,6 +6601,7 @@ enabled libdav1d && require_pkg_config libdav1d "dav1d >= 0.5.0" 
"dav1d - enabled libdavs2 && require_pkg_config libdavs2 "davs2 >= 1.6.0" davs2.h davs2_decoder_open - enabled libdc1394 && require_pkg_config libdc1394 libdc1394-2 dc1394/dc1394.h dc1394_new - enabled libdrm && require_pkg_config libdrm libdrm xf86drm.h drmGetVersion -+enabled epoxy && require_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version - enabled libfdk_aac && { check_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen || - { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac && - warn "using libfdk without pkg-config"; } } -diff --git a/libavdevice/Makefile b/libavdevice/Makefile -index 36aac301861a..0989cb895f9e 100644 ---- a/libavdevice/Makefile -+++ b/libavdevice/Makefile -@@ -49,6 +49,7 @@ OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o - OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o - OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o - OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o -+OBJS-$(CONFIG_VOUT_EGL_OUTDEV) += egl_vout.o - OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o - OBJS-$(CONFIG_XV_OUTDEV) += xv.o - -diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c -index e2a8669f2712..ffb410b92da8 100644 ---- a/libavdevice/alldevices.c -+++ b/libavdevice/alldevices.c -@@ -53,6 +53,7 @@ extern const AVInputFormat ff_v4l2_demuxer; - extern const FFOutputFormat ff_v4l2_muxer; - extern const AVInputFormat ff_vfwcap_demuxer; - extern const FFOutputFormat ff_vout_drm_muxer; -+extern const FFOutputFormat ff_vout_egl_muxer; - extern const AVInputFormat ff_xcbgrab_demuxer; - extern const FFOutputFormat ff_xv_muxer; - -diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c -new file mode 100644 -index 000000000000..7b9c610ace28 ---- /dev/null -+++ b/libavdevice/egl_vout.c -@@ -0,0 +1,811 @@ -+/* -+ * Copyright (c) 2020 John Cox for Raspberry Pi Trading -+ * -+ * This file is part of FFmpeg. 
-+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+ -+// *** This module is a work in progress and its utility is strictly -+// limited to testing. -+// Amongst other issues it doesn't wait for the pic to be displayed before -+// returning the buffer so flikering does occur. -+ -+#include -+#include -+ -+#include "libavutil/opt.h" -+#include "libavutil/avassert.h" -+#include "libavutil/pixdesc.h" -+#include "libavutil/imgutils.h" -+#include "libavutil/hwcontext_drm.h" -+#include "libavformat/mux.h" -+#include "avdevice.h" -+ -+#include "pthread.h" -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include "libavutil/rpi_sand_fns.h" -+ -+#define TRACE_ALL 0 -+ -+struct egl_setup { -+ int conId; -+ -+ Display *dpy; -+ EGLDisplay egl_dpy; -+ EGLContext ctx; -+ EGLSurface surf; -+ Window win; -+ -+ uint32_t crtcId; -+ int crtcIdx; -+ uint32_t planeId; -+ struct { -+ int x, y, width, height; -+ } compose; -+}; -+ -+typedef struct egl_aux_s { -+ int fd; -+ GLuint texture; -+ -+} egl_aux_t; -+ -+typedef struct egl_display_env_s -+{ -+ AVClass *class; -+ -+ struct egl_setup setup; -+ enum AVPixelFormat avfmt; -+ -+ int show_all; -+ int window_width, window_height; -+ int window_x, window_y; -+ int fullscreen; -+ -+ egl_aux_t aux[32]; -+ -+ pthread_t q_thread; -+ 
pthread_mutex_t q_lock; -+ sem_t display_start_sem; -+ sem_t q_sem; -+ int q_terminate; -+ AVFrame * q_this; -+ AVFrame * q_next; -+ -+} egl_display_env_t; -+ -+ -+/** -+ * Remove window border/decorations. -+ */ -+static void -+no_border( Display *dpy, Window w) -+{ -+ static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); -+ static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; -+ -+ typedef struct -+ { -+ unsigned long flags; -+ unsigned long functions; -+ unsigned long decorations; -+ long inputMode; -+ unsigned long status; -+ } PropMotifWmHints; -+ -+ PropMotifWmHints motif_hints; -+ Atom prop, proptype; -+ unsigned long flags = 0; -+ -+ /* setup the property */ -+ motif_hints.flags = MWM_HINTS_DECORATIONS; -+ motif_hints.decorations = flags; -+ -+ /* get the atom for the property */ -+ prop = XInternAtom( dpy, "_MOTIF_WM_HINTS", True ); -+ if (!prop) { -+ /* something went wrong! */ -+ return; -+ } -+ -+ /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ -+ proptype = prop; -+ -+ XChangeProperty( dpy, w, /* display, window */ -+ prop, proptype, /* property, type */ -+ 32, /* format: 32-bit datums */ -+ PropModeReplace, /* mode */ -+ (unsigned char *) &motif_hints, /* data */ -+ PROP_MOTIF_WM_HINTS_ELEMENTS /* nelements */ -+ ); -+} -+ -+ -+/* -+ * Create an RGB, double-buffered window. -+ * Return the window and context handles. -+ */ -+static int -+make_window(struct AVFormatContext * const s, -+ egl_display_env_t * const de, -+ Display *dpy, EGLDisplay egl_dpy, const char *name, -+ Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet) -+{ -+ int scrnum = DefaultScreen( dpy ); -+ XSetWindowAttributes attr; -+ unsigned long mask; -+ Window root = RootWindow( dpy, scrnum ); -+ Window win; -+ EGLContext ctx; -+ const int fullscreen = de->fullscreen; -+ EGLConfig config; -+ int x = de->window_x; -+ int y = de->window_y; -+ int width = de->window_width ? de->window_width : 1280; -+ int height = de->window_height ? 
de->window_height : 720; -+ -+ -+ if (fullscreen) { -+ int scrnum = DefaultScreen(dpy); -+ -+ x = 0; y = 0; -+ width = DisplayWidth(dpy, scrnum); -+ height = DisplayHeight(dpy, scrnum); -+ } -+ -+ { -+ EGLint num_configs; -+ static const EGLint attribs[] = { -+ EGL_RED_SIZE, 1, -+ EGL_GREEN_SIZE, 1, -+ EGL_BLUE_SIZE, 1, -+ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, -+ EGL_NONE -+ }; -+ -+ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { -+ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); -+ return -1; -+ } -+ } -+ -+ { -+ EGLint vid; -+ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); -+ return -1; -+ } -+ -+ { -+ XVisualInfo visTemplate = { -+ .visualid = vid, -+ }; -+ int num_visuals; -+ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, -+ &visTemplate, &num_visuals); -+ -+ /* window attributes */ -+ attr.background_pixel = 0; -+ attr.border_pixel = 0; -+ attr.colormap = XCreateColormap( dpy, root, visinfo->visual, AllocNone); -+ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; -+ /* XXX this is a bad way to get a borderless window! 
*/ -+ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; -+ -+ win = XCreateWindow( dpy, root, x, y, width, height, -+ 0, visinfo->depth, InputOutput, -+ visinfo->visual, mask, &attr ); -+ XFree(visinfo); -+ } -+ } -+ -+ if (fullscreen) -+ no_border(dpy, win); -+ -+ /* set hints and properties */ -+ { -+ XSizeHints sizehints; -+ sizehints.x = x; -+ sizehints.y = y; -+ sizehints.width = width; -+ sizehints.height = height; -+ sizehints.flags = USSize | USPosition; -+ XSetNormalHints(dpy, win, &sizehints); -+ XSetStandardProperties(dpy, win, name, name, -+ None, (char **)NULL, 0, &sizehints); -+ } -+ -+ eglBindAPI(EGL_OPENGL_ES_API); -+ -+ { -+ static const EGLint ctx_attribs[] = { -+ EGL_CONTEXT_CLIENT_VERSION, 2, -+ EGL_NONE -+ }; -+ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs ); -+ if (!ctx) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -+ return -1; -+ } -+ } -+ -+ -+ XMapWindow(dpy, win); -+ -+ { -+ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); -+ if (!surf) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); -+ return -1; -+ } -+ -+ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -+ return -1; -+ } -+ -+ *winRet = win; -+ *ctxRet = ctx; -+ *surfRet = surf; -+ } -+ -+ return 0; -+} -+ -+static GLint -+compile_shader(struct AVFormatContext * const avctx, GLenum target, const char *source) -+{ -+ GLuint s = glCreateShader(target); -+ -+ if (s == 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); -+ return 0; -+ } -+ -+ glShaderSource(s, 1, (const GLchar **) &source, NULL); -+ glCompileShader(s); -+ -+ { -+ GLint ok; -+ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); -+ -+ if (!ok) { -+ GLchar *info; -+ GLint size; -+ -+ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); -+ info = malloc(size); -+ -+ glGetShaderInfoLog(s, size, NULL, info); -+ av_log(avctx, AV_LOG_ERROR, "Failed 
to compile shader: %ssource:\n%s\n", info, source); -+ -+ return 0; -+ } -+ } -+ -+ return s; -+} -+ -+static GLuint link_program(struct AVFormatContext * const s, GLint vs, GLint fs) -+{ -+ GLuint prog = glCreateProgram(); -+ -+ if (prog == 0) { -+ av_log(s, AV_LOG_ERROR, "Failed to create program\n"); -+ return 0; -+ } -+ -+ glAttachShader(prog, vs); -+ glAttachShader(prog, fs); -+ glLinkProgram(prog); -+ -+ { -+ GLint ok; -+ glGetProgramiv(prog, GL_LINK_STATUS, &ok); -+ if (!ok) { -+ /* Some drivers return a size of 1 for an empty log. This is the size -+ * of a log that contains only a terminating NUL character. -+ */ -+ GLint size; -+ GLchar *info = NULL; -+ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); -+ if (size > 1) { -+ info = malloc(size); -+ glGetProgramInfoLog(prog, size, NULL, info); -+ } -+ -+ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", -+ (info != NULL) ? info : ""); -+ return 0; -+ } -+ } -+ -+ return prog; -+} -+ -+static int -+gl_setup(struct AVFormatContext * const s) -+{ -+ const char *vs = -+ "attribute vec4 pos;\n" -+ "varying vec2 texcoord;\n" -+ "\n" -+ "void main() {\n" -+ " gl_Position = pos;\n" -+ " texcoord.x = (pos.x + 1.0) / 2.0;\n" -+ " texcoord.y = (-pos.y + 1.0) / 2.0;\n" -+ "}\n"; -+ const char *fs = -+ "#extension GL_OES_EGL_image_external : enable\n" -+ "precision mediump float;\n" -+ "uniform samplerExternalOES s;\n" -+ "varying vec2 texcoord;\n" -+ "void main() {\n" -+ " gl_FragColor = texture2D(s, texcoord);\n" -+ "}\n"; -+ -+ GLuint vs_s; -+ GLuint fs_s; -+ GLuint prog; -+ -+ if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || -+ !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || -+ !(prog = link_program(s, vs_s, fs_s))) -+ return -1; -+ -+ glUseProgram(prog); -+ -+ { -+ static const float verts[] = { -+ -1, -1, -+ 1, -1, -+ 1, 1, -+ -1, 1, -+ }; -+ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); -+ } -+ -+ glEnableVertexAttribArray(0); -+ return 0; -+} -+ -+static int 
egl_vout_write_trailer(AVFormatContext *s) -+{ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, "%s\n", __func__); -+#endif -+ -+ return 0; -+} -+ -+static int egl_vout_write_header(AVFormatContext *s) -+{ -+ const AVCodecParameters * const par = s->streams[0]->codecpar; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, "%s\n", __func__); -+#endif -+ if ( s->nb_streams > 1 -+ || par->codec_type != AVMEDIA_TYPE_VIDEO -+ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { -+ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ return 0; -+} -+ -+ -+static int do_display(AVFormatContext * const s, egl_display_env_t * const de, AVFrame * const frame) -+{ -+ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; -+ egl_aux_t * da = NULL; -+ unsigned int i; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); -+#endif -+ -+ for (i = 0; i != 32; ++i) { -+ if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) { -+ da = de->aux + i; -+ break; -+ } -+ } -+ -+ if (da == NULL) { -+ av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__); -+ return AVERROR(EINVAL); -+ } -+ -+ if (da->texture == 0) { -+ EGLint attribs[50]; -+ EGLint * a = attribs; -+ int i, j; -+ static const EGLint anames[] = { -+ EGL_DMA_BUF_PLANE0_FD_EXT, -+ EGL_DMA_BUF_PLANE0_OFFSET_EXT, -+ EGL_DMA_BUF_PLANE0_PITCH_EXT, -+ EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, -+ EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, -+ EGL_DMA_BUF_PLANE1_FD_EXT, -+ EGL_DMA_BUF_PLANE1_OFFSET_EXT, -+ EGL_DMA_BUF_PLANE1_PITCH_EXT, -+ EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, -+ EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, -+ EGL_DMA_BUF_PLANE2_FD_EXT, -+ EGL_DMA_BUF_PLANE2_OFFSET_EXT, -+ EGL_DMA_BUF_PLANE2_PITCH_EXT, -+ EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, -+ EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, -+ }; -+ const EGLint * b = anames; -+ -+ *a++ = EGL_WIDTH; -+ *a++ = av_frame_cropped_width(frame); -+ *a++ = EGL_HEIGHT; -+ *a++ = av_frame_cropped_height(frame); -+ *a++ = 
EGL_LINUX_DRM_FOURCC_EXT; -+ *a++ = desc->layers[0].format; -+ -+ for (i = 0; i < desc->nb_layers; ++i) { -+ for (j = 0; j < desc->layers[i].nb_planes; ++j) { -+ const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; -+ const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; -+ *a++ = *b++; -+ *a++ = obj->fd; -+ *a++ = *b++; -+ *a++ = p->offset; -+ *a++ = *b++; -+ *a++ = p->pitch; -+ if (obj->format_modifier == 0) { -+ b += 2; -+ } -+ else { -+ *a++ = *b++; -+ *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); -+ *a++ = *b++; -+ *a++ = (EGLint)(obj->format_modifier >> 32); -+ } -+ } -+ } -+ -+ *a = EGL_NONE; -+ -+#if TRACE_ALL -+ for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { -+ av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); -+ } -+#endif -+ { -+ const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, -+ EGL_NO_CONTEXT, -+ EGL_LINUX_DMA_BUF_EXT, -+ NULL, attribs); -+ if (!image) { -+ av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); -+ return -1; -+ } -+ -+ glGenTextures(1, &da->texture); -+ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); -+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); -+ -+ eglDestroyImageKHR(de->setup.egl_dpy, image); -+ } -+ -+ da->fd = desc->objects[0].fd; -+ -+#if 0 -+ av_log(s, AV_LOG_INFO, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," -+ " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", -+ av_frame_cropped_width(frame), -+ av_frame_cropped_height(frame), -+ desc->layers[0].format, -+ bo_plane_handles[0], -+ bo_plane_handles[1], -+ bo_plane_handles[2], -+ bo_plane_handles[3], -+ pitches[0], -+ pitches[1], -+ pitches[2], -+ pitches[3], -+ offsets[0], -+ offsets[1], -+ offsets[2], -+ offsets[3], -+ (long long)modifiers[0], -+ (long long)modifiers[1], -+ (long long)modifiers[2], -+ 
(long long)modifiers[3] -+ ); -+#endif -+ } -+ -+ glClearColor(0.5, 0.5, 0.5, 0.5); -+ glClear(GL_COLOR_BUFFER_BIT); -+ -+ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); -+ glDrawArrays(GL_TRIANGLE_FAN, 0, 4); -+ eglSwapBuffers(de->setup.egl_dpy, de->setup.surf); -+ -+ glDeleteTextures(1, &da->texture); -+ da->texture = 0; -+ da->fd = -1; -+ -+ return 0; -+} -+ -+static void * display_thread(void * v) -+{ -+ AVFormatContext * const s = v; -+ egl_display_env_t * const de = s->priv_data; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); -+#endif -+ { -+ EGLint egl_major, egl_minor; -+ -+ de->setup.dpy = XOpenDisplay(NULL); -+ if (!de->setup.dpy) { -+ av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); -+ goto fail; -+ } -+ -+ de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); -+ if (!de->setup.egl_dpy) { -+ av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); -+ goto fail; -+ } -+ -+ if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); -+ goto fail; -+ } -+ -+ av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); -+ -+ if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { -+ av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); -+ goto fail; -+ } -+ } -+ -+ if (!de->window_width || !de->window_height) { -+ de->window_width = 1280; -+ de->window_height = 720; -+ } -+ if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout", -+ &de->setup.win, &de->setup.ctx, &de->setup.surf)) { -+ av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); -+ goto fail; -+ } -+ -+ if (gl_setup(s)) { -+ av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); -+ goto fail; -+ } -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__); -+#endif -+ sem_post(&de->display_start_sem); -+ -+ for (;;) { -+ AVFrame * frame; -+ -+ while (sem_wait(&de->q_sem) != 0) { -+ av_assert0(errno == EINTR); -+ } -+ -+ if 
(de->q_terminate) -+ break; -+ -+ pthread_mutex_lock(&de->q_lock); -+ frame = de->q_next; -+ de->q_next = NULL; -+ pthread_mutex_unlock(&de->q_lock); -+ -+ do_display(s, de, frame); -+ -+ av_frame_free(&de->q_this); -+ de->q_this = frame; -+ } -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); -+#endif -+ -+ return NULL; -+ -+fail: -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__); -+#endif -+ de->q_terminate = 1; -+ sem_post(&de->display_start_sem); -+ -+ return NULL; -+} -+ -+static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) -+{ -+ const AVFrame * const src_frame = (AVFrame *)pkt->data; -+ AVFrame * frame; -+ egl_display_env_t * const de = s->priv_data; -+ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, "%s\n", __func__); -+#endif -+ -+ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { -+ frame = av_frame_alloc(); -+ av_frame_ref(frame, src_frame); -+ } -+ else if (src_frame->format == AV_PIX_FMT_VAAPI) { -+ frame = av_frame_alloc(); -+ frame->format = AV_PIX_FMT_DRM_PRIME; -+ if (av_hwframe_map(frame, src_frame, 0) != 0) -+ { -+ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); -+ av_frame_free(&frame); -+ return AVERROR(EINVAL); -+ } -+ } -+ else { -+ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); -+ return AVERROR(EINVAL); -+ } -+ -+ // Really hacky sync -+ while (de->show_all && de->q_next) { -+ usleep(3000); -+ } -+ -+ pthread_mutex_lock(&de->q_lock); -+ { -+ AVFrame * const t = de->q_next; -+ de->q_next = frame; -+ frame = t; -+ } -+ pthread_mutex_unlock(&de->q_lock); -+ -+ if (frame == NULL) -+ sem_post(&de->q_sem); -+ else -+ av_frame_free(&frame); -+ -+ return 0; -+} -+ -+static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, -+ unsigned flags) -+{ -+ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); -+ return AVERROR_PATCHWELCOME; -+} -+ -+static int 
egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) -+{ -+#if TRACE_ALL -+ av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type); -+#endif -+ switch(type) { -+ case AV_APP_TO_DEV_WINDOW_REPAINT: -+ return 0; -+ default: -+ break; -+ } -+ return AVERROR(ENOSYS); -+} -+ -+// deinit is called if init fails so no need to clean up explicity here -+static int egl_vout_init(struct AVFormatContext * s) -+{ -+ egl_display_env_t * const de = s->priv_data; -+ unsigned int i; -+ -+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); -+ -+ de->setup = (struct egl_setup){0}; -+ -+ for (i = 0; i != 32; ++i) { -+ de->aux[i].fd = -1; -+ } -+ -+ de->q_terminate = 0; -+ pthread_mutex_init(&de->q_lock, NULL); -+ sem_init(&de->q_sem, 0, 0); -+ sem_init(&de->display_start_sem, 0, 0); -+ av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0); -+ -+ sem_wait(&de->display_start_sem); -+ if (de->q_terminate) { -+ av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); -+ return -1; -+ } -+ -+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); -+ -+ return 0; -+} -+ -+static void egl_vout_deinit(struct AVFormatContext * s) -+{ -+ egl_display_env_t * const de = s->priv_data; -+ -+ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); -+ -+ de->q_terminate = 1; -+ sem_post(&de->q_sem); -+ pthread_join(de->q_thread, NULL); -+ sem_destroy(&de->q_sem); -+ pthread_mutex_destroy(&de->q_lock); -+ -+ av_frame_free(&de->q_next); -+ av_frame_free(&de->q_this); -+ -+ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); -+} -+ -+#define OFFSET(x) offsetof(egl_display_env_t, x) -+static const AVOption options[] = { -+ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, {.i64 = 0 }, 
-INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, -+ { NULL } -+ -+}; -+ -+static const AVClass egl_vout_class = { -+ .class_name = "egl vid outdev", -+ .item_name = av_default_item_name, -+ .option = options, -+ .version = LIBAVUTIL_VERSION_INT, -+ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, -+}; -+ -+FFOutputFormat ff_vout_egl_muxer = { -+ .p = { -+ .name = "vout_egl", -+ .long_name = NULL_IF_CONFIG_SMALL("Egl video output device"), -+ .audio_codec = AV_CODEC_ID_NONE, -+ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, -+ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, -+ .priv_class = &egl_vout_class, -+ }, -+ .priv_data_size = sizeof(egl_display_env_t), -+ .write_header = egl_vout_write_header, -+ .write_packet = egl_vout_write_packet, -+ .write_uncoded_frame = egl_vout_write_frame, -+ .write_trailer = egl_vout_write_trailer, -+ .control_message = egl_vout_control_message, -+ .init = egl_vout_init, -+ .deinit = egl_vout_deinit, -+}; -+ - -From 55cc6b4be80730d1bbd67c483e97921f39b58965 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 28 Apr 2021 12:51:22 +0100 -Subject: [PATCH 016/186] V4L2 stateful rework - ---- - libavcodec/Makefile | 3 +- - libavcodec/v4l2_buffers.c | 556 +++++++++++++++++++++++++++----------- - libavcodec/v4l2_buffers.h | 28 +- - libavcodec/v4l2_context.c | 536 +++++++++++++++++++++++++++--------- - libavcodec/v4l2_context.h | 20 +- - libavcodec/v4l2_m2m.c | 20 +- - libavcodec/v4l2_m2m.h | 31 +++ - libavcodec/v4l2_m2m_dec.c | 446 ++++++++++++++++++++++++++---- - 8 files changed, 1286 insertions(+), 354 deletions(-) - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 2d440b56486b..e1aa0ba014ed 100644 ---- a/libavcodec/Makefile -+++ 
b/libavcodec/Makefile -@@ -169,7 +169,8 @@ OBJS-$(CONFIG_VIDEODSP) += videodsp.o - OBJS-$(CONFIG_VP3DSP) += vp3dsp.o - OBJS-$(CONFIG_VP56DSP) += vp56dsp.o - OBJS-$(CONFIG_VP8DSP) += vp8dsp.o --OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o -+OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\ -+ weak_link.o - OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\ - v4l2_req_devscan.o weak_link.o - OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 3f5471067a1a..a003934ca19e 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -21,6 +21,7 @@ - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -+#include - #include - #include - #include -@@ -29,12 +30,14 @@ - #include - #include "libavcodec/avcodec.h" - #include "libavutil/pixdesc.h" -+#include "libavutil/hwcontext.h" - #include "v4l2_context.h" - #include "v4l2_buffers.h" - #include "v4l2_m2m.h" -+#include "weak_link.h" - - #define USEC_PER_SEC 1000000 --static AVRational v4l2_timebase = { 1, USEC_PER_SEC }; -+static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; - - static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) - { -@@ -51,34 +54,44 @@ static inline AVCodecContext *logger(V4L2Buffer *buf) - static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) - { - V4L2m2mContext *s = buf_to_m2mctx(avbuf); -- -- if (s->avctx->pkt_timebase.num) -- return s->avctx->pkt_timebase; -- return s->avctx->time_base; -+ const AVRational tb = s->avctx->pkt_timebase.num ? -+ s->avctx->pkt_timebase : -+ s->avctx->time_base; -+ return tb.num && tb.den ? 
tb : v4l2_timebase; - } - --static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) -+static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts, int no_rescale) - { -- int64_t v4l2_pts; -- -- if (pts == AV_NOPTS_VALUE) -- pts = 0; -- - /* convert pts to v4l2 timebase */ -- v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); -+ const int64_t v4l2_pts = -+ no_rescale ? pts : -+ pts == AV_NOPTS_VALUE ? 0 : -+ av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); - out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; - out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; - } - --static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) -+static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf, int no_rescale) - { -- int64_t v4l2_pts; -- - /* convert pts back to encoder timebase */ -- v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + -+ const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + - avbuf->buf.timestamp.tv_usec; - -- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); -+ return -+ no_rescale ? v4l2_pts : -+ v4l2_pts == 0 ? 
AV_NOPTS_VALUE : -+ av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); -+} -+ -+static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) -+{ -+ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { -+ out->planes[plane].bytesused = bytesused; -+ out->planes[plane].length = length; -+ } else { -+ out->buf.bytesused = bytesused; -+ out->buf.length = length; -+ } - } - - static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) -@@ -209,68 +222,143 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) - return AVCOL_TRC_UNSPECIFIED; - } - --static void v4l2_free_buffer(void *opaque, uint8_t *unused) -+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) - { -- V4L2Buffer* avbuf = opaque; -- V4L2m2mContext *s = buf_to_m2mctx(avbuf); -+ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; -+ AVDRMLayerDescriptor *layer; - -- if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { -- atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); -+ /* fill the DRM frame descriptor */ -+ drm_desc->nb_objects = avbuf->num_planes; -+ drm_desc->nb_layers = 1; - -- if (s->reinit) { -- if (!atomic_load(&s->refcount)) -- sem_post(&s->refsync); -- } else { -- if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) { -- /* no need to queue more buffers to the driver */ -- avbuf->status = V4L2BUF_AVAILABLE; -- } -- else if (avbuf->context->streamon) -- ff_v4l2_buffer_enqueue(avbuf); -- } -+ layer = &drm_desc->layers[0]; -+ layer->nb_planes = avbuf->num_planes; -+ -+ for (int i = 0; i < avbuf->num_planes; i++) { -+ layer->planes[i].object_index = i; -+ layer->planes[i].offset = 0; -+ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; -+ } -+ -+ switch (avbuf->context->av_pix_fmt) { -+ case AV_PIX_FMT_YUYV422: -+ -+ layer->format = DRM_FORMAT_YUYV; -+ layer->nb_planes = 1; -+ -+ break; -+ -+ case AV_PIX_FMT_NV12: -+ case AV_PIX_FMT_NV21: -+ -+ layer->format = 
avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ? -+ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; -+ -+ if (avbuf->num_planes > 1) -+ break; -+ -+ layer->nb_planes = 2; -+ -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -+ avbuf->context->format.fmt.pix.height; -+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; -+ break; -+ -+ case AV_PIX_FMT_YUV420P: -+ -+ layer->format = DRM_FORMAT_YUV420; -+ -+ if (avbuf->num_planes > 1) -+ break; -+ -+ layer->nb_planes = 3; -+ -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -+ avbuf->context->format.fmt.pix.height; -+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; -+ -+ layer->planes[2].object_index = 0; -+ layer->planes[2].offset = layer->planes[1].offset + -+ ((avbuf->plane_info[0].bytesperline * -+ avbuf->context->format.fmt.pix.height) >> 2); -+ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; -+ break; - -- av_buffer_unref(&avbuf->context_ref); -+ default: -+ drm_desc->nb_layers = 0; -+ break; - } -+ -+ return (uint8_t *) drm_desc; - } - --static int v4l2_buf_increase_ref(V4L2Buffer *in) -+static void v4l2_free_bufref(void *opaque, uint8_t *data) - { -- V4L2m2mContext *s = buf_to_m2mctx(in); -+ AVBufferRef * bufref = (AVBufferRef *)data; -+ V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data; -+ struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); - -- if (in->context_ref) -- atomic_fetch_add(&in->context_refcount, 1); -- else { -- in->context_ref = av_buffer_ref(s->self_ref); -- if (!in->context_ref) -- return AVERROR(ENOMEM); -+ if (ctx != NULL) { -+ // Buffer still attached to context -+ V4L2m2mContext *s = buf_to_m2mctx(avbuf); - -- in->context_refcount = 1; -- } -+ ff_mutex_lock(&ctx->lock); - -- in->status = V4L2BUF_RET_USER; -- atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); -+ avbuf->status = V4L2BUF_AVAILABLE; - -- return 0; -+ if (s->draining && 
V4L2_TYPE_IS_OUTPUT(ctx->type)) { -+ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); -+ /* no need to queue more buffers to the driver */ -+ } -+ else if (ctx->streamon) { -+ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); -+ avbuf->buf.timestamp.tv_sec = 0; -+ avbuf->buf.timestamp.tv_usec = 0; -+ ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER -+ } -+ else { -+ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name); -+ } -+ -+ ff_mutex_unlock(&ctx->lock); -+ } -+ -+ ff_weak_link_unlock(avbuf->context_wl); -+ av_buffer_unref(&bufref); - } - --static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) -+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) - { -- int ret; -+ struct v4l2_exportbuffer expbuf; -+ int i, ret; - -- if (plane >= in->num_planes) -- return AVERROR(EINVAL); -+ for (i = 0; i < avbuf->num_planes; i++) { -+ memset(&expbuf, 0, sizeof(expbuf)); - -- /* even though most encoders return 0 in data_offset encoding vp8 does require this value */ -- *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset, -- in->plane_info[plane].length, v4l2_free_buffer, in, 0); -- if (!*buf) -- return AVERROR(ENOMEM); -+ expbuf.index = avbuf->buf.index; -+ expbuf.type = avbuf->buf.type; -+ expbuf.plane = i; - -- ret = v4l2_buf_increase_ref(in); -- if (ret) -- av_buffer_unref(buf); -+ ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_EXPBUF, &expbuf); -+ if (ret < 0) -+ return AVERROR(errno); - -- return ret; -+ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) { -+ /* drm frame */ -+ avbuf->drm_frame.objects[i].size = avbuf->buf.m.planes[i].length; -+ avbuf->drm_frame.objects[i].fd = expbuf.fd; -+ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ } else { -+ /* drm frame */ -+ avbuf->drm_frame.objects[0].size = avbuf->buf.length; -+ avbuf->drm_frame.objects[0].fd = expbuf.fd; -+ 
avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ } -+ } -+ -+ return 0; - } - - static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) -@@ -285,30 +373,50 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i - - memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); - -- if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { -- out->planes[plane].bytesused = bytesused; -- out->planes[plane].length = length; -- } else { -- out->buf.bytesused = bytesused; -- out->buf.length = length; -- } -+ set_buf_length(out, plane, bytesused, length); - - return 0; - } - -+static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) -+{ -+ AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]); -+ AVBufferRef * newbuf; -+ -+ if (!bufref) -+ return NULL; -+ -+ newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0); -+ if (newbuf == NULL) -+ av_buffer_unref(&bufref); -+ -+ avbuf->status = V4L2BUF_RET_USER; -+ return newbuf; -+} -+ - static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) - { -- int i, ret; -+ int i; - - frame->format = avbuf->context->av_pix_fmt; - -- for (i = 0; i < avbuf->num_planes; i++) { -- ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]); -- if (ret) -- return ret; -+ frame->buf[0] = wrap_avbuf(avbuf); -+ if (frame->buf[0] == NULL) -+ return AVERROR(ENOMEM); -+ -+ if (buf_to_m2mctx(avbuf)->output_drm) { -+ /* 1. get references to the actual data */ -+ frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); -+ frame->format = AV_PIX_FMT_DRM_PRIME; -+ frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); -+ return 0; -+ } -+ - -+ /* 1. 
get references to the actual data */ -+ for (i = 0; i < avbuf->num_planes; i++) { -+ frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset; - frame->linesize[i] = avbuf->plane_info[i].bytesperline; -- frame->data[i] = frame->buf[i]->data; - } - - /* fixup special cases */ -@@ -337,68 +445,95 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) - return 0; - } - -+static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h) -+{ -+ if (dst_stride == src_stride && w + 32 >= dst_stride) { -+ memcpy(dst, src, dst_stride * h); -+ } -+ else { -+ while (--h >= 0) { -+ memcpy(dst, src, w); -+ dst += dst_stride; -+ src += src_stride; -+ } -+ } -+} -+ -+static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) -+{ -+ return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); -+} -+ - static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - { -- int i, ret; -- struct v4l2_format fmt = out->context->format; -- int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? -- fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat; -- int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? 
-- fmt.fmt.pix_mp.height : fmt.fmt.pix.height; -- int is_planar_format = 0; -- -- switch (pixel_format) { -- case V4L2_PIX_FMT_YUV420M: -- case V4L2_PIX_FMT_YVU420M: --#ifdef V4L2_PIX_FMT_YUV422M -- case V4L2_PIX_FMT_YUV422M: --#endif --#ifdef V4L2_PIX_FMT_YVU422M -- case V4L2_PIX_FMT_YVU422M: --#endif --#ifdef V4L2_PIX_FMT_YUV444M -- case V4L2_PIX_FMT_YUV444M: --#endif --#ifdef V4L2_PIX_FMT_YVU444M -- case V4L2_PIX_FMT_YVU444M: --#endif -- case V4L2_PIX_FMT_NV12M: -- case V4L2_PIX_FMT_NV21M: -- case V4L2_PIX_FMT_NV12MT_16X16: -- case V4L2_PIX_FMT_NV12MT: -- case V4L2_PIX_FMT_NV16M: -- case V4L2_PIX_FMT_NV61M: -- is_planar_format = 1; -- } -- -- if (!is_planar_format) { -- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); -- int planes_nb = 0; -- int offset = 0; -- -- for (i = 0; i < desc->nb_components; i++) -- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); -- -- for (i = 0; i < planes_nb; i++) { -- int size, h = height; -- if (i == 1 || i == 2) { -+ int i; -+ int num_planes = 0; -+ int pel_strides[4] = {0}; -+ -+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); -+ -+ if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { -+ av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__); -+ return -1; -+ } -+ -+ for (i = 0; i != desc->nb_components; ++i) { -+ if (desc->comp[i].plane >= num_planes) -+ num_planes = desc->comp[i].plane + 1; -+ pel_strides[desc->comp[i].plane] = desc->comp[i].step; -+ } -+ -+ if (out->num_planes > 1) { -+ if (num_planes != out->num_planes) { -+ av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); -+ return -1; -+ } -+ for (i = 0; i != num_planes; ++i) { -+ int w = frame->width; -+ int h = frame->height; -+ if (is_chroma(desc, i, num_planes)) { -+ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); - h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); - } -- size = frame->linesize[i] * h; -- ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, 
offset); -- if (ret) -- return ret; -- offset += size; -+ -+ cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline, -+ frame->data[i], frame->linesize[i], -+ w * pel_strides[i], h); -+ set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length); - } -- return 0; - } -+ else -+ { -+ unsigned int offset = 0; -+ -+ for (i = 0; i != num_planes; ++i) { -+ int w = frame->width; -+ int h = frame->height; -+ int dst_stride = out->plane_info[0].bytesperline; -+ uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset; -+ -+ if (is_chroma(desc, i, num_planes)) { -+ // Is chroma -+ dst_stride >>= desc->log2_chroma_w; -+ offset += dst_stride * (out->context->height >> desc->log2_chroma_h); -+ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); -+ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); -+ } -+ else { -+ // Is luma or alpha -+ offset += dst_stride * out->context->height; -+ } -+ if (offset > out->plane_info[0].length) { -+ av_log(NULL, AV_LOG_ERROR, "%s: Plane total %d > buffer size %d\n", __func__, offset, out->plane_info[0].length); -+ return -1; -+ } - -- for (i = 0; i < out->num_planes; i++) { -- ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0); -- if (ret) -- return ret; -+ cpy_2d(dst, dst_stride, -+ frame->data[i], frame->linesize[i], -+ w * pel_strides[i], h); -+ } -+ set_buf_length(out, 0, offset, out->plane_info[0].length); - } -- - return 0; - } - -@@ -410,14 +545,15 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - - int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - { -- v4l2_set_pts(out, frame->pts); -+ v4l2_set_pts(out, frame->pts, 0); - - return v4l2_buffer_swframe_to_buf(frame, out); - } - --int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) -+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_rescale_pts) - { - int ret; -+ V4L2Context * const ctx = avbuf->context; - - 
av_frame_unref(frame); - -@@ -432,13 +568,22 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) - frame->colorspace = v4l2_get_color_space(avbuf); - frame->color_range = v4l2_get_color_range(avbuf); - frame->color_trc = v4l2_get_color_trc(avbuf); -- frame->pts = v4l2_get_pts(avbuf); -+ frame->pts = v4l2_get_pts(avbuf, no_rescale_pts); - frame->pkt_dts = AV_NOPTS_VALUE; - - /* these values are updated also during re-init in v4l2_process_driver_event */ -- frame->height = avbuf->context->height; -- frame->width = avbuf->context->width; -- frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio; -+ frame->height = ctx->height; -+ frame->width = ctx->width; -+ frame->sample_aspect_ratio = ctx->sample_aspect_ratio; -+ -+ if (ctx->selection.height && ctx->selection.width) { -+ frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0; -+ frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0; -+ frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ? -+ frame->width - (ctx->selection.left + ctx->selection.width) : 0; -+ frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ? -+ frame->height - (ctx->selection.top + ctx->selection.height) : 0; -+ } - - /* 3. report errors upstream */ - if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { -@@ -451,15 +596,14 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) - - int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) - { -- int ret; -- - av_packet_unref(pkt); -- ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf); -- if (ret) -- return ret; -+ -+ pkt->buf = wrap_avbuf(avbuf); -+ if (pkt->buf == NULL) -+ return AVERROR(ENOMEM); - - pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? 
avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; -- pkt->data = pkt->buf->data; -+ pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; - - if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) - pkt->flags |= AV_PKT_FLAG_KEY; -@@ -469,20 +613,27 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) - pkt->flags |= AV_PKT_FLAG_CORRUPT; - } - -- pkt->dts = pkt->pts = v4l2_get_pts(avbuf); -+ pkt->dts = pkt->pts = v4l2_get_pts(avbuf, 0); - - return 0; - } - --int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -+ const void *extdata, size_t extlen, int no_rescale_pts) - { - int ret; - -- ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0); -+ if (extlen) { -+ ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0); -+ if (ret) -+ return ret; -+ } -+ -+ ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); - if (ret) - return ret; - -- v4l2_set_pts(out, pkt->pts); -+ v4l2_set_pts(out, pkt->pts, no_rescale_pts); - - if (pkt->flags & AV_PKT_FLAG_KEY) - out->flags = V4L2_BUF_FLAG_KEYFRAME; -@@ -490,15 +641,61 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) - return 0; - } - --int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) -+int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) -+{ -+ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); -+} -+ -+ -+static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) -+{ -+ V4L2Buffer * const avbuf = (V4L2Buffer *)data; -+ int i; -+ -+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) { -+ struct V4L2Plane_info *p = avbuf->plane_info + i; -+ if (p->mm_addr != NULL) -+ munmap(p->mm_addr, p->length); -+ } -+ -+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { -+ if (avbuf->drm_frame.objects[i].fd != -1) -+ close(avbuf->drm_frame.objects[i].fd); -+ } -+ -+ ff_weak_link_unref(&avbuf->context_wl); 
-+ -+ av_free(avbuf); -+} -+ -+ -+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx) - { -- V4L2Context *ctx = avbuf->context; - int ret, i; -+ V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); -+ AVBufferRef * bufref; -+ -+ *pbufref = NULL; -+ if (avbuf == NULL) -+ return AVERROR(ENOMEM); -+ -+ bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0); -+ if (bufref == NULL) { -+ av_free(avbuf); -+ return AVERROR(ENOMEM); -+ } - -+ avbuf->context = ctx; - avbuf->buf.memory = V4L2_MEMORY_MMAP; - avbuf->buf.type = ctx->type; - avbuf->buf.index = index; - -+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { -+ avbuf->drm_frame.objects[i].fd = -1; -+ } -+ -+ avbuf->context_wl = ff_weak_link_ref(ctx->wl_master); -+ - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - avbuf->buf.length = VIDEO_MAX_PLANES; - avbuf->buf.m.planes = avbuf->planes; -@@ -506,7 +703,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) - - ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); - if (ret < 0) -- return AVERROR(errno); -+ goto fail; - - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - avbuf->num_planes = 0; -@@ -526,25 +723,33 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) - - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; -- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, -- PROT_READ | PROT_WRITE, MAP_SHARED, -- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); -+ -+ if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -+ !buf_to_m2mctx(avbuf)->output_drm) { -+ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, -+ PROT_READ | PROT_WRITE, MAP_SHARED, -+ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); -+ } - } else { - avbuf->plane_info[i].length = avbuf->buf.length; -- avbuf->plane_info[i].mm_addr = 
mmap(NULL, avbuf->buf.length, -- PROT_READ | PROT_WRITE, MAP_SHARED, -- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); -+ -+ if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -+ !buf_to_m2mctx(avbuf)->output_drm) { -+ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, -+ PROT_READ | PROT_WRITE, MAP_SHARED, -+ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); -+ } - } - -- if (avbuf->plane_info[i].mm_addr == MAP_FAILED) -- return AVERROR(ENOMEM); -+ if (avbuf->plane_info[i].mm_addr == MAP_FAILED) { -+ avbuf->plane_info[i].mm_addr = NULL; -+ ret = AVERROR(ENOMEM); -+ goto fail; -+ } - } - - avbuf->status = V4L2BUF_AVAILABLE; - -- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) -- return 0; -- - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - avbuf->buf.m.planes = avbuf->planes; - avbuf->buf.length = avbuf->num_planes; -@@ -554,7 +759,20 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) - avbuf->buf.length = avbuf->planes[0].length; - } - -- return ff_v4l2_buffer_enqueue(avbuf); -+ if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { -+ if (buf_to_m2mctx(avbuf)->output_drm) { -+ ret = v4l2_buffer_export_drm(avbuf); -+ if (ret) -+ goto fail; -+ } -+ } -+ -+ *pbufref = bufref; -+ return 0; -+ -+fail: -+ av_buffer_unref(&bufref); -+ return ret; - } - - int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) -@@ -563,9 +781,27 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) - - avbuf->buf.flags = avbuf->flags; - -+ if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { -+ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", -+ avbuf->context->name, avbuf->buf.index, -+ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, -+ avbuf->context->q_count); -+ } -+ - ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf); -- if (ret < 0) -- return AVERROR(errno); -+ if (ret < 0) { -+ int err = errno; -+ av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n", -+ 
avbuf->context->name, avbuf->buf.index, -+ err, strerror(err)); -+ return AVERROR(err); -+ } -+ -+ ++avbuf->context->q_count; -+ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", -+ avbuf->context->name, avbuf->buf.index, -+ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, -+ avbuf->context->q_count); - - avbuf->status = V4L2BUF_IN_DRIVER; - -diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 3d2ff1b9a5d7..111526aee315 100644 ---- a/libavcodec/v4l2_buffers.h -+++ b/libavcodec/v4l2_buffers.h -@@ -28,27 +28,37 @@ - #include - #include - -+#include "avcodec.h" - #include "libavutil/buffer.h" - #include "libavutil/frame.h" -+#include "libavutil/hwcontext_drm.h" - #include "packet.h" - - enum V4L2Buffer_status { - V4L2BUF_AVAILABLE, - V4L2BUF_IN_DRIVER, -+ V4L2BUF_IN_USE, - V4L2BUF_RET_USER, - }; - - /** - * V4L2Buffer (wrapper for v4l2_buffer management) - */ -+struct V4L2Context; -+struct ff_weak_link_client; -+ - typedef struct V4L2Buffer { -- /* each buffer needs to have a reference to its context */ -+ /* each buffer needs to have a reference to its context -+ * The pointer is good enough for most operation but once the buffer has -+ * been passed to the user the buffer may become orphaned so for free ops -+ * the weak link must be used to ensure that the context is actually -+ * there -+ */ - struct V4L2Context *context; -+ struct ff_weak_link_client *context_wl; - -- /* This object is refcounted per-plane, so we need to keep track -- * of how many context-refs we are holding. 
*/ -- AVBufferRef *context_ref; -- atomic_uint context_refcount; -+ /* DRM descriptor */ -+ AVDRMFrameDescriptor drm_frame; - - /* keep track of the mmap address and mmap length */ - struct V4L2Plane_info { -@@ -73,11 +83,12 @@ typedef struct V4L2Buffer { - * - * @param[in] frame The AVFRame to push the information to - * @param[in] buf The V4L2Buffer to get the information from -+ * @param[in] no_rescale_pts If non-zero do not rescale PTS - * - * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect, - * AVERROR(ENOMEM) if the AVBufferRef can't be created. - */ --int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf); -+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf, int no_rescale_pts); - - /** - * Extracts the data from a V4L2Buffer to an AVPacket -@@ -101,6 +112,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); - */ - int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); - -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -+ const void *extdata, size_t extlen, int no_rescale_pts); -+ - /** - * Extracts the data from an AVFrame to a V4L2Buffer - * -@@ -119,7 +133,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); - * - * @returns 0 in case of success, a negative AVERROR code otherwise - */ --int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); -+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx); - - /** - * Enqueues a V4L2Buffer -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index a40be946904e..be76068af32d 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -27,11 +27,13 @@ - #include - #include - #include -+#include "libavutil/avassert.h" - #include "libavcodec/avcodec.h" - #include "decode.h" - #include "v4l2_buffers.h" - #include "v4l2_fmt.h" - #include "v4l2_m2m.h" -+#include "weak_link.h" - - struct 
v4l2_format_update { - uint32_t v4l2_fmt; -@@ -153,21 +155,99 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd - } - } - --static int v4l2_start_decode(V4L2Context *ctx) -+static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) - { -- struct v4l2_decoder_cmd cmd = { -- .cmd = V4L2_DEC_CMD_START, -- .flags = 0, -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -+ struct v4l2_selection selection = { -+ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, -+ .target = V4L2_SEL_TGT_COMPOSE - }; -- int ret; - -- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd); -- if (ret) -+ memset(r, 0, sizeof(*r)); -+ if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection)) - return AVERROR(errno); - -+ *r = selection.r; - return 0; - } - -+static int do_source_change(V4L2m2mContext * const s) -+{ -+ AVCodecContext *const avctx = s->avctx; -+ -+ int ret; -+ int reinit; -+ int full_reinit; -+ struct v4l2_format cap_fmt = s->capture.format; -+ -+ s->resize_pending = 0; -+ s->capture.done = 0; -+ -+ ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); -+ if (ret) { -+ av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name); -+ return 0; -+ } -+ -+ s->output.sample_aspect_ratio = v4l2_get_sar(&s->output); -+ -+ get_default_selection(&s->capture, &s->capture.selection); -+ -+ reinit = v4l2_resolution_changed(&s->capture, &cap_fmt); -+ if (reinit) { -+ s->capture.height = v4l2_get_height(&cap_fmt); -+ s->capture.width = v4l2_get_width(&cap_fmt); -+ } -+ s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); -+ -+ av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d\n", -+ s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, -+ s->capture.selection.width, s->capture.selection.height, -+ s->capture.selection.left, s->capture.selection.top); -+ -+ s->reinit = 1; -+ -+ if (reinit) { -+ if (avctx) -+ ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); -+ if (ret < 0) -+ 
av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); -+ -+ ret = ff_v4l2_m2m_codec_reinit(s); -+ if (ret) { -+ av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n"); -+ return AVERROR(EINVAL); -+ } -+ goto reinit_run; -+ } -+ -+ /* Buffers are OK so just stream off to ack */ -+ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only\n", __func__); -+ -+ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); -+ if (ret) -+ av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); -+ s->draining = 0; -+ -+ /* reinit executed */ -+reinit_run: -+ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); -+ return 1; -+} -+ -+static int ctx_done(V4L2Context * const ctx) -+{ -+ int rv = 0; -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -+ -+ ctx->done = 1; -+ -+ if (s->resize_pending && !V4L2_TYPE_IS_OUTPUT(ctx->type)) -+ rv = do_source_change(s); -+ -+ return rv; -+} -+ - /** - * handle resolution change event and end of stream event - * returns 1 if reinit was successful, negative if it failed -@@ -175,8 +255,7 @@ static int v4l2_start_decode(V4L2Context *ctx) - */ - static int v4l2_handle_event(V4L2Context *ctx) - { -- V4L2m2mContext *s = ctx_to_m2mctx(ctx); -- struct v4l2_format cap_fmt = s->capture.format; -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - struct v4l2_event evt = { 0 }; - int ret; - -@@ -186,44 +265,22 @@ static int v4l2_handle_event(V4L2Context *ctx) - return 0; - } - -+ av_log(logger(ctx), AV_LOG_INFO, "Dq event %d\n", evt.type); -+ - if (evt.type == V4L2_EVENT_EOS) { -- ctx->done = 1; -+// ctx->done = 1; -+ av_log(logger(ctx), AV_LOG_TRACE, "%s VIDIOC_EVENT_EOS\n", ctx->name); - return 0; - } - - if (evt.type != V4L2_EVENT_SOURCE_CHANGE) - return 0; - -- ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); -- if (ret) { -- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name); -- return 0; -- } -- -- if (v4l2_resolution_changed(&s->capture, &cap_fmt)) { -- s->capture.height = 
v4l2_get_height(&cap_fmt); -- s->capture.width = v4l2_get_width(&cap_fmt); -- s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); -- } else { -- v4l2_start_decode(ctx); -+ s->resize_pending = 1; -+ if (!ctx->done) - return 0; -- } -- -- s->reinit = 1; -- -- if (s->avctx) -- ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); -- if (ret < 0) -- av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n"); -- -- ret = ff_v4l2_m2m_codec_reinit(s); -- if (ret) { -- av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); -- return AVERROR(EINVAL); -- } - -- /* reinit executed */ -- return 1; -+ return do_source_change(s); - } - - static int v4l2_stop_decode(V4L2Context *ctx) -@@ -266,8 +323,26 @@ static int v4l2_stop_encode(V4L2Context *ctx) - return 0; - } - -+static int count_in_driver(const V4L2Context * const ctx) -+{ -+ int i; -+ int n = 0; -+ -+ if (!ctx->bufrefs) -+ return -1; -+ -+ for (i = 0; i < ctx->num_buffers; ++i) { -+ V4L2Buffer *const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -+ if (avbuf->status == V4L2BUF_IN_DRIVER) -+ ++n; -+ } -+ return n; -+} -+ - static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) - { -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -+ const int is_capture = !V4L2_TYPE_IS_OUTPUT(ctx->type); - struct v4l2_plane planes[VIDEO_MAX_PLANES]; - struct v4l2_buffer buf = { 0 }; - V4L2Buffer *avbuf; -@@ -276,50 +351,84 @@ static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) - .fd = ctx_to_m2mctx(ctx)->fd, - }; - int i, ret; -+ int no_rx_means_done = 0; - -- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) { -+ if (is_capture && ctx->bufrefs) { - for (i = 0; i < ctx->num_buffers; i++) { -- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) -+ avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -+ if (avbuf->status == V4L2BUF_IN_DRIVER) - break; - } - if (i == ctx->num_buffers) -- av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to " -+ 
av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers (%d) returned to " - "userspace. Increase num_capture_buffers " - "to prevent device deadlock or dropped " -- "packets/frames.\n"); -+ "packets/frames.\n", i); - } - -+#if 0 -+ // I think this is true but pointless -+ // we will get some other form of EOF signal -+ - /* if we are draining and there are no more capture buffers queued in the driver we are done */ -- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) { -+ if (is_capture && ctx_to_m2mctx(ctx)->draining) { - for (i = 0; i < ctx->num_buffers; i++) { - /* capture buffer initialization happens during decode hence - * detection happens at runtime - */ -- if (!ctx->buffers) -+ if (!ctx->bufrefs) - break; - -- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) -+ avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -+ if (avbuf->status == V4L2BUF_IN_DRIVER) - goto start; - } - ctx->done = 1; - return NULL; - } -+#endif - - start: -- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) -- pfd.events = POLLOUT | POLLWRNORM; -- else { -+ if (is_capture) { - /* no need to listen to requests for more input while draining */ - if (ctx_to_m2mctx(ctx)->draining) - pfd.events = POLLIN | POLLRDNORM | POLLPRI; -+ } else { -+ pfd.events = POLLOUT | POLLWRNORM; - } -+ no_rx_means_done = s->resize_pending && is_capture; - - for (;;) { -- ret = poll(&pfd, 1, timeout); -+ // If we have a resize pending then all buffers should be Qed -+ // With a resize pending we should be in drain but evidence suggests -+ // that not all decoders do this so poll to clear -+ int t2 = no_rx_means_done ? 0 : timeout < 0 ? 
3000 : timeout; -+ const int e = pfd.events; -+ -+ ret = poll(&pfd, 1, t2); -+ - if (ret > 0) - break; -- if (errno == EINTR) -- continue; -+ -+ if (ret < 0) { -+ int err = errno; -+ if (err == EINTR) -+ continue; -+ av_log(logger(ctx), AV_LOG_ERROR, "=== poll error %d (%s): events=%#x, cap buffers=%d\n", -+ err, strerror(err), -+ e, count_in_driver(ctx)); -+ return NULL; -+ } -+ -+ // ret == 0 (timeout) -+ if (no_rx_means_done) { -+ av_log(logger(ctx), AV_LOG_DEBUG, "Ctx done on timeout\n"); -+ ret = ctx_done(ctx); -+ if (ret > 0) -+ goto start; -+ } -+ if (timeout == -1) -+ av_log(logger(ctx), AV_LOG_ERROR, "=== poll unexpected TIMEOUT: events=%#x, cap buffers=%d\n", e, count_in_driver(ctx));; - return NULL; - } - -@@ -329,7 +438,8 @@ start: - no need to raise a warning */ - if (timeout == 0) { - for (i = 0; i < ctx->num_buffers; i++) { -- if (ctx->buffers[i].status != V4L2BUF_AVAILABLE) -+ avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -+ if (avbuf->status != V4L2BUF_AVAILABLE) - av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); - } - } -@@ -347,22 +457,25 @@ start: - ctx->done = 1; - return NULL; - } -- if (ret) { -- /* if re-init was successful drop the buffer (if there was one) -- * since we had to reconfigure capture (unmap all buffers) -- */ -- return NULL; -- } -+ if (ret > 0) -+ goto start; - } - - /* 2. 
dequeue the buffer */ - if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { - -- if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { -+ if (is_capture) { - /* there is a capture buffer ready */ - if (pfd.revents & (POLLIN | POLLRDNORM)) - goto dequeue; - -+ // CAPTURE Q drained -+ if (no_rx_means_done) { -+ if (ctx_done(ctx) > 0) -+ goto start; -+ return NULL; -+ } -+ - /* the driver is ready to accept more input; instead of waiting for the capture - * buffer to complete we return NULL so input can proceed (we are single threaded) - */ -@@ -380,37 +493,58 @@ dequeue: - buf.m.planes = planes; - } - -- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf); -- if (ret) { -- if (errno != EAGAIN) { -- ctx->done = 1; -- if (errno != EPIPE) -+ while ((ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf)) == -1) { -+ const int err = errno; -+ if (err == EINTR) -+ continue; -+ if (err != EAGAIN) { -+ // EPIPE on CAPTURE can be used instead of BUF_FLAG_LAST -+ if (err != EPIPE || !is_capture) - av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", -- ctx->name, av_err2str(AVERROR(errno))); -+ ctx->name, av_err2str(AVERROR(err))); -+ if (ctx_done(ctx) > 0) -+ goto start; - } - return NULL; - } -+ --ctx->q_count; -+ av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d\n", -+ ctx->name, buf.index, -+ buf.timestamp.tv_sec, buf.timestamp.tv_usec, -+ ctx->q_count, ++ctx->dq_count); - -- if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) { -+ avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; -+ avbuf->status = V4L2BUF_AVAILABLE; -+ avbuf->buf = buf; -+ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { -+ memcpy(avbuf->planes, planes, sizeof(planes)); -+ avbuf->buf.m.planes = avbuf->planes; -+ } -+ -+ if (ctx_to_m2mctx(ctx)->draining && is_capture) { - int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? 
- buf.m.planes[0].bytesused : buf.bytesused; - if (bytesused == 0) { -- ctx->done = 1; -+ av_log(logger(ctx), AV_LOG_DEBUG, "Buffer empty - reQ\n"); -+ -+ // Must reQ so we don't leak -+ // May not matter if the next thing we do is release all the -+ // buffers but better to be tidy. -+ ff_v4l2_buffer_enqueue(avbuf); -+ -+ if (ctx_done(ctx) > 0) -+ goto start; - return NULL; - } - #ifdef V4L2_BUF_FLAG_LAST -- if (buf.flags & V4L2_BUF_FLAG_LAST) -- ctx->done = 1; -+ if (buf.flags & V4L2_BUF_FLAG_LAST) { -+ av_log(logger(ctx), AV_LOG_TRACE, "FLAG_LAST set\n"); -+ avbuf->status = V4L2BUF_IN_USE; // Avoid flushing this buffer -+ ctx_done(ctx); -+ } - #endif - } - -- avbuf = &ctx->buffers[buf.index]; -- avbuf->status = V4L2BUF_AVAILABLE; -- avbuf->buf = buf; -- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { -- memcpy(avbuf->planes, planes, sizeof(planes)); -- avbuf->buf.m.planes = avbuf->planes; -- } - return avbuf; - } - -@@ -429,8 +563,9 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) - } - - for (i = 0; i < ctx->num_buffers; i++) { -- if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) -- return &ctx->buffers[i]; -+ V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -+ if (avbuf->status == V4L2BUF_AVAILABLE) -+ return avbuf; - } - - return NULL; -@@ -438,25 +573,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) - - static int v4l2_release_buffers(V4L2Context* ctx) - { -- struct v4l2_requestbuffers req = { -- .memory = V4L2_MEMORY_MMAP, -- .type = ctx->type, -- .count = 0, /* 0 -> unmaps buffers from the driver */ -- }; -- int i, j; -+ int i; -+ int ret = 0; -+ const int fd = ctx_to_m2mctx(ctx)->fd; - -- for (i = 0; i < ctx->num_buffers; i++) { -- V4L2Buffer *buffer = &ctx->buffers[i]; -+ // Orphan any buffers in the wild -+ ff_weak_link_break(&ctx->wl_master); -+ -+ if (ctx->bufrefs) { -+ for (i = 0; i < ctx->num_buffers; i++) -+ av_buffer_unref(ctx->bufrefs + i); -+ } -+ -+ if (fd != -1) { -+ struct v4l2_requestbuffers req = { -+ 
.memory = V4L2_MEMORY_MMAP, -+ .type = ctx->type, -+ .count = 0, /* 0 -> unmap all buffers from the driver */ -+ }; -+ -+ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { -+ if (errno == EINTR) -+ continue; -+ -+ ret = AVERROR(errno); - -- for (j = 0; j < buffer->num_planes; j++) { -- struct V4L2Plane_info *p = &buffer->plane_info[j]; -- if (p->mm_addr && p->length) -- if (munmap(p->mm_addr, p->length) < 0) -- av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); -+ av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", -+ ctx->name, av_err2str(AVERROR(errno))); -+ -+ if (ctx_to_m2mctx(ctx)->output_drm) -+ av_log(logger(ctx), AV_LOG_ERROR, -+ "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n" -+ "for all buffers: \n" -+ " 1. drmModeRmFB(..)\n" -+ " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n"); - } - } -+ ctx->q_count = 0; - -- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req); -+ return ret; - } - - static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) -@@ -485,6 +640,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm - - static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) - { -+ V4L2m2mContext* s = ctx_to_m2mctx(ctx); -+ V4L2m2mPriv *priv = s->avctx->priv_data; - enum AVPixelFormat pixfmt = ctx->av_pix_fmt; - struct v4l2_fmtdesc fdesc; - int ret; -@@ -503,6 +660,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) - if (ret) - return AVERROR(EINVAL); - -+ if (priv->pix_fmt != AV_PIX_FMT_NONE) { -+ if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) { -+ fdesc.index++; -+ continue; -+ } -+ } -+ - pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); - ret = v4l2_try_raw_format(ctx, pixfmt); - if (ret){ -@@ -555,18 +719,73 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) - * - 
*****************************************************************************/ - -+ -+static void flush_all_buffers_status(V4L2Context* const ctx) -+{ -+ int i; -+ for (i = 0; i < ctx->num_buffers; ++i) { -+ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; -+ if (buf->status == V4L2BUF_IN_DRIVER) -+ buf->status = V4L2BUF_AVAILABLE; -+ } -+ ctx->q_count = 0; -+} -+ -+static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) -+{ -+ int i; -+ int rv; -+ -+ if (!ctx->bufrefs) { -+ rv = ff_v4l2_context_init(ctx); -+ if (rv) { -+ av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); -+ return rv; -+ } -+ } -+ -+ for (i = 0; i < ctx->num_buffers; ++i) { -+ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; -+ if (buf->status == V4L2BUF_AVAILABLE) { -+ rv = ff_v4l2_buffer_enqueue(buf); -+ if (rv < 0) -+ return rv; -+ } -+ } -+ return 0; -+} -+ - int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) - { - int type = ctx->type; - int ret; -+ AVCodecContext * const avctx = logger(ctx); -+ -+ ff_mutex_lock(&ctx->lock); -+ -+ if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type)) -+ stuff_all_buffers(avctx, ctx); - - ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); -- if (ret < 0) -- return AVERROR(errno); -+ if (ret < 0) { -+ const int err = errno; -+ av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name, -+ cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF", err); -+ ret = AVERROR(err); -+ } -+ else -+ { -+ if (cmd == VIDIOC_STREAMOFF) -+ flush_all_buffers_status(ctx); - -- ctx->streamon = (cmd == VIDIOC_STREAMON); -+ ctx->streamon = (cmd == VIDIOC_STREAMON); -+ av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name, -+ cmd, (cmd == VIDIOC_STREAMON) ? 
"ON" : "OFF"); -+ } - -- return 0; -+ ff_mutex_unlock(&ctx->lock); -+ -+ return ret; - } - - int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) -@@ -594,7 +813,8 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) - return ff_v4l2_buffer_enqueue(avbuf); - } - --int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) -+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, -+ const void * extdata, size_t extlen, int no_rescale_pts) - { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); - V4L2Buffer* avbuf; -@@ -602,8 +822,9 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) - - if (!pkt->size) { - ret = v4l2_stop_decode(ctx); -+ // Log but otherwise ignore stop failure - if (ret) -- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); -+ av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); - s->draining = 1; - return 0; - } -@@ -612,14 +833,14 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) - if (!avbuf) - return AVERROR(EAGAIN); - -- ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); -+ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts); - if (ret) - return ret; - - return ff_v4l2_buffer_enqueue(avbuf); - } - --int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) -+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout, int no_rescale_pts) - { - V4L2Buffer *avbuf; - -@@ -636,7 +857,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) - return AVERROR(EAGAIN); - } - -- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); -+ return ff_v4l2_buffer_buf_to_avframe(frame, avbuf, no_rescale_pts); - } - - int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) -@@ -695,54 +916,57 @@ void ff_v4l2_context_release(V4L2Context* ctx) - { - int ret; - -- if (!ctx->buffers) -+ if 
(!ctx->bufrefs) - return; - - ret = v4l2_release_buffers(ctx); - if (ret) - av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name); - -- av_freep(&ctx->buffers); -+ av_freep(&ctx->bufrefs); -+ av_buffer_unref(&ctx->frames_ref); -+ -+ ff_mutex_destroy(&ctx->lock); - } - --int ff_v4l2_context_init(V4L2Context* ctx) -+ -+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers) - { -- V4L2m2mContext *s = ctx_to_m2mctx(ctx); -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - struct v4l2_requestbuffers req; -- int ret, i; -- -- if (!v4l2_type_supported(ctx)) { -- av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); -- return AVERROR_PATCHWELCOME; -- } -- -- ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); -- if (ret) -- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); -+ int ret; -+ int i; - - memset(&req, 0, sizeof(req)); -- req.count = ctx->num_buffers; -+ req.count = req_buffers; - req.memory = V4L2_MEMORY_MMAP; - req.type = ctx->type; -- ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); -- if (ret < 0) { -- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno)); -- return AVERROR(errno); -+ while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { -+ if (errno != EINTR) { -+ ret = AVERROR(errno); -+ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret)); -+ return ret; -+ } - } - - ctx->num_buffers = req.count; -- ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer)); -- if (!ctx->buffers) { -+ ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs)); -+ if (!ctx->bufrefs) { - av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name); -- return AVERROR(ENOMEM); -+ goto fail_release; - } - -- for (i = 0; i < req.count; i++) { -- ctx->buffers[i].context = ctx; -- ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i); -- if (ret < 0) { -+ ctx->wl_master = 
ff_weak_link_new(ctx); -+ if (!ctx->wl_master) { -+ ret = AVERROR(ENOMEM); -+ goto fail_release; -+ } -+ -+ for (i = 0; i < ctx->num_buffers; i++) { -+ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx); -+ if (ret) { - av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); -- goto error; -+ goto fail_release; - } - } - -@@ -756,10 +980,62 @@ int ff_v4l2_context_init(V4L2Context* ctx) - - return 0; - --error: -+fail_release: - v4l2_release_buffers(ctx); -+ av_freep(&ctx->bufrefs); -+ return ret; -+} -+ -+int ff_v4l2_context_init(V4L2Context* ctx) -+{ -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -+ int ret; -+ -+ // It is not valid to reinit a context without a previous release -+ av_assert0(ctx->bufrefs == NULL); -+ -+ if (!v4l2_type_supported(ctx)) { -+ av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); -+ return AVERROR_PATCHWELCOME; -+ } -+ -+ ff_mutex_init(&ctx->lock, NULL); - -- av_freep(&ctx->buffers); -+ if (s->output_drm) { -+ AVHWFramesContext *hwframes; -+ -+ ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); -+ if (!ctx->frames_ref) { -+ ret = AVERROR(ENOMEM); -+ goto fail_unlock; -+ } -+ -+ hwframes = (AVHWFramesContext*)ctx->frames_ref->data; -+ hwframes->format = AV_PIX_FMT_DRM_PRIME; -+ hwframes->sw_format = ctx->av_pix_fmt; -+ hwframes->width = ctx->width; -+ hwframes->height = ctx->height; -+ ret = av_hwframe_ctx_init(ctx->frames_ref); -+ if (ret < 0) -+ goto fail_unref_hwframes; -+ } -+ -+ ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); -+ if (ret) { -+ ret = AVERROR(errno); -+ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); -+ goto fail_unref_hwframes; -+ } -+ -+ ret = create_buffers(ctx, ctx->num_buffers); -+ if (ret < 0) -+ goto fail_unref_hwframes; -+ -+ return 0; - -+fail_unref_hwframes: -+ av_buffer_unref(&ctx->frames_ref); -+fail_unlock: -+ ff_mutex_destroy(&ctx->lock); - return ret; - } -diff --git 
a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 6f7460c89a9d..59009d11d1e7 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -32,6 +32,8 @@ - #include "libavutil/rational.h" - #include "codec_id.h" - #include "packet.h" -+#include "libavutil/buffer.h" -+#include "libavutil/thread.h" - #include "v4l2_buffers.h" - - typedef struct V4L2Context { -@@ -71,11 +73,12 @@ typedef struct V4L2Context { - */ - int width, height; - AVRational sample_aspect_ratio; -+ struct v4l2_rect selection; - - /** -- * Indexed array of V4L2Buffers -+ * Indexed array of pointers to V4L2Buffers - */ -- V4L2Buffer *buffers; -+ AVBufferRef **bufrefs; - - /** - * Readonly after init. -@@ -93,6 +96,12 @@ typedef struct V4L2Context { - */ - int done; - -+ AVBufferRef *frames_ref; -+ int q_count; -+ int dq_count; -+ struct ff_weak_link_master *wl_master; -+ -+ AVMutex lock; - } V4L2Context; - - /** -@@ -157,9 +166,12 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); - * @param[in] ctx The V4L2Context to dequeue from. - * @param[inout] f The AVFrame to dequeue to. - * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) -+ * @param[in] no_rescale_pts (0 rescale pts, 1 use pts as -+ * timestamp directly) -+ * - * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. - */ --int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); -+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int no_rescale_pts); - - /** - * Enqueues a buffer to a V4L2Context from an AVPacket -@@ -171,7 +183,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); - * @param[in] pkt A pointer to an AVPacket. - * @return 0 in case of success, a negative error otherwise. 
- */ --int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); -+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size, int no_rescale_pts); - - /** - * Enqueues a buffer to a V4L2Context from an AVFrame -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index 602efb7a1605..516e6d98583d 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -216,13 +216,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) - av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); - - /* 2. unmap the capture buffers (v4l2 and ffmpeg): -- * we must wait for all references to be released before being allowed -- * to queue new buffers. - */ -- av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n"); -- if (atomic_load(&s->refcount)) -- while(sem_wait(&s->refsync) == -1 && errno == EINTR); -- - ff_v4l2_context_release(&s->capture); - - /* 3. get the new capture format */ -@@ -259,6 +253,8 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) - av_frame_free(&s->frame); - av_packet_unref(&s->buf_pkt); - -+ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); -+ - av_free(s); - } - -@@ -270,6 +266,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) - if (!s) - return 0; - -+ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); -+ -+ if (av_codec_is_decoder(s->avctx->codec)) -+ av_packet_unref(&s->buf_pkt); -+ - if (s->fd >= 0) { - ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); - if (ret) -@@ -282,7 +283,14 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) - - ff_v4l2_context_release(&s->output); - -+ close(s->fd); -+ s->fd = -1; -+ - s->self_ref = NULL; -+ // This is only called on avctx close so after this point we don't have that -+ // Crash sooner if we find we are using it (can still log with avctx = NULL) -+ s->avctx = NULL; -+ priv->context = NULL; - av_buffer_unref(&priv->context_ref); - - return 0; -diff --git 
a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 04d86d7b9222..24a9c9486468 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -30,6 +30,7 @@ - #include - - #include "libavcodec/avcodec.h" -+#include "libavutil/pixfmt.h" - #include "v4l2_context.h" - - #define container_of(ptr, type, member) ({ \ -@@ -40,6 +41,17 @@ - { "num_output_buffers", "Number of buffers in the output context",\ - OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS } - -+#define FF_V4L2_M2M_TRACK_SIZE 128 -+typedef struct V4L2m2mTrackEl { -+ int discard; // If we see this buffer its been flushed, so discard -+ int pkt_size; -+ int64_t pts; -+ int64_t reordered_opaque; -+ int64_t pkt_pos; -+ int64_t pkt_duration; -+ int64_t track_pts; -+} V4L2m2mTrackEl; -+ - typedef struct V4L2m2mContext { - char devname[PATH_MAX]; - int fd; -@@ -53,6 +65,7 @@ typedef struct V4L2m2mContext { - sem_t refsync; - atomic_uint refcount; - int reinit; -+ int resize_pending; - - /* null frame/packet received */ - int draining; -@@ -66,6 +79,23 @@ typedef struct V4L2m2mContext { - - /* reference back to V4L2m2mPriv */ - void *priv; -+ -+ AVBufferRef *device_ref; -+ -+ /* generate DRM frames */ -+ int output_drm; -+ -+ /* Frame tracking */ -+ int64_t last_pkt_dts; -+ int64_t last_opaque; -+ unsigned int track_no; -+ V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; -+ -+ /* req pkt */ -+ int req_pkt; -+ -+ /* Ext data sent */ -+ int extdata_sent; - } V4L2m2mContext; - - typedef struct V4L2m2mPriv { -@@ -76,6 +106,7 @@ typedef struct V4L2m2mPriv { - - int num_output_buffers; - int num_capture_buffers; -+ enum AVPixelFormat pix_fmt; - } V4L2m2mPriv; - - /** -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 4944d0851198..7f6033ac2c41 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -23,6 +23,10 @@ - - #include - #include -+ -+#include "libavutil/avassert.h" -+#include "libavutil/hwcontext.h" -+#include 
"libavutil/hwcontext_drm.h" - #include "libavutil/pixfmt.h" - #include "libavutil/pixdesc.h" - #include "libavutil/opt.h" -@@ -30,26 +34,51 @@ - #include "codec_internal.h" - #include "libavcodec/decode.h" - -+#include "libavcodec/hwaccels.h" -+#include "libavcodec/internal.h" -+#include "libavcodec/hwconfig.h" -+ - #include "v4l2_context.h" - #include "v4l2_m2m.h" - #include "v4l2_fmt.h" - -+static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) -+{ -+ int ret; -+ struct v4l2_decoder_cmd cmd = { -+ .cmd = V4L2_DEC_CMD_START, -+ .flags = 0, -+ }; -+ -+ if (s->output.streamon) -+ return 0; -+ -+ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); -+ if (ret < 0) -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context\n"); -+ -+ if (!s->capture.streamon || ret < 0) -+ return ret; -+ -+ ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd); -+ if (ret < 0) -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno); -+ else -+ av_log(avctx, AV_LOG_DEBUG, "VIDIOC_DECODER_CMD start OK\n"); -+ -+ return ret; -+} -+ - static int v4l2_try_start(AVCodecContext *avctx) - { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const capture = &s->capture; -- V4L2Context *const output = &s->output; - struct v4l2_selection selection = { 0 }; - int ret; - - /* 1. 
start the output process */ -- if (!output->streamon) { -- ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); -- if (ret < 0) { -- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); -- return ret; -- } -- } -+ if ((ret = check_output_streamon(avctx, s)) != 0) -+ return ret; - - if (capture->streamon) - return 0; -@@ -63,15 +92,29 @@ static int v4l2_try_start(AVCodecContext *avctx) - } - - /* 2.1 update the AVCodecContext */ -- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); -- capture->av_pix_fmt = avctx->pix_fmt; -+ capture->av_pix_fmt = -+ ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); -+ if (s->output_drm) { -+ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -+ avctx->sw_pix_fmt = capture->av_pix_fmt; -+ } -+ else -+ avctx->pix_fmt = capture->av_pix_fmt; - - /* 3. set the crop parameters */ -+#if 1 -+ selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ selection.target = V4L2_SEL_TGT_CROP_DEFAULT; -+ ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); -+ av_log(avctx, AV_LOG_INFO, "Post G selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height); -+#else - selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - selection.r.height = avctx->coded_height; - selection.r.width = avctx->coded_width; -+ av_log(avctx, AV_LOG_INFO, "Try selection %dx%d\n", avctx->coded_width, avctx->coded_height); - ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); -- if (!ret) { -+ av_log(avctx, AV_LOG_INFO, "Post S selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height); -+ if (1) { - ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); - if (ret) { - av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); -@@ -82,15 +125,7 @@ static int v4l2_try_start(AVCodecContext *avctx) - capture->width = selection.r.width; - } - } -- -- /* 4. 
init the capture context now that we have the capture format */ -- if (!capture->buffers) { -- ret = ff_v4l2_context_init(capture); -- if (ret) { -- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); -- return AVERROR(ENOMEM); -- } -- } -+#endif - - /* 5. start the capture process */ - ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); -@@ -133,50 +168,287 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) - return 0; - } - --static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) -+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) -+{ -+ return (int64_t)n; -+} -+ -+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) -+{ -+ return (unsigned int)pts; -+} -+ -+// FFmpeg requires us to propagate a number of vars from the coded pkt into -+// the decoded frame. The only thing that tracks like that in V4L2 stateful -+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no -+// guarantees about PTS being unique or specified for every frame so replace -+// the supplied PTS with a simple incrementing number and keep a circular -+// buffer of all the things we want preserved (including the original PTS) -+// indexed by the tracking no. 
-+static void -+xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *const avpkt) -+{ -+ int64_t track_pts; -+ -+ // Avoid 0 -+ if (++s->track_no == 0) -+ s->track_no = 1; -+ -+ track_pts = track_to_pts(avctx, s->track_no); -+ -+ av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, s->track_no); -+ s->last_pkt_dts = avpkt->dts; -+ s->track_els[s->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ -+ .discard = 0, -+ .pkt_size = avpkt->size, -+ .pts = avpkt->pts, -+ .reordered_opaque = avctx->reordered_opaque, -+ .pkt_pos = avpkt->pos, -+ .pkt_duration = avpkt->duration, -+ .track_pts = track_pts -+ }; -+ avpkt->pts = track_pts; -+} -+ -+// Returns -1 if we should discard the frame -+static int -+xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *const frame) -+{ -+ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; -+ const V4L2m2mTrackEl *const t = s->track_els + n; -+ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) -+ { -+ av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ frame->pts = AV_NOPTS_VALUE; -+ frame->pkt_dts = s->last_pkt_dts; -+ frame->reordered_opaque = s->last_opaque; -+ frame->pkt_pos = -1; -+ frame->pkt_duration = 0; -+ frame->pkt_size = -1; -+ } -+ else if (!t->discard) -+ { -+ frame->pts = t->pts; -+ frame->pkt_dts = s->last_pkt_dts; -+ frame->reordered_opaque = t->reordered_opaque; -+ frame->pkt_pos = t->pkt_pos; -+ frame->pkt_duration = t->pkt_duration; -+ frame->pkt_size = t->pkt_size; -+ -+ s->last_opaque = s->track_els[n].reordered_opaque; -+ s->track_els[n].pts = AV_NOPTS_VALUE; // If we hit this again deny accurate knowledge of PTS -+ } -+ else -+ { -+ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ return -1; -+ } -+ -+ 
frame->best_effort_timestamp = frame->pts; -+ frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? -+ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 ", DTS=%" PRId64 "\n", frame->pts, frame->pkt_dts); -+ return 0; -+} -+ -+static inline int stream_started(const V4L2m2mContext * const s) { -+ return s->capture.streamon && s->output.streamon; -+} -+ -+#define NQ_OK 0 -+#define NQ_Q_FULL 1 -+#define NQ_SRC_EMPTY 2 -+#define NQ_DRAINING 3 -+#define NQ_DEAD 4 -+ -+#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) -+ -+// AVERROR_EOF Flushing an already flushed stream -+// -ve Error (all errors except EOF are unexpected) -+// NQ_OK (0) OK -+// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) -+// NQ_SRC_EMPTY Src empty (do not retry) -+// NQ_DRAINING At EOS, dQ dest until EOS there too -+// NQ_DEAD Not running (do not retry, do not attempt capture dQ) -+ -+static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s) - { -- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; -- V4L2Context *const capture = &s->capture; -- V4L2Context *const output = &s->output; - int ret; - -+ // If we don't already have a coded packet - get a new one -+ // We will already have a coded pkt if the output Q was full last time we -+ // tried to Q it - if (!s->buf_pkt.size) { - ret = ff_decode_get_packet(avctx, &s->buf_pkt); -+ -+ if (ret == AVERROR(EAGAIN)) { -+ if (!stream_started(s)) { -+ av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); -+ return NQ_DEAD; -+ } -+ return NQ_SRC_EMPTY; -+ } -+ -+ if (ret == AVERROR_EOF) { -+ // EOF - enter drain mode -+ av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n", -+ ret, s->buf_pkt.size, stream_started(s), s->draining); -+ if (!stream_started(s)) { -+ av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n"); -+ s->draining = 1; -+ s->capture.done = 1; -+ return AVERROR_EOF; -+ } -+ 
-+ if (!s->draining) { -+ // Calling enqueue with an empty pkt starts drain -+ av_assert0(s->buf_pkt.size == 0); -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0, 1); -+ if (ret) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); -+ return ret; -+ } -+ } -+ return NQ_DRAINING; -+ } -+ - if (ret < 0) { -- if (ret == AVERROR(EAGAIN)) -- return ff_v4l2_context_dequeue_frame(capture, frame, 0); -- else if (ret != AVERROR_EOF) -- return ret; -+ av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); -+ return ret; - } -+ -+ xlat_pts_in(avctx, s, &s->buf_pkt); - } - -- if (s->draining) -- goto dequeue; -+ if ((ret = check_output_streamon(avctx, s)) != 0) -+ return ret; - -- ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); -- if (ret < 0 && ret != AVERROR(EAGAIN)) -- goto fail; -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, -+ avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size, -+ 1); - -- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ -- if (ret != AVERROR(EAGAIN)) -+ if (ret == AVERROR(EAGAIN)) { -+ // Out of input buffers - keep packet -+ ret = NQ_Q_FULL; -+ } -+ else { -+ // In all other cases we are done with this packet - av_packet_unref(&s->buf_pkt); -+ s->extdata_sent = 1; - -- if (!s->draining) { -- ret = v4l2_try_start(avctx); - if (ret) { -- /* cant recover */ -- if (ret != AVERROR(ENOMEM)) -- ret = 0; -- goto fail; -+ av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); -+ return ret; -+ } -+ } -+ -+ // Start if we haven't -+ { -+ const int ret2 = v4l2_try_start(avctx); -+ if (ret2) { -+ av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); -+ ret = (ret2 == AVERROR(ENOMEM)) ? 
ret2 : NQ_DEAD; -+ } -+ } -+ -+ return ret; -+} -+ -+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) -+{ -+ V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; -+ int src_rv; -+ int dst_rv = 1; // Non-zero (done), non-negative (error) number -+ -+ do { -+ src_rv = try_enqueue_src(avctx, s); -+ -+ // If we got a frame last time and we have nothing to enqueue then -+ // return now. rv will be AVERROR(EAGAIN) indicating that we want more input -+ // This should mean that once decode starts we enter a stable state where -+ // we alternately ask for input and produce output -+ if (s->req_pkt && src_rv == NQ_SRC_EMPTY) -+ break; -+ -+ if (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) { -+ av_log(avctx, AV_LOG_WARNING, "Poll says src Q has space but enqueue fail"); -+ src_rv = NQ_SRC_EMPTY; // If we can't enqueue pretend that there is nothing to enqueue -+ } -+ -+ // Try to get a new frame if -+ // (a) we haven't already got one AND -+ // (b) enqueue returned a status indicating that decode should be attempted -+ if (dst_rv != 0 && TRY_DQ(src_rv)) { -+ do { -+ // Dequeue frame will unref any previous contents of frame -+ // if it returns success so we don't need an explicit unref -+ // when discarding -+ // This returns AVERROR(EAGAIN) if there isn't a frame ready yet -+ // but there is room in the input Q -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, -1, 1); -+ -+ if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) -+ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", -+ s->draining, s->capture.done); -+ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) -+ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", -+ s->draining, s->capture.done, dst_rv); -+ -+ // Go again if we got a frame that we need to discard -+ } while (dst_rv == 0 && xlat_pts_out(avctx, s, frame)); -+ } -+ -+ // Continue trying to enqueue packets if either -+ // (a) we succeeded last 
time OR -+ // (b) enqueue failed due to input Q full AND there is now room -+ } while (src_rv == NQ_OK || (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) ); -+ -+ // Ensure that the frame contains nothing if we aren't returning a frame -+ // (might happen when discarding) -+ if (dst_rv) -+ av_frame_unref(frame); -+ -+ // If we got a frame this time ask for a pkt next time -+ s->req_pkt = (dst_rv == 0); -+ -+#if 0 -+ if (dst_rv == 0) -+ { -+ static int z = 0; -+ if (++z > 50) { -+ av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); -+ ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); -+ return -1; - } - } -+#endif -+ -+ return dst_rv == 0 ? 0 : -+ src_rv < 0 ? src_rv : -+ dst_rv < 0 ? dst_rv : -+ AVERROR(EAGAIN); -+} -+ -+#if 0 -+#include -+static int64_t us_time(void) -+{ -+ struct timespec ts; -+ clock_gettime(CLOCK_MONOTONIC, &ts); -+ return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; -+} - --dequeue: -- return ff_v4l2_context_dequeue_frame(capture, frame, -1); --fail: -- av_packet_unref(&s->buf_pkt); -+static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) -+{ -+ int ret; -+ const int64_t now = us_time(); -+ int64_t done; -+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -+ ret = v4l2_receive_frame2(avctx, frame); -+ done = us_time(); -+ av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret); - return ret; - } -+#endif - - static av_cold int v4l2_decode_init(AVCodecContext *avctx) - { -@@ -185,6 +457,9 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - V4L2m2mPriv *priv = avctx->priv_data; - int ret; - -+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -+ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -+ - ret = ff_v4l2_m2m_create_context(priv, &s); - if (ret < 0) - return ret; -@@ -205,6 +480,28 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; - capture->av_pix_fmt = avctx->pix_fmt; - -+ /* the client requests the 
codec to generate DRM frames: -+ * - data[0] will therefore point to the returned AVDRMFrameDescriptor -+ * check the ff_v4l2_buffer_to_avframe conversion function. -+ * - the DRM frame format is passed in the DRM frame descriptor layer. -+ * check the v4l2_get_drm_frame function. -+ */ -+ switch (ff_get_format(avctx, avctx->codec->pix_fmts)) { -+ default: -+ s->output_drm = 1; -+ break; -+ } -+ -+ s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); -+ if (!s->device_ref) { -+ ret = AVERROR(ENOMEM); -+ return ret; -+ } -+ -+ ret = av_hwdevice_ctx_init(s->device_ref); -+ if (ret < 0) -+ return ret; -+ - s->avctx = avctx; - ret = ff_v4l2_m2m_codec_init(priv); - if (ret) { -@@ -217,7 +514,53 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - - static av_cold int v4l2_decode_close(AVCodecContext *avctx) - { -- return ff_v4l2_m2m_codec_end(avctx->priv_data); -+ int rv; -+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -+ rv = ff_v4l2_m2m_codec_end(avctx->priv_data); -+ av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); -+ return rv; -+} -+ -+static void v4l2_decode_flush(AVCodecContext *avctx) -+{ -+ // An alternatve and more drastic form of flush is to simply do this: -+ // v4l2_decode_close(avctx); -+ // v4l2_decode_init(avctx); -+ // The downside is that this keeps a decoder open until all the frames -+ // associated with it have been returned. This is a bit wasteful on -+ // possibly limited h/w resources and fails on a Pi for this reason unless -+ // more GPU mem is allocated than is the default. 
-+ -+ V4L2m2mPriv * const priv = avctx->priv_data; -+ V4L2m2mContext * const s = priv->context; -+ V4L2Context * const output = &s->output; -+ V4L2Context * const capture = &s->capture; -+ int ret, i; -+ -+ av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); -+ -+ // Reflushing everything is benign, quick and avoids having to worry about -+ // states like EOS processing so don't try to optimize out (having got it -+ // wrong once) -+ -+ ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); -+ if (ret < 0) -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", output->name, ret); -+ -+ // V4L2 makes no guarantees about whether decoded frames are flushed or not -+ // so mark all frames we are tracking to be discarded if they appear -+ for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) -+ s->track_els[i].discard = 1; -+ -+ // resend extradata -+ s->extdata_sent = 0; -+ // clear EOS status vars -+ s->draining = 0; -+ output->done = 0; -+ capture->done = 0; -+ -+ // Stream on will occur when we actually submit a new frame -+ av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); - } - - #define OFFSET(x) offsetof(V4L2m2mPriv, x) -@@ -227,9 +570,15 @@ static const AVOption options[] = { - V4L_M2M_DEFAULT_OPTS, - { "num_capture_buffers", "Number of buffers in the capture context", - OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, -+ { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, - { NULL}, - }; - -+static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { -+ HW_CONFIG_INTERNAL(DRM_PRIME), -+ NULL -+}; -+ - #define M2MDEC_CLASS(NAME) \ - static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ - .class_name = #NAME "_v4l2m2m_decoder", \ -@@ -250,11 +599,16 @@ static const AVOption options[] = { - .init = v4l2_decode_init, \ - FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \ - 
.close = v4l2_decode_close, \ -+ .flush = v4l2_decode_flush, \ - .bsfs = bsf_name, \ - .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ - .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \ - FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \ - .p.wrapper_name = "v4l2m2m", \ -+ .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \ -+ AV_PIX_FMT_NV12, \ -+ AV_PIX_FMT_NONE}, \ -+ .hw_configs = v4l2_m2m_hw_configs, \ - } - - M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb"); - -From 8a5fcd215d3ac07df4807d207e8a337edccffaeb Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Jun 2021 18:46:21 +0100 -Subject: [PATCH 017/186] Fix crash in hw_device_default_name if type not found - (NONE) - ---- - fftools/ffmpeg_hw.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/fftools/ffmpeg_hw.c b/fftools/ffmpeg_hw.c -index 88fa7824701e..740a5e7153cf 100644 ---- a/fftools/ffmpeg_hw.c -+++ b/fftools/ffmpeg_hw.c -@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum AVHWDeviceType type) - char *name; - size_t index_pos; - int index, index_limit = 1000; -+ if (!type_name) -+ return NULL; - index_pos = strlen(type_name); - name = av_malloc(index_pos + 4); - if (!name) - -From 35c7187e199e4042d30165c015f525bfcb377796 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Jun 2021 18:59:18 +0100 -Subject: [PATCH 018/186] Allow v4l2m2m to select non-drm_prime output formats - ---- - libavcodec/v4l2_buffers.c | 2 +- - libavcodec/v4l2_m2m_dec.c | 14 ++++++++++---- - 2 files changed, 11 insertions(+), 5 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index a003934ca19e..1ca1128db6f4 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -524,7 +524,7 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - offset += dst_stride * out->context->height; - } - if (offset > out->plane_info[0].length) { -- av_log(NULL, 
AV_LOG_ERROR, "%s: Plane total %d > buffer size %d\n", __func__, offset, out->plane_info[0].length); -+ av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length); - return -1; - } - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 7f6033ac2c41..a4b5a4e7e991 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -455,10 +455,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - V4L2Context *capture, *output; - V4L2m2mContext *s; - V4L2m2mPriv *priv = avctx->priv_data; -+ int gf_pix_fmt; - int ret; - - av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -- avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; - - ret = ff_v4l2_m2m_create_context(priv, &s); - if (ret < 0) -@@ -486,10 +486,15 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - * - the DRM frame format is passed in the DRM frame descriptor layer. - * check the v4l2_get_drm_frame function. - */ -- switch (ff_get_format(avctx, avctx->codec->pix_fmts)) { -- default: -+ -+ gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); -+ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s); get_format requested=%d (%s)\n", -+ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); -+ -+ s->output_drm = 0; -+ if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { -+ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; - s->output_drm = 1; -- break; - } - - s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); -@@ -607,6 +612,7 @@ static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { - .p.wrapper_name = "v4l2m2m", \ - .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \ - AV_PIX_FMT_NV12, \ -+ AV_PIX_FMT_YUV420P, \ - AV_PIX_FMT_NONE}, \ - .hw_configs = v4l2_m2m_hw_configs, \ - } - -From d5bfb5014aa4692820903ef7287bc0319ebc139f Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Jun 2021 18:59:38 +0100 
-Subject: [PATCH 019/186] Fix YUV420P output from v4l2m2m - -Also put get_width get_height inlines in header as they are generally -useful. ---- - libavcodec/v4l2_buffers.c | 12 ++++++------ - libavcodec/v4l2_context.c | 22 ++++++---------------- - libavcodec/v4l2_m2m.h | 12 ++++++++++++ - 3 files changed, 24 insertions(+), 22 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 1ca1128db6f4..f4c11ca8d06d 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -425,17 +425,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) - case AV_PIX_FMT_NV21: - if (avbuf->num_planes > 1) - break; -- frame->linesize[1] = avbuf->plane_info[0].bytesperline; -- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; -+ frame->linesize[1] = frame->linesize[0]; -+ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); - break; - - case AV_PIX_FMT_YUV420P: - if (avbuf->num_planes > 1) - break; -- frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1; -- frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1; -- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; -- frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2); -+ frame->linesize[1] = frame->linesize[0] / 2; -+ frame->linesize[2] = frame->linesize[1]; -+ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); -+ frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2; - break; - - default: -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index be76068af32d..6fe258662786 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -55,16 +55,6 @@ static 
inline AVCodecContext *logger(V4L2Context *ctx) - return ctx_to_m2mctx(ctx)->avctx; - } - --static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) --{ -- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; --} -- --static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) --{ -- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; --} -- - static AVRational v4l2_get_sar(V4L2Context *ctx) - { - struct AVRational sar = { 0, 1 }; -@@ -96,8 +86,8 @@ static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2 - if (ret) - av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", - ctx->name, -- v4l2_get_width(fmt1), v4l2_get_height(fmt1), -- v4l2_get_width(fmt2), v4l2_get_height(fmt2)); -+ ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), -+ ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); - - return ret; - } -@@ -195,8 +185,8 @@ static int do_source_change(V4L2m2mContext * const s) - - reinit = v4l2_resolution_changed(&s->capture, &cap_fmt); - if (reinit) { -- s->capture.height = v4l2_get_height(&cap_fmt); -- s->capture.width = v4l2_get_width(&cap_fmt); -+ s->capture.height = ff_v4l2_get_format_height(&cap_fmt); -+ s->capture.width = ff_v4l2_get_format_width(&cap_fmt); - } - s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - -@@ -973,8 +963,8 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers - av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name, - V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat), - req.count, -- v4l2_get_width(&ctx->format), -- v4l2_get_height(&ctx->format), -+ ff_v4l2_get_format_width(&ctx->format), -+ ff_v4l2_get_format_height(&ctx->format), - V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage, - V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline); - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 24a9c9486468..8f054f2f50f9 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -160,4 +160,16 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); - */ - int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); - -+ -+static inline unsigned int ff_v4l2_get_format_width(struct v4l2_format *fmt) -+{ -+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; -+} -+ -+static inline unsigned int ff_v4l2_get_format_height(struct v4l2_format *fmt) -+{ -+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; -+} -+ -+ - #endif /* AVCODEC_V4L2_M2M_H */ - -From 43b65c3e3d1c8e2c35694764b7ee93e7dbf75a1a Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Jun 2021 19:23:44 +0100 -Subject: [PATCH 020/186] Report buffer overflows in v4l2m2m - ---- - libavcodec/v4l2_buffers.c | 14 ++++++++++---- - libavcodec/v4l2_context.c | 5 ++++- - 2 files changed, 14 insertions(+), 5 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index f4c11ca8d06d..de31f7ced93c 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -364,6 +364,7 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) - static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) - { - unsigned int bytesused, length; -+ int rv = 0; - - if (plane >= out->num_planes) - return AVERROR(EINVAL); -@@ -371,11 +372,16 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i - length = out->plane_info[plane].length; - bytesused = FFMIN(size+offset, length); - -- memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); -+ 
if (size > length - offset) { -+ size = length - offset; -+ rv = AVERROR(ENOMEM); -+ } -+ -+ memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size); - - set_buf_length(out, plane, bytesused, length); - -- return 0; -+ return rv; - } - - static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) -@@ -630,7 +636,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - } - - ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); -- if (ret) -+ if (ret && ret != AVERROR(ENOMEM)) - return ret; - - v4l2_set_pts(out, pkt->pts, no_rescale_pts); -@@ -638,7 +644,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - if (pkt->flags & AV_PKT_FLAG_KEY) - out->flags = V4L2_BUF_FLAG_KEYFRAME; - -- return 0; -+ return ret; - } - - int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 6fe258662786..81aced0c2b5d 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -824,7 +824,10 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, - return AVERROR(EAGAIN); - - ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts); -- if (ret) -+ if (ret == AVERROR(ENOMEM)) -+ av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", -+ __func__, pkt->size, avbuf->planes[0].length); -+ else if (ret) - return ret; - - return ff_v4l2_buffer_enqueue(avbuf); - -From b02c14a2e1f9890370eb9d459feccacb7e652e82 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 14 Jun 2021 11:55:16 +0100 -Subject: [PATCH 021/186] Increase V4L2 H264 stateful coded buffer size - -Try to set a min size of frame size / 2 for bitbuffers passed to V4l2. -This fixes a few streams that have large I-frames. You would hope -Annex-A gave useful minCR so an appropriate size could be calculated -but it doesn't really. 
It gives good guidance for bits required over -time but the instantaneous limits are very weak so it is possible -that even this won't be enough. The correct long term solution would -be to have resizable dmabufs but that is a greter rewrite than seems -sensible now. ---- - libavcodec/v4l2_context.c | 24 +++++++++++++++++++++++- - libavcodec/v4l2_context.h | 6 ++++++ - libavcodec/v4l2_m2m_dec.c | 24 ++++++++++++++++++++++++ - 3 files changed, 53 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 81aced0c2b5d..a17ae027a666 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -902,7 +902,29 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) - - int ff_v4l2_context_set_format(V4L2Context* ctx) - { -- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); -+ int ret; -+ -+ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); -+ if (ret != 0) -+ return ret; -+ -+ // Check returned size against min size and if smaller have another go -+ // Only worry about plane[0] as this is meant to enforce limits for -+ // encoded streams where we might know a bit more about the shape -+ // than the driver -+ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) { -+ if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage) -+ return 0; -+ ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size; -+ } -+ else { -+ if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage) -+ return 0; -+ ctx->format.fmt.pix.sizeimage = ctx->min_buf_size; -+ } -+ -+ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); -+ return ret; - } - - void ff_v4l2_context_release(V4L2Context* ctx) -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 59009d11d1e7..37b0431400d8 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -75,6 +75,12 @@ typedef struct V4L2Context { - AVRational sample_aspect_ratio; - struct v4l2_rect 
selection; - -+ /** -+ * If the default size of buffer is less than this then try to -+ * set to this. -+ */ -+ uint32_t min_buf_size; -+ - /** - * Indexed array of pointers to V4L2Buffers - */ -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index a4b5a4e7e991..1851acbc93fe 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -450,6 +450,27 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } - #endif - -+static uint32_t max_coded_size(const AVCodecContext * const avctx) -+{ -+ uint32_t wxh = avctx->coded_width * avctx->coded_height; -+ uint32_t size; -+ -+ // Currently the only thing we try to set our own limits for is H264 -+ if (avctx->codec_id != AV_CODEC_ID_H264) -+ return 0; -+ -+ size = wxh * 3 / 2; -+ // H.264 Annex A table A-1 gives minCR which is either 2 or 4 -+ // unfortunately that doesn't yield an actually useful limit -+ // and it should be noted that frame 0 is special cased to allow -+ // a bigger number which really isn't helpful for us. 
So just pick -+ // frame_size / 2 -+ size /= 2; -+ // Add 64k to allow for any overheads and/or encoder hopefulness -+ // with small WxH -+ return size + (1 << 16); -+} -+ - static av_cold int v4l2_decode_init(AVCodecContext *avctx) - { - V4L2Context *capture, *output; -@@ -460,6 +481,7 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - - av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); - -+ av_log(avctx, AV_LOG_INFO, "level=%d\n", avctx->level); - ret = ff_v4l2_m2m_create_context(priv, &s); - if (ret < 0) - return ret; -@@ -476,9 +498,11 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - - output->av_codec_id = avctx->codec_id; - output->av_pix_fmt = AV_PIX_FMT_NONE; -+ output->min_buf_size = max_coded_size(avctx); - - capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; - capture->av_pix_fmt = avctx->pix_fmt; -+ capture->min_buf_size = 0; - - /* the client requests the codec to generate DRM frames: - * - data[0] will therefore point to the returned AVDRMFrameDescriptor - -From 1de32953e7f28a262ecb4727eaac0cfd3588379e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 28 Jun 2021 12:13:35 +0100 -Subject: [PATCH 022/186] Fix raw video s.t. it respects any remaining cropping - -This fixes the long standing CONFWIN_A conformance test failure for drm. 
---- - libavcodec/rawenc.c | 32 ++++++++--- - libavutil/hwcontext_drm.c | 112 ++++++++++++++++++++++++++++++++++++-- - 2 files changed, 130 insertions(+), 14 deletions(-) - -diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c -index 594a77c42a64..8ca0379e1219 100644 ---- a/libavcodec/rawenc.c -+++ b/libavcodec/rawenc.c -@@ -124,32 +124,41 @@ static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, - - - static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, -- const AVFrame *frame, int *got_packet) -+ const AVFrame *src_frame, int *got_packet) - { - int ret; -+ AVFrame * frame = NULL; - - #if CONFIG_SAND -- if (av_rpi_is_sand_frame(frame)) { -- ret = av_rpi_is_sand8_frame(frame) ? raw_sand8_as_yuv420(avctx, pkt, frame) : -- av_rpi_is_sand16_frame(frame) ? raw_sand16_as_yuv420(avctx, pkt, frame) : -- av_rpi_is_sand30_frame(frame) ? raw_sand30_as_yuv420(avctx, pkt, frame) : -1; -+ if (av_rpi_is_sand_frame(src_frame)) { -+ ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) : -+ av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) : -+ av_rpi_is_sand30_frame(src_frame) ? 
raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1; - *got_packet = (ret == 0); - return ret; - } - #endif - -+ if ((frame = av_frame_clone(src_frame)) == NULL) { -+ ret = AVERROR(ENOMEM); -+ goto fail; -+ } -+ -+ if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0) -+ goto fail; -+ - ret = av_image_get_buffer_size(frame->format, - frame->width, frame->height, 1); - if (ret < 0) -- return ret; -+ goto fail; - - if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0) -- return ret; -+ goto fail; - if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size, - (const uint8_t **)frame->data, frame->linesize, - frame->format, - frame->width, frame->height, 1)) < 0) -- return ret; -+ goto fail; - - if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 && - frame->format == AV_PIX_FMT_YUYV422) { -@@ -165,8 +174,15 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, - AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16); - } - } -+ pkt->flags |= AV_PKT_FLAG_KEY; -+ av_frame_free(&frame); - *got_packet = 1; - return 0; -+ -+fail: -+ av_frame_free(&frame); -+ *got_packet = 0; -+ return ret; - } - - const FFCodec ff_rawvideo_encoder = { -diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c -index 7a9fdbd263d4..baf18920fa14 100644 ---- a/libavutil/hwcontext_drm.c -+++ b/libavutil/hwcontext_drm.c -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - - /* This was introduced in version 4.6. And may not exist all without an - * optional package. 
So to prevent a hard dependency on needing the Linux -@@ -31,6 +32,7 @@ - #endif - - #include -+#include - #include - - #include "avassert.h" -@@ -38,7 +40,9 @@ - #include "hwcontext_drm.h" - #include "hwcontext_internal.h" - #include "imgutils.h" -- -+#if CONFIG_SAND -+#include "libavutil/rpi_sand_fns.h" -+#endif - - static void drm_device_free(AVHWDeviceContext *hwdev) - { -@@ -53,6 +57,11 @@ static int drm_device_create(AVHWDeviceContext *hwdev, const char *device, - AVDRMDeviceContext *hwctx = hwdev->hwctx; - drmVersionPtr version; - -+ if (device == NULL) { -+ hwctx->fd = -1; -+ return 0; -+ } -+ - hwctx->fd = open(device, O_RDWR); - if (hwctx->fd < 0) - return AVERROR(errno); -@@ -139,6 +148,8 @@ static int drm_map_frame(AVHWFramesContext *hwfc, - if (flags & AV_HWFRAME_MAP_WRITE) - mmap_prot |= PROT_WRITE; - -+ if (dst->format == AV_PIX_FMT_NONE) -+ dst->format = hwfc->sw_format; - #if HAVE_LINUX_DMA_BUF_H - if (flags & AV_HWFRAME_MAP_READ) - map->sync_flags |= DMA_BUF_SYNC_READ; -@@ -185,6 +196,23 @@ static int drm_map_frame(AVHWFramesContext *hwfc, - - dst->width = src->width; - dst->height = src->height; -+ dst->crop_top = src->crop_top; -+ dst->crop_bottom = src->crop_bottom; -+ dst->crop_left = src->crop_left; -+ dst->crop_right = src->crop_right; -+ -+#if CONFIG_SAND -+ // Rework for sand frames -+ if (av_rpi_is_sand_frame(dst)) { -+ // As it stands the sand formats hold stride2 in linesize[3] -+ // linesize[0] & [1] contain stride1 which is always 128 for everything we do -+ // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1] -+ dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier); -+ dst->linesize[0] = 128; -+ dst->linesize[1] = 128; -+ // *** Are we sure src->height is actually what we want ??? 
-+ } -+#endif - - err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, - &drm_unmap_frame, map); -@@ -212,7 +240,15 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, - if (!pix_fmts) - return AVERROR(ENOMEM); - -- pix_fmts[0] = ctx->sw_format; -+ // **** Offer native sand too ???? -+ pix_fmts[0] = -+#if CONFIG_SAND -+ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? -+ AV_PIX_FMT_YUV420P : -+ ctx->sw_format == AV_PIX_FMT_RPI4_10 ? -+ AV_PIX_FMT_YUV420P10LE : -+#endif -+ ctx->sw_format; - pix_fmts[1] = AV_PIX_FMT_NONE; - - *formats = pix_fmts; -@@ -231,18 +267,79 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, - map = av_frame_alloc(); - if (!map) - return AVERROR(ENOMEM); -- map->format = dst->format; - -+ // Map to default -+ map->format = AV_PIX_FMT_NONE; - err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); - if (err) - goto fail; - -- map->width = dst->width; -- map->height = dst->height; -+#if 0 -+ av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__, -+ hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE, -+ map->width, map->height, -+ map->linesize[0], -+ map->linesize[1], -+ map->linesize[2], -+ map->linesize[3], -+ dst->width, dst->height, -+ dst->linesize[0], -+ dst->linesize[1], -+ dst->linesize[2]); -+#endif -+#if CONFIG_SAND -+ if (av_rpi_is_sand_frame(map)) { -+ // Preserve crop - later ffmpeg code assumes that we have in that it -+ // overwrites any crop that we create with the old values -+ const unsigned int w = FFMIN(dst->width, map->width); -+ const unsigned int h = FFMIN(dst->height, map->height); -+ -+ if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) { -+ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], -+ map->data[0], -+ 128, stride2, -+ 0, 0, w, h); -+ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ 
map->data[1], -+ 128, stride2, -+ 0, 0, w / 2, h / 2); -+ } -+ else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) { -+ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], -+ map->data[0], -+ 128, stride2, -+ 0, 0, w, h); -+ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ map->data[1], -+ 128, stride2, -+ 0, 0, w / 2, h / 2); -+ } -+ else -+ { -+ av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); -+ err = AVERROR(EINVAL); -+ goto fail; -+ } -+ -+ dst->width = w; -+ dst->height = h; -+ } -+ else -+#endif -+ { -+ // Kludge mapped h/w s.t. frame_copy works -+ map->width = dst->width; -+ map->height = dst->height; -+ err = av_frame_copy(dst, map); -+ } - -- err = av_frame_copy(dst, map); - if (err) -+ { -+ av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__); - goto fail; -+ } - - err = 0; - fail: -@@ -257,7 +354,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, - int err; - - if (src->width > hwfc->width || src->height > hwfc->height) -+ { -+ av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height); - return AVERROR(EINVAL); -+ } - - map = av_frame_alloc(); - if (!map) - -From 2214c119c420bb213917f1c6f85cb82d905772dc Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 13 Aug 2021 15:38:28 +0100 -Subject: [PATCH 023/186] Set frame interlace from V4L2 buffer field - ---- - libavcodec/v4l2_buffers.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index de31f7ced93c..97b8eb1db362 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -222,6 +222,16 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) - return AVCOL_TRC_UNSPECIFIED; - } - -+static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) -+{ -+ return 
V4L2_FIELD_IS_INTERLACED(buf->buf.field); -+} -+ -+static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) -+{ -+ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; -+} -+ - static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) - { - AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; -@@ -576,6 +586,8 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_resc - frame->color_trc = v4l2_get_color_trc(avbuf); - frame->pts = v4l2_get_pts(avbuf, no_rescale_pts); - frame->pkt_dts = AV_NOPTS_VALUE; -+ frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf); -+ frame->top_field_first = v4l2_buf_is_top_first(avbuf); - - /* these values are updated also during re-init in v4l2_process_driver_event */ - frame->height = ctx->height; - -From b81ad61c52f3b35cd1b1b4f0ea715758c524522b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 13 Aug 2021 16:11:53 +0100 -Subject: [PATCH 024/186] Fix V4L2 stateful to avoid crash if flush before - start - ---- - libavcodec/v4l2_context.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index a17ae027a666..eb901e8fabf6 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -713,6 +713,10 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) - static void flush_all_buffers_status(V4L2Context* const ctx) - { - int i; -+ -+ if (!ctx->bufrefs) -+ return; -+ - for (i = 0; i < ctx->num_buffers; ++i) { - struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; - if (buf->status == V4L2BUF_IN_DRIVER) - -From 2d975c0fbcb97b930b1e7164f439830ab2594d1d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 9 Sep 2021 17:44:13 +0100 -Subject: [PATCH 025/186] Copy properties from frame to v4l2 buffer - -Now copies all the properties in ff_v4l2_buffer_avframe_to_buf that -ff_v4l2_buffer_buf_to_avframe copies ---- - libavcodec/v4l2_buffers.c | 126 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 126 
insertions(+) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 97b8eb1db362..126d2a17f4fe 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -128,6 +128,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) - return AVCOL_PRI_UNSPECIFIED; - } - -+static void v4l2_set_color(V4L2Buffer *buf, -+ const enum AVColorPrimaries avcp, -+ const enum AVColorSpace avcs, -+ const enum AVColorTransferCharacteristic avxc) -+{ -+ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; -+ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; -+ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; -+ -+ switch (avcp) { -+ case AVCOL_PRI_BT709: -+ cs = V4L2_COLORSPACE_REC709; -+ ycbcr = V4L2_YCBCR_ENC_709; -+ break; -+ case AVCOL_PRI_BT470M: -+ cs = V4L2_COLORSPACE_470_SYSTEM_M; -+ ycbcr = V4L2_YCBCR_ENC_601; -+ break; -+ case AVCOL_PRI_BT470BG: -+ cs = V4L2_COLORSPACE_470_SYSTEM_BG; -+ break; -+ case AVCOL_PRI_SMPTE170M: -+ cs = V4L2_COLORSPACE_SMPTE170M; -+ break; -+ case AVCOL_PRI_SMPTE240M: -+ cs = V4L2_COLORSPACE_SMPTE240M; -+ break; -+ case AVCOL_PRI_BT2020: -+ cs = V4L2_COLORSPACE_BT2020; -+ break; -+ case AVCOL_PRI_SMPTE428: -+ case AVCOL_PRI_SMPTE431: -+ case AVCOL_PRI_SMPTE432: -+ case AVCOL_PRI_EBU3213: -+ case AVCOL_PRI_RESERVED: -+ case AVCOL_PRI_FILM: -+ case AVCOL_PRI_UNSPECIFIED: -+ default: -+ break; -+ } -+ -+ switch (avcs) { -+ case AVCOL_SPC_RGB: -+ cs = V4L2_COLORSPACE_SRGB; -+ break; -+ case AVCOL_SPC_BT709: -+ cs = V4L2_COLORSPACE_REC709; -+ break; -+ case AVCOL_SPC_FCC: -+ cs = V4L2_COLORSPACE_470_SYSTEM_M; -+ break; -+ case AVCOL_SPC_BT470BG: -+ cs = V4L2_COLORSPACE_470_SYSTEM_BG; -+ break; -+ case AVCOL_SPC_SMPTE170M: -+ cs = V4L2_COLORSPACE_SMPTE170M; -+ break; -+ case AVCOL_SPC_SMPTE240M: -+ cs = V4L2_COLORSPACE_SMPTE240M; -+ break; -+ case AVCOL_SPC_BT2020_CL: -+ cs = V4L2_COLORSPACE_BT2020; -+ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; -+ break; -+ case AVCOL_SPC_BT2020_NCL: -+ cs = 
V4L2_COLORSPACE_BT2020; -+ break; -+ default: -+ break; -+ } -+ -+ switch (xfer) { -+ case AVCOL_TRC_BT709: -+ xfer = V4L2_XFER_FUNC_709; -+ break; -+ case AVCOL_TRC_IEC61966_2_1: -+ xfer = V4L2_XFER_FUNC_SRGB; -+ break; -+ case AVCOL_TRC_SMPTE240M: -+ xfer = V4L2_XFER_FUNC_SMPTE240M; -+ break; -+ case AVCOL_TRC_SMPTE2084: -+ xfer = V4L2_XFER_FUNC_SMPTE2084; -+ break; -+ default: -+ break; -+ } -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { -+ buf->context->format.fmt.pix_mp.colorspace = cs; -+ buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; -+ buf->context->format.fmt.pix_mp.xfer_func = xfer; -+ } else { -+ buf->context->format.fmt.pix.colorspace = cs; -+ buf->context->format.fmt.pix.ycbcr_enc = ycbcr; -+ buf->context->format.fmt.pix.xfer_func = xfer; -+ } -+} -+ - static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) - { - enum v4l2_quantization qt; -@@ -146,6 +245,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) - return AVCOL_RANGE_UNSPECIFIED; - } - -+static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) -+{ -+ const enum v4l2_quantization q = -+ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : -+ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : -+ V4L2_QUANTIZATION_DEFAULT; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { -+ buf->context->format.fmt.pix_mp.quantization = q; -+ } else { -+ buf->context->format.fmt.pix.quantization = q; -+ } -+} -+ - static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) - { - enum v4l2_ycbcr_encoding ycbcr; -@@ -232,6 +345,12 @@ static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) - return buf->buf.field == V4L2_FIELD_INTERLACED_TB; - } - -+static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) -+{ -+ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : -+ is_tff ? 
V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; -+} -+ - static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) - { - AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; -@@ -561,7 +680,14 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - - int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - { -+ out->buf.flags = frame->key_frame ? (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME) : (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME); -+ // Beware that colour info is held in format rather than the actual -+ // v4l2 buffer struct so this may not be as useful as you might hope -+ v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); -+ v4l2_set_color_range(out, frame->color_range); -+ // PTS & interlace are buffer vars - v4l2_set_pts(out, frame->pts, 0); -+ v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); - - return v4l2_buffer_swframe_to_buf(frame, out); - } - -From 29e4140983c9922f7375153d5ba515bb70b047be Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 17 Nov 2021 16:49:01 +0000 -Subject: [PATCH 026/186] ffmpeg: Do not inc DTS on no decode output - -V4L2 H264 decode has long latency and sometimes spits out a long stream -of output without input. In this case incrementing DTS is wrong. There -may be cases where the condition as written is correct so only "fix" in -the cases which cause problems ---- - fftools/ffmpeg.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c -index 719463016216..04bea4ef4fe9 100644 ---- a/fftools/ffmpeg.c -+++ b/fftools/ffmpeg.c -@@ -2612,7 +2612,12 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo - case AVMEDIA_TYPE_VIDEO: - ret = decode_video (ist, repeating ? 
NULL : avpkt, &got_output, &duration_pts, !pkt, - &decode_failed); -- if (!repeating || !pkt || got_output) { -+ // Pi: Do not inc dts if no_cvt_hw set -+ // V4L2 H264 decode has long latency and sometimes spits out a long -+ // stream of output without input. In this case incrementing DTS is wrong. -+ // There may be cases where the condition as written is correct so only -+ // "fix" in the cases which cause problems -+ if (!repeating || !pkt || (got_output && !no_cvt_hw)) { - if (pkt && pkt->duration) { - duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q); - } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) { - -From eebda1bffbbf81eb486665c73ace4a86303c1e69 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 17 Nov 2021 17:32:59 +0000 -Subject: [PATCH 027/186] v4l2_m2m_dec: Adjust timebase if H264 - -Adjust AVCodecContext time_base if H264 in the same way that the -software decoder does. ---- - libavcodec/v4l2_m2m_dec.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 1851acbc93fe..aa1e5c159720 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -481,6 +481,16 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - - av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); - -+ if (avctx->codec_id == AV_CODEC_ID_H264) { -+ if (avctx->ticks_per_frame == 1) { -+ if(avctx->time_base.den < INT_MAX/2) { -+ avctx->time_base.den *= 2; -+ } else -+ avctx->time_base.num /= 2; -+ } -+ avctx->ticks_per_frame = 2; -+ } -+ - av_log(avctx, AV_LOG_INFO, "level=%d\n", avctx->level); - ret = ff_v4l2_m2m_create_context(priv, &s); - if (ret < 0) - -From 4e12f09479a88b648f17fad0e475fdb60ac93541 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 17 Nov 2021 17:38:27 +0000 -Subject: [PATCH 028/186] v4l2_m2m_dec: Produce best guess PTSs if none - supplied - -Filter scheduling gets confused by missing PTSs and makes poor 
guesses -more often than not. Try to generate plausible timestamps where we are -missing them. ---- - libavcodec/v4l2_m2m.h | 12 ++++++++ - libavcodec/v4l2_m2m_dec.c | 64 +++++++++++++++++++++++++++++++++++++-- - 2 files changed, 74 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 8f054f2f50f9..82feb0afdbe3 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -52,6 +52,16 @@ typedef struct V4L2m2mTrackEl { - int64_t track_pts; - } V4L2m2mTrackEl; - -+typedef struct pts_stats_s -+{ -+ void * logctx; -+ const char * name; // For debug -+ unsigned int last_count; -+ unsigned int last_interval; -+ int64_t last_pts; -+ int64_t guess; -+} pts_stats_t; -+ - typedef struct V4L2m2mContext { - char devname[PATH_MAX]; - int fd; -@@ -91,6 +101,8 @@ typedef struct V4L2m2mContext { - unsigned int track_no; - V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; - -+ pts_stats_t pts_stat; -+ - /* req pkt */ - int req_pkt; - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index aa1e5c159720..a5a2afbd273c 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -42,6 +42,62 @@ - #include "v4l2_m2m.h" - #include "v4l2_fmt.h" - -+// Pick 64 for max last count - that is >1sec at 60fps -+#define STATS_LAST_COUNT_MAX 64 -+#define STATS_INTERVAL_MAX (1 << 30) -+ -+static int64_t pts_stats_guess(const pts_stats_t * const stats) -+{ -+ if (stats->last_pts == AV_NOPTS_VALUE || -+ stats->last_interval == 0 || -+ stats->last_count >= STATS_LAST_COUNT_MAX) -+ return AV_NOPTS_VALUE; -+ return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; -+} -+ -+static void pts_stats_add(pts_stats_t * const stats, int64_t pts) -+{ -+ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { -+ if (stats->last_count < STATS_LAST_COUNT_MAX) -+ ++stats->last_count; -+ return; -+ } -+ -+ if (stats->last_pts != AV_NOPTS_VALUE) { -+ const int64_t interval = pts - stats->last_pts; 
-+ -+ if (interval < 0 || interval >= STATS_INTERVAL_MAX || -+ stats->last_count >= STATS_LAST_COUNT_MAX) { -+ if (stats->last_interval != 0) -+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", -+ __func__, stats->name, interval, stats->last_count); -+ stats->last_interval = 0; -+ } -+ else { -+ const int64_t frame_time = interval / (int64_t)stats->last_count; -+ -+ if (frame_time != stats->last_interval) -+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", -+ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); -+ stats->last_interval = frame_time; -+ } -+ } -+ -+ stats->last_pts = pts; -+ stats->last_count = 1; -+} -+ -+static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) -+{ -+ *stats = (pts_stats_t){ -+ .logctx = logctx, -+ .name = name, -+ .last_count = 1, -+ .last_interval = 0, -+ .last_pts = AV_NOPTS_VALUE -+ }; -+} -+ - static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) - { - int ret; -@@ -244,9 +300,11 @@ xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *cons - return -1; - } - -- frame->best_effort_timestamp = frame->pts; -+ pts_stats_add(&s->pts_stat, frame->pts); -+ -+ frame->best_effort_timestamp = pts_stats_guess(&s->pts_stat); - frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? 
-- av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 ", DTS=%" PRId64 "\n", frame->pts, frame->pkt_dts); -+ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", frame->pts, frame->best_effort_timestamp, frame->pkt_dts); - return 0; - } - -@@ -496,6 +554,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - if (ret < 0) - return ret; - -+ pts_stats_init(&s->pts_stat, avctx, "decoder"); -+ - capture = &s->capture; - output = &s->output; - - -From 3a0fa83da24d5ec8739acf9f4cc713b7b1e49038 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 17 Nov 2021 17:59:27 +0000 -Subject: [PATCH 029/186] v4l2_m2m_dec: Try harder to get an initial frame - -If the input Q is full then wait on a short timeout for a capture frame -rather than stuffing yet still another frame into the input if we could -do that first. This attempts to restrict the sometimes daft initial -buffering that ends up confusing the rest of the system. ---- - libavcodec/v4l2_context.c | 2 +- - libavcodec/v4l2_m2m_dec.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index eb901e8fabf6..ee5dc7b8d41d 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -381,7 +381,7 @@ static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) - start: - if (is_capture) { - /* no need to listen to requests for more input while draining */ -- if (ctx_to_m2mctx(ctx)->draining) -+ if (ctx_to_m2mctx(ctx)->draining || timeout > 0) - pfd.events = POLLIN | POLLRDNORM | POLLPRI; - } else { - pfd.events = POLLOUT | POLLWRNORM; -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index a5a2afbd273c..b49f470c0a1e 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -442,7 +442,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - // when discarding - // This returns AVERROR(EAGAIN) if there isn't a frame ready yet - // but there 
is room in the input Q -- dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, -1, 1); -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 100 : -1, 1); - - if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) - av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", - -From 33aa90c53d570527c8a8da70d6c805a5431d2f86 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 17 Nov 2021 18:04:56 +0000 -Subject: [PATCH 030/186] Add a V4L2 M2M deinterlace filter - -Add a V4L2 deinterlace filter that will accept DRMPRIME frames. - -Multiple people have contributed to this: -Jernej Skrabec -Alex Bee -popcornmix -John Cox - -There is an unknown delay through the filter of typically one or three -fields which translates to 1 or 2 frames. Frames that are delayed are -lost at end of stream as the V4L2 filter has no flush control. ---- - libavcodec/v4l2_context.c | 4 +- - libavfilter/Makefile | 1 + - libavfilter/allfilters.c | 1 + - libavfilter/vf_deinterlace_v4l2m2m.c | 1269 ++++++++++++++++++++++++++ - 4 files changed, 1273 insertions(+), 2 deletions(-) - create mode 100644 libavfilter/vf_deinterlace_v4l2m2m.c - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index ee5dc7b8d41d..440dfaaba551 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -498,10 +498,10 @@ dequeue: - return NULL; - } - --ctx->q_count; -- av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d\n", -+ av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d field=%d\n", - ctx->name, buf.index, - buf.timestamp.tv_sec, buf.timestamp.tv_usec, -- ctx->q_count, ++ctx->dq_count); -+ ctx->q_count, ++ctx->dq_count, buf.field); - - avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; - avbuf->status = V4L2BUF_AVAILABLE; -diff --git a/libavfilter/Makefile b/libavfilter/Makefile -index c14fc995a0b5..0e7b5856bdd4 100644 ---- 
a/libavfilter/Makefile -+++ b/libavfilter/Makefile -@@ -262,6 +262,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o - OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o - OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_vpp_qsv.o - OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o -+OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o - OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o - OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o - OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o -diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c -index b990a001529b..357ff61ca803 100644 ---- a/libavfilter/allfilters.c -+++ b/libavfilter/allfilters.c -@@ -248,6 +248,7 @@ extern const AVFilter ff_vf_derain; - extern const AVFilter ff_vf_deshake; - extern const AVFilter ff_vf_deshake_opencl; - extern const AVFilter ff_vf_despill; -+extern const AVFilter ff_vf_deinterlace_v4l2m2m; - extern const AVFilter ff_vf_detelecine; - extern const AVFilter ff_vf_dilation; - extern const AVFilter ff_vf_dilation_opencl; -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -new file mode 100644 -index 000000000000..1a933b7e0a5f ---- /dev/null -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -0,0 +1,1269 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/** -+ * @file -+ * deinterlace video filter - V4L2 M2M -+ */ -+ -+#include -+ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "libavutil/avassert.h" -+#include "libavutil/avstring.h" -+#include "libavutil/common.h" -+#include "libavutil/hwcontext.h" -+#include "libavutil/hwcontext_drm.h" -+#include "libavutil/internal.h" -+#include "libavutil/mathematics.h" -+#include "libavutil/opt.h" -+#include "libavutil/pixdesc.h" -+#include "libavutil/time.h" -+ -+#define FF_INTERNAL_FIELDS 1 -+#include "framequeue.h" -+#include "filters.h" -+#include "avfilter.h" -+#include "formats.h" -+#include "internal.h" -+#include "video.h" -+ -+typedef struct V4L2Queue V4L2Queue; -+typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; -+ -+typedef struct V4L2PlaneInfo { -+ int bytesperline; -+ size_t length; -+} V4L2PlaneInfo; -+ -+typedef struct V4L2Buffer { -+ int enqueued; -+ int reenqueue; -+ int fd; -+ struct v4l2_buffer buffer; -+ AVFrame frame; -+ struct v4l2_plane planes[VIDEO_MAX_PLANES]; -+ int num_planes; -+ V4L2PlaneInfo plane_info[VIDEO_MAX_PLANES]; -+ AVDRMFrameDescriptor drm_frame; -+ V4L2Queue *q; -+} V4L2Buffer; -+ -+typedef struct V4L2Queue { -+ struct v4l2_format format; -+ int num_buffers; -+ V4L2Buffer *buffers; -+ DeintV4L2M2MContextShared *ctx; -+} V4L2Queue; -+ -+typedef struct pts_stats_s -+{ -+ void * logctx; -+ const char * name; // For debug -+ unsigned int last_count; -+ unsigned int last_interval; -+ int64_t last_pts; -+} pts_stats_t; -+ -+#define PTS_TRACK_SIZE 32 -+typedef struct pts_track_el_s -+{ -+ uint32_t n; -+ unsigned int interval; -+ AVFrame * props; -+} pts_track_el_t; -+ -+typedef struct pts_track_s -+{ -+ uint32_t n; -+ 
uint32_t last_n; -+ int got_2; -+ void * logctx; -+ pts_stats_t stats; -+ pts_track_el_t a[PTS_TRACK_SIZE]; -+} pts_track_t; -+ -+typedef struct DeintV4L2M2MContextShared { -+ void * logctx; // For logging - will be NULL when done -+ -+ int fd; -+ int done; -+ int width; -+ int height; -+ int orig_width; -+ int orig_height; -+ atomic_uint refcount; -+ -+ AVBufferRef *hw_frames_ctx; -+ -+ unsigned int field_order; -+ -+ pts_track_t track; -+ -+ V4L2Queue output; -+ V4L2Queue capture; -+} DeintV4L2M2MContextShared; -+ -+typedef struct DeintV4L2M2MContext { -+ const AVClass *class; -+ -+ DeintV4L2M2MContextShared *shared; -+} DeintV4L2M2MContext; -+ -+static unsigned int pts_stats_interval(const pts_stats_t * const stats) -+{ -+ return stats->last_interval; -+} -+ -+// Pick 64 for max last count - that is >1sec at 60fps -+#define STATS_LAST_COUNT_MAX 64 -+#define STATS_INTERVAL_MAX (1 << 30) -+static void pts_stats_add(pts_stats_t * const stats, int64_t pts) -+{ -+ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { -+ if (stats->last_count < STATS_LAST_COUNT_MAX) -+ ++stats->last_count; -+ return; -+ } -+ -+ if (stats->last_pts != AV_NOPTS_VALUE) { -+ const int64_t interval = pts - stats->last_pts; -+ -+ if (interval < 0 || interval >= STATS_INTERVAL_MAX || -+ stats->last_count >= STATS_LAST_COUNT_MAX) { -+ if (stats->last_interval != 0) -+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", -+ __func__, stats->name, interval, stats->last_count); -+ stats->last_interval = 0; -+ } -+ else { -+ const int64_t frame_time = interval / (int64_t)stats->last_count; -+ -+ if (frame_time != stats->last_interval) -+ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", -+ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); -+ stats->last_interval = frame_time; -+ } -+ } -+ -+ stats->last_pts = pts; -+ stats->last_count = 1; -+} -+ -+static void pts_stats_init(pts_stats_t * 
const stats, void * logctx, const char * name) -+{ -+ *stats = (pts_stats_t){ -+ .logctx = logctx, -+ .name = name, -+ .last_count = 1, -+ .last_interval = 0, -+ .last_pts = AV_NOPTS_VALUE -+ }; -+} -+ -+static inline uint32_t pts_track_next_n(pts_track_t * const trk) -+{ -+ if (++trk->n == 0) -+ trk->n = 1; -+ return trk->n; -+} -+ -+static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst) -+{ -+ uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000); -+ pts_track_el_t * t; -+ -+ // As a first guess assume that n==0 means last frame -+ if (n == 0) { -+ n = trk->last_n; -+ if (n == 0) -+ goto fail; -+ } -+ -+ t = trk->a + (n & (PTS_TRACK_SIZE - 1)); -+ -+ if (t->n != n) { -+ av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n); -+ goto fail; -+ } -+ -+ // 1st frame is simple - just believe it -+ if (n != trk->last_n) { -+ trk->last_n = n; -+ trk->got_2 = 0; -+ return av_frame_copy_props(dst, t->props); -+ } -+ -+ // Only believe in a single interpolated frame -+ if (trk->got_2) -+ goto fail; -+ trk->got_2 = 1; -+ -+ av_frame_copy_props(dst, t->props); -+ -+ -+ // If we can't guess - don't -+ if (t->interval == 0) { -+ dst->best_effort_timestamp = AV_NOPTS_VALUE; -+ dst->pts = AV_NOPTS_VALUE; -+ dst->pkt_dts = AV_NOPTS_VALUE; -+ } -+ else { -+ if (dst->best_effort_timestamp != AV_NOPTS_VALUE) -+ dst->best_effort_timestamp += t->interval / 2; -+ if (dst->pts != AV_NOPTS_VALUE) -+ dst->pts += t->interval / 2; -+ if (dst->pkt_dts != AV_NOPTS_VALUE) -+ dst->pkt_dts += t->interval / 2; -+ } -+ -+ return 0; -+ -+fail: -+ trk->last_n = 0; -+ trk->got_2 = 0; -+ dst->pts = AV_NOPTS_VALUE; -+ dst->pkt_dts = AV_NOPTS_VALUE; -+ return 0; -+} -+ -+static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) -+{ -+ const uint32_t n = pts_track_next_n(trk); -+ pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1)); -+ -+ 
pts_stats_add(&trk->stats, src->pts); -+ -+ t->n = n; -+ t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last -+ av_frame_unref(t->props); -+ av_frame_copy_props(t->props, src); -+ -+ // We now know what the previous interval was, rather than having to guess, -+ // so set it. There is a better than decent chance that this is before -+ // we use it. -+ if (t->interval != 0) { -+ pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1)); -+ prev_t->interval = t->interval; -+ } -+ -+ // In case deinterlace interpolates frames use every other usec -+ return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2}; -+} -+ -+static void pts_track_uninit(pts_track_t * const trk) -+{ -+ unsigned int i; -+ for (i = 0; i != PTS_TRACK_SIZE; ++i) { -+ trk->a[i].n = 0; -+ av_frame_free(&trk->a[i].props); -+ } -+} -+ -+static int pts_track_init(pts_track_t * const trk, void *logctx) -+{ -+ unsigned int i; -+ trk->n = 1; -+ pts_stats_init(&trk->stats, logctx, "track"); -+ for (i = 0; i != PTS_TRACK_SIZE; ++i) { -+ trk->a[i].n = 0; -+ if ((trk->a[i].props = av_frame_alloc()) == NULL) { -+ pts_track_uninit(trk); -+ return AVERROR(ENOMEM); -+ } -+ } -+ return 0; -+} -+ -+static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) -+{ -+ struct v4l2_capability cap; -+ int ret; -+ -+ memset(&cap, 0, sizeof(cap)); -+ ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap); -+ if (ret < 0) -+ return ret; -+ -+ if (!(cap.capabilities & V4L2_CAP_STREAMING)) -+ return AVERROR(EINVAL); -+ -+ if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { -+ ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ -+ return 0; -+ } -+ -+ if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { -+ ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; -+ ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; -+ -+ return 0; -+ } -+ -+ return AVERROR(EINVAL); -+} -+ 
-+static int deint_v4l2m2m_try_format(V4L2Queue *queue) -+{ -+ struct v4l2_format *fmt = &queue->format; -+ DeintV4L2M2MContextShared *ctx = queue->ctx; -+ int ret, field; -+ -+ ret = ioctl(ctx->fd, VIDIOC_G_FMT, fmt); -+ if (ret) -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); -+ -+ if (V4L2_TYPE_IS_OUTPUT(fmt->type)) -+ field = V4L2_FIELD_INTERLACED_TB; -+ else -+ field = V4L2_FIELD_NONE; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ fmt->fmt.pix_mp.pixelformat = V4L2_PIX_FMT_YUV420; -+ fmt->fmt.pix_mp.field = field; -+ fmt->fmt.pix_mp.width = ctx->width; -+ fmt->fmt.pix_mp.height = ctx->height; -+ } else { -+ fmt->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420; -+ fmt->fmt.pix.field = field; -+ fmt->fmt.pix.width = ctx->width; -+ fmt->fmt.pix.height = ctx->height; -+ } -+ -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, -+ fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height, -+ fmt->fmt.pix_mp.pixelformat, -+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline); -+ -+ ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, fmt); -+ if (ret) -+ return AVERROR(EINVAL); -+ -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, -+ fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height, -+ fmt->fmt.pix_mp.pixelformat, -+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline); -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ if (fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 || -+ fmt->fmt.pix_mp.field != field) { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); -+ -+ return AVERROR(EINVAL); -+ } -+ } else { -+ if (fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 || -+ fmt->fmt.pix.field != field) { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); -+ -+ return 
AVERROR(EINVAL); -+ } -+ } -+ -+ return 0; -+} -+ -+static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, int height, int pitch, int ysize) -+{ -+ struct v4l2_format *fmt = &queue->format; -+ DeintV4L2M2MContextShared *ctx = queue->ctx; -+ int ret; -+ -+ struct v4l2_selection sel = { -+ .type = fmt->type, -+ .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS, -+ }; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ fmt->fmt.pix_mp.field = field; -+ fmt->fmt.pix_mp.width = width; -+ fmt->fmt.pix_mp.height = ysize / pitch; -+ fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch; -+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1); -+ } else { -+ fmt->fmt.pix.field = field; -+ fmt->fmt.pix.width = width; -+ fmt->fmt.pix.height = height; -+ fmt->fmt.pix.sizeimage = 0; -+ fmt->fmt.pix.bytesperline = 0; -+ } -+ -+ ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt); -+ if (ret) -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret); -+ -+ ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel); -+ if (ret) -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_SELECTION failed: %d\n", ret); -+ -+ sel.r.width = width; -+ sel.r.height = height; -+ sel.r.left = 0; -+ sel.r.top = 0; -+ sel.target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? 
V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, -+ sel.flags = V4L2_SEL_FLAG_LE; -+ -+ ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel); -+ if (ret) -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_SELECTION failed: %d\n", ret); -+ -+ return ret; -+} -+ -+static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) -+{ -+ int ret; -+ -+ ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0); -+ if (ctx->fd < 0) -+ return AVERROR(errno); -+ -+ ret = deint_v4l2m2m_prepare_context(ctx); -+ if (ret) -+ goto fail; -+ -+ ret = deint_v4l2m2m_try_format(&ctx->capture); -+ if (ret) -+ goto fail; -+ -+ ret = deint_v4l2m2m_try_format(&ctx->output); -+ if (ret) -+ goto fail; -+ -+ return 0; -+ -+fail: -+ close(ctx->fd); -+ ctx->fd = -1; -+ -+ return ret; -+} -+ -+static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx) -+{ -+ int ret = AVERROR(EINVAL); -+ struct dirent *entry; -+ char node[PATH_MAX]; -+ DIR *dirp; -+ -+ dirp = opendir("/dev"); -+ if (!dirp) -+ return AVERROR(errno); -+ -+ for (entry = readdir(dirp); entry; entry = readdir(dirp)) { -+ -+ if (strncmp(entry->d_name, "video", 5)) -+ continue; -+ -+ snprintf(node, sizeof(node), "/dev/%s", entry->d_name); -+ av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node); -+ ret = deint_v4l2m2m_probe_device(ctx, node); -+ if (!ret) -+ break; -+ } -+ -+ closedir(dirp); -+ -+ if (ret) { -+ av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n"); -+ ctx->fd = -1; -+ -+ return ret; -+ } -+ -+ av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node); -+ -+ return 0; -+} -+ -+static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) -+{ -+ int ret; -+ -+ ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer); -+ if (ret < 0) -+ return AVERROR(errno); -+ -+ buf->enqueued = 1; -+ -+ return 0; -+} -+ -+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) -+{ -+ struct v4l2_exportbuffer expbuf; -+ int i, ret; -+ -+ for (i = 0; i < avbuf->num_planes; i++) { -+ memset(&expbuf, 0, sizeof(expbuf)); -+ 
-+ expbuf.index = avbuf->buffer.index; -+ expbuf.type = avbuf->buffer.type; -+ expbuf.plane = i; -+ -+ ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf); -+ if (ret < 0) -+ return AVERROR(errno); -+ -+ avbuf->fd = expbuf.fd; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type)) { -+ /* drm frame */ -+ avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length; -+ avbuf->drm_frame.objects[i].fd = expbuf.fd; -+ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ } else { -+ /* drm frame */ -+ avbuf->drm_frame.objects[0].size = avbuf->buffer.length; -+ avbuf->drm_frame.objects[0].fd = expbuf.fd; -+ avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ } -+ } -+ -+ return 0; -+} -+ -+static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) -+{ -+ struct v4l2_format *fmt = &queue->format; -+ DeintV4L2M2MContextShared *ctx = queue->ctx; -+ struct v4l2_requestbuffers req; -+ int ret, i, j, multiplanar; -+ uint32_t memory; -+ -+ memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? 
-+ V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; -+ -+ multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type); -+ -+ memset(&req, 0, sizeof(req)); -+ req.count = queue->num_buffers; -+ req.memory = memory; -+ req.type = fmt->type; -+ -+ ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req); -+ if (ret < 0) { -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno)); -+ -+ return AVERROR(errno); -+ } -+ -+ queue->num_buffers = req.count; -+ queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer)); -+ if (!queue->buffers) { -+ av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n"); -+ -+ return AVERROR(ENOMEM); -+ } -+ -+ for (i = 0; i < queue->num_buffers; i++) { -+ V4L2Buffer *buf = &queue->buffers[i]; -+ -+ buf->enqueued = 0; -+ buf->fd = -1; -+ buf->q = queue; -+ -+ buf->buffer.type = fmt->type; -+ buf->buffer.memory = memory; -+ buf->buffer.index = i; -+ -+ if (multiplanar) { -+ buf->buffer.length = VIDEO_MAX_PLANES; -+ buf->buffer.m.planes = buf->planes; -+ } -+ -+ ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); -+ if (ret < 0) { -+ ret = AVERROR(errno); -+ -+ goto fail; -+ } -+ -+ if (multiplanar) -+ buf->num_planes = buf->buffer.length; -+ else -+ buf->num_planes = 1; -+ -+ for (j = 0; j < buf->num_planes; j++) { -+ V4L2PlaneInfo *info = &buf->plane_info[j]; -+ -+ if (multiplanar) { -+ info->bytesperline = fmt->fmt.pix_mp.plane_fmt[j].bytesperline; -+ info->length = buf->buffer.m.planes[j].length; -+ } else { -+ info->bytesperline = fmt->fmt.pix.bytesperline; -+ info->length = buf->buffer.length; -+ } -+ } -+ -+ if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { -+ ret = deint_v4l2m2m_enqueue_buffer(buf); -+ if (ret) -+ goto fail; -+ -+ ret = v4l2_buffer_export_drm(buf); -+ if (ret) -+ goto fail; -+ } -+ } -+ -+ return 0; -+ -+fail: -+ for (i = 0; i < queue->num_buffers; i++) -+ if (queue->buffers[i].fd >= 0) -+ close(queue->buffers[i].fd); -+ av_free(queue->buffers); -+ queue->buffers = NULL; -+ -+ return ret; -+} -+ -+static int 
deint_v4l2m2m_streamon(V4L2Queue *queue) -+{ -+ DeintV4L2M2MContextShared * const ctx = queue->ctx; -+ int type = queue->format.type; -+ int ret; -+ -+ ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type); -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); -+ if (ret < 0) -+ return AVERROR(errno); -+ -+ return 0; -+} -+ -+static int deint_v4l2m2m_streamoff(V4L2Queue *queue) -+{ -+ DeintV4L2M2MContextShared * const ctx = queue->ctx; -+ int type = queue->format.type; -+ int ret; -+ -+ ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type); -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); -+ if (ret < 0) -+ return AVERROR(errno); -+ -+ return 0; -+} -+ -+// timeout in ms -+static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout) -+{ -+ struct v4l2_plane planes[VIDEO_MAX_PLANES]; -+ DeintV4L2M2MContextShared *ctx = queue->ctx; -+ struct v4l2_buffer buf = { 0 }; -+ V4L2Buffer* avbuf = NULL; -+ struct pollfd pfd; -+ short events; -+ int ret; -+ -+ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) -+ events = POLLOUT | POLLWRNORM; -+ else -+ events = POLLIN | POLLRDNORM; -+ -+ pfd.events = events; -+ pfd.fd = ctx->fd; -+ -+ for (;;) { -+ ret = poll(&pfd, 1, timeout); -+ if (ret > 0) -+ break; -+ if (errno == EINTR) -+ continue; -+ return NULL; -+ } -+ -+ if (pfd.revents & POLLERR) -+ return NULL; -+ -+ if (pfd.revents & events) { -+ memset(&buf, 0, sizeof(buf)); -+ buf.memory = V4L2_MEMORY_MMAP; -+ buf.type = queue->format.type; -+ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { -+ memset(planes, 0, sizeof(planes)); -+ buf.length = VIDEO_MAX_PLANES; -+ buf.m.planes = planes; -+ } -+ -+ ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf); -+ if (ret) { -+ if (errno != EAGAIN) -+ av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n", -+ av_err2str(AVERROR(errno))); -+ return NULL; -+ } -+ -+ avbuf = &queue->buffers[buf.index]; -+ avbuf->enqueued = 0; -+ 
avbuf->buffer = buf; -+ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { -+ memcpy(avbuf->planes, planes, sizeof(planes)); -+ avbuf->buffer.m.planes = avbuf->planes; -+ } -+ return avbuf; -+ } -+ -+ return NULL; -+} -+ -+static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue) -+{ -+ int i; -+ V4L2Buffer *buf = NULL; -+ -+ for (i = 0; i < queue->num_buffers; i++) -+ if (!queue->buffers[i].enqueued) { -+ buf = &queue->buffers[i]; -+ break; -+ } -+ return buf; -+} -+ -+static void deint_v4l2m2m_unref_queued(V4L2Queue *queue) -+{ -+ int i; -+ V4L2Buffer *buf = NULL; -+ -+ if (!queue || !queue->buffers) -+ return; -+ for (i = 0; i < queue->num_buffers; i++) { -+ buf = &queue->buffers[i]; -+ if (queue->buffers[i].enqueued) -+ av_frame_unref(&buf->frame); -+ } -+} -+ -+static void recycle_q(V4L2Queue * const queue) -+{ -+ V4L2Buffer* avbuf; -+ while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) { -+ av_frame_unref(&avbuf->frame); -+ } -+} -+ -+static int count_enqueued(V4L2Queue *queue) -+{ -+ int i; -+ int n = 0; -+ -+ if (queue->buffers == NULL) -+ return 0; -+ -+ for (i = 0; i < queue->num_buffers; i++) -+ if (queue->buffers[i].enqueued) -+ ++n; -+ return n; -+} -+ -+static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame) -+{ -+ DeintV4L2M2MContextShared *const ctx = queue->ctx; -+ AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; -+ V4L2Buffer *buf; -+ int i; -+ -+ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) -+ recycle_q(queue); -+ -+ buf = deint_v4l2m2m_find_free_buf(queue); -+ if (!buf) { -+ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0); -+ return AVERROR(EAGAIN); -+ } -+ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) -+ for (i = 0; i < drm_desc->nb_objects; i++) -+ buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd; -+ else -+ buf->buffer.m.fd = drm_desc->objects[0].fd; -+ -+ buf->buffer.field = !frame->interlaced_frame ? 
V4L2_FIELD_NONE : -+ frame->top_field_first ? V4L2_FIELD_INTERLACED_TB : -+ V4L2_FIELD_INTERLACED_BT; -+ -+ if (ctx->field_order != buf->buffer.field) { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field); -+ ctx->field_order = buf->buffer.field; -+ } -+ -+ buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame); -+ -+ buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd; -+ -+ av_frame_move_ref(&buf->frame, frame); -+ -+ return deint_v4l2m2m_enqueue_buffer(buf); -+} -+ -+static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) -+{ -+ if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { -+ V4L2Queue *capture = &ctx->capture; -+ V4L2Queue *output = &ctx->output; -+ int i; -+ -+ av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); -+ -+ if (ctx->fd >= 0) { -+ deint_v4l2m2m_streamoff(capture); -+ deint_v4l2m2m_streamoff(output); -+ } -+ -+ if (capture->buffers) -+ for (i = 0; i < capture->num_buffers; i++) { -+ capture->buffers[i].q = NULL; -+ if (capture->buffers[i].fd >= 0) -+ close(capture->buffers[i].fd); -+ } -+ -+ deint_v4l2m2m_unref_queued(output); -+ -+ av_buffer_unref(&ctx->hw_frames_ctx); -+ -+ if (capture->buffers) -+ av_free(capture->buffers); -+ -+ if (output->buffers) -+ av_free(output->buffers); -+ -+ if (ctx->fd >= 0) { -+ close(ctx->fd); -+ ctx->fd = -1; -+ } -+ -+ av_free(ctx); -+ } -+} -+ -+static void v4l2_free_buffer(void *opaque, uint8_t *unused) -+{ -+ V4L2Buffer *buf = opaque; -+ DeintV4L2M2MContextShared *ctx = buf->q->ctx; -+ -+ if (!ctx->done) -+ deint_v4l2m2m_enqueue_buffer(buf); -+ -+ deint_v4l2m2m_destroy_context(ctx); -+} -+ -+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height) -+{ -+ int av_pix_fmt = AV_PIX_FMT_YUV420P; -+ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; -+ AVDRMLayerDescriptor *layer; -+ -+ /* fill the DRM frame descriptor */ -+ drm_desc->nb_objects = avbuf->num_planes; -+ drm_desc->nb_layers = 1; -+ -+ 
layer = &drm_desc->layers[0]; -+ layer->nb_planes = avbuf->num_planes; -+ -+ for (int i = 0; i < avbuf->num_planes; i++) { -+ layer->planes[i].object_index = i; -+ layer->planes[i].offset = 0; -+ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; -+ } -+ -+ switch (av_pix_fmt) { -+ case AV_PIX_FMT_YUYV422: -+ -+ layer->format = DRM_FORMAT_YUYV; -+ layer->nb_planes = 1; -+ -+ break; -+ -+ case AV_PIX_FMT_NV12: -+ case AV_PIX_FMT_NV21: -+ -+ layer->format = av_pix_fmt == AV_PIX_FMT_NV12 ? -+ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; -+ -+ if (avbuf->num_planes > 1) -+ break; -+ -+ layer->nb_planes = 2; -+ -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -+ height; -+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; -+ break; -+ -+ case AV_PIX_FMT_YUV420P: -+ -+ layer->format = DRM_FORMAT_YUV420; -+ -+ if (avbuf->num_planes > 1) -+ break; -+ -+ layer->nb_planes = 3; -+ -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -+ height; -+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; -+ -+ layer->planes[2].object_index = 0; -+ layer->planes[2].offset = layer->planes[1].offset + -+ ((avbuf->plane_info[0].bytesperline * -+ height) >> 2); -+ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; -+ break; -+ -+ default: -+ drm_desc->nb_layers = 0; -+ break; -+ } -+ -+ return (uint8_t *) drm_desc; -+} -+ -+// timeout in ms -+static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) -+{ -+ DeintV4L2M2MContextShared *ctx = queue->ctx; -+ V4L2Buffer* avbuf; -+ -+ av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); -+ -+ avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); -+ if (!avbuf) { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); -+ return AVERROR(EAGAIN); -+ } -+ -+ // Fill in PTS and anciliary info from src frame -+ // we will want to 
overwrite some fields as only the pts/dts -+ // fields are updated with new timing in this fn -+ pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); -+ -+ frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, -+ sizeof(avbuf->drm_frame), v4l2_free_buffer, -+ avbuf, AV_BUFFER_FLAG_READONLY); -+ if (!frame->buf[0]) { -+ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0); -+ return AVERROR(ENOMEM); -+ } -+ -+ atomic_fetch_add(&ctx->refcount, 1); -+ -+ frame->data[0] = (uint8_t *)v4l2_get_drm_frame(avbuf, ctx->orig_height); -+ frame->format = AV_PIX_FMT_DRM_PRIME; -+ if (ctx->hw_frames_ctx) -+ frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); -+ frame->height = ctx->height; -+ frame->width = ctx->width; -+ -+ // Not interlaced now -+ frame->interlaced_frame = 0; -+ frame->top_field_first = 0; -+ // Pkt duration halved -+ frame->pkt_duration /= 2; -+ -+ if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { -+ av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); -+ frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM; -+ } -+ -+ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts); -+ return 0; -+} -+ -+static int deint_v4l2m2m_config_props(AVFilterLink *outlink) -+{ -+ AVFilterLink *inlink = outlink->src->inputs[0]; -+ AVFilterContext *avctx = outlink->src; -+ DeintV4L2M2MContext *priv = avctx->priv; -+ DeintV4L2M2MContextShared *ctx = priv->shared; -+ int ret; -+ -+ ctx->height = avctx->inputs[0]->h; -+ ctx->width = avctx->inputs[0]->w; -+ -+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d\n", __func__, ctx->width, ctx->height); -+ -+ outlink->time_base = inlink->time_base; -+ outlink->w = inlink->w; -+ outlink->h = inlink->h; -+ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; -+ outlink->format = inlink->format; -+ outlink->frame_rate = (AVRational) {1, 0}; // Deny knowledge of frame rate -+ -+ ret = deint_v4l2m2m_find_device(ctx); -+ if (ret) -+ return ret; -+ -+ 
if (inlink->hw_frames_ctx) { -+ ctx->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx); -+ if (!ctx->hw_frames_ctx) -+ return AVERROR(ENOMEM); -+ } -+ return 0; -+} -+ -+static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) -+{ -+ AVFilterContext *avctx = link->dst; -+ DeintV4L2M2MContext *priv = avctx->priv; -+ DeintV4L2M2MContextShared *ctx = priv->shared; -+ V4L2Queue *capture = &ctx->capture; -+ V4L2Queue *output = &ctx->output; -+ int ret; -+ -+ av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" (%"PRId64") field :%d interlaced: %d aspect:%d/%d\n", -+ __func__, in->pts, AV_NOPTS_VALUE, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); -+ av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__, -+ avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); -+ -+ if (ctx->field_order == V4L2_FIELD_ANY) { -+ AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)in->data[0]; -+ ctx->orig_width = drm_desc->layers[0].planes[0].pitch; -+ ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; -+ -+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, -+ drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); -+ -+ if (in->top_field_first) -+ ctx->field_order = V4L2_FIELD_INTERLACED_TB; -+ else -+ ctx->field_order = V4L2_FIELD_INTERLACED_BT; -+ -+ ret = deint_v4l2m2m_set_format(output, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_set_format(capture, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_allocate_buffers(capture); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_streamon(capture); -+ if (ret) -+ 
return ret; -+ -+ ret = deint_v4l2m2m_allocate_buffers(output); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_streamon(output); -+ if (ret) -+ return ret; -+ } -+ -+ ret = deint_v4l2m2m_enqueue_frame(output, in); -+ -+ av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret)); -+ return ret; -+} -+ -+static int deint_v4l2m2m_activate(AVFilterContext *avctx) -+{ -+ DeintV4L2M2MContext * const priv = avctx->priv; -+ DeintV4L2M2MContextShared *const s = priv->shared; -+ AVFilterLink * const outlink = avctx->outputs[0]; -+ AVFilterLink * const inlink = avctx->inputs[0]; -+ int n = 0; -+ int cn = 99; -+ int instatus = 0; -+ int64_t inpts = 0; -+ int did_something = 0; -+ -+ av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); -+ -+ FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); -+ -+ ff_inlink_acknowledge_status(inlink, &instatus, &inpts); -+ -+ if (!ff_outlink_frame_wanted(outlink)) { -+ av_log(priv, AV_LOG_TRACE, "%s: Not wanted out\n", __func__); -+ } -+ else if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! -+ { -+ AVFrame * frame = av_frame_alloc(); -+ int rv; -+ -+again: -+ recycle_q(&s->output); -+ n = count_enqueued(&s->output); -+ -+ if (frame == NULL) { -+ av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__); -+ return AVERROR(ENOMEM); -+ } -+ -+ rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, n > 4 ? 
300 : 0); -+ if (rv != 0) { -+ av_frame_free(&frame); -+ if (rv != AVERROR(EAGAIN)) { -+ av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); -+ return rv; -+ } -+ } -+ else { -+ frame->interlaced_frame = 0; -+ // frame is always consumed by filter_frame - even on error despite -+ // a somewhat confusing comment in the header -+ rv = ff_filter_frame(outlink, frame); -+ -+ if (instatus != 0) { -+ av_log(priv, AV_LOG_TRACE, "%s: eof loop\n", __func__); -+ goto again; -+ } -+ -+ av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); -+ did_something = 1; -+ } -+ -+ cn = count_enqueued(&s->capture); -+ } -+ -+ if (instatus != 0) { -+ ff_outlink_set_status(outlink, instatus, inpts); -+ av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(instatus)); -+ return 0; -+ } -+ -+ { -+ AVFrame * frame; -+ int rv; -+ -+ recycle_q(&s->output); -+ n = count_enqueued(&s->output); -+ -+ while (n < 6) { -+ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { -+ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); -+ return rv; -+ } -+ -+ if (frame == NULL) { -+ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); -+ break; -+ } -+ -+ deint_v4l2m2m_filter_frame(inlink, frame); -+ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); -+ ++n; -+ } -+ } -+ -+ if (n < 6) { -+ ff_inlink_request_frame(inlink); -+ did_something = 1; -+ av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); -+ } -+ -+ if (n > 4 && ff_outlink_frame_wanted(outlink)) { -+ ff_filter_set_ready(avctx, 1); -+ did_something = 1; -+ av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); -+ } -+ -+ av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn); -+ return did_something ? 
0 : FFERROR_NOT_READY; -+} -+ -+static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) -+{ -+ DeintV4L2M2MContext * const priv = avctx->priv; -+ DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); -+ -+ if (!ctx) { -+ av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0); -+ return AVERROR(ENOMEM); -+ } -+ priv->shared = ctx; -+ ctx->logctx = priv; -+ ctx->fd = -1; -+ ctx->output.ctx = ctx; -+ ctx->output.num_buffers = 8; -+ ctx->capture.ctx = ctx; -+ ctx->capture.num_buffers = 12; -+ ctx->done = 0; -+ ctx->field_order = V4L2_FIELD_ANY; -+ -+ pts_track_init(&ctx->track, priv); -+ -+ atomic_init(&ctx->refcount, 1); -+ -+ return 0; -+} -+ -+static void deint_v4l2m2m_uninit(AVFilterContext *avctx) -+{ -+ DeintV4L2M2MContext *priv = avctx->priv; -+ DeintV4L2M2MContextShared *ctx = priv->shared; -+ -+ ctx->done = 1; -+ ctx->logctx = NULL; // Log to NULL works, log to missing crashes -+ pts_track_uninit(&ctx->track); -+ deint_v4l2m2m_destroy_context(ctx); -+} -+ -+static const AVOption deinterlace_v4l2m2m_options[] = { -+ { NULL }, -+}; -+ -+AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); -+ -+static const AVFilterPad deint_v4l2m2m_inputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO, -+ }, -+}; -+ -+static const AVFilterPad deint_v4l2m2m_outputs[] = { -+ { -+ .name = "default", -+ .type = AVMEDIA_TYPE_VIDEO, -+ .config_props = deint_v4l2m2m_config_props, -+ }, -+}; -+ -+AVFilter ff_vf_deinterlace_v4l2m2m = { -+ .name = "deinterlace_v4l2m2m", -+ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"), -+ .priv_size = sizeof(DeintV4L2M2MContext), -+ .init = &deint_v4l2m2m_init, -+ .uninit = &deint_v4l2m2m_uninit, -+ FILTER_INPUTS(deint_v4l2m2m_inputs), -+ FILTER_OUTPUTS(deint_v4l2m2m_outputs), -+ FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), -+ .priv_class = &deinterlace_v4l2m2m_class, -+ .activate = deint_v4l2m2m_activate, -+}; - -From 1956533e4c9b3f45f9fcb83da6e04beec0e0b517 Mon Sep 17 
00:00:00 2001 -From: John Cox -Date: Thu, 2 Dec 2021 17:49:55 +0000 -Subject: [PATCH 031/186] Put no_pts_rescale in context which makes more sense - than an arg - ---- - libavcodec/v4l2_buffers.c | 28 ++++++++++++++-------------- - libavcodec/v4l2_buffers.h | 5 ++--- - libavcodec/v4l2_context.c | 8 ++++---- - libavcodec/v4l2_context.h | 13 +++++++++---- - libavcodec/v4l2_m2m_dec.c | 9 +++++---- - 5 files changed, 34 insertions(+), 29 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 126d2a17f4fe..22da6bd72234 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -39,7 +39,7 @@ - #define USEC_PER_SEC 1000000 - static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; - --static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) -+static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) - { - return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? - container_of(buf->context, V4L2m2mContext, output) : -@@ -51,34 +51,34 @@ static inline AVCodecContext *logger(V4L2Buffer *buf) - return buf_to_m2mctx(buf)->avctx; - } - --static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) -+static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) - { -- V4L2m2mContext *s = buf_to_m2mctx(avbuf); -+ const V4L2m2mContext *s = buf_to_m2mctx(avbuf); - const AVRational tb = s->avctx->pkt_timebase.num ? - s->avctx->pkt_timebase : - s->avctx->time_base; - return tb.num && tb.den ? tb : v4l2_timebase; - } - --static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts, int no_rescale) -+static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) - { - /* convert pts to v4l2 timebase */ - const int64_t v4l2_pts = -- no_rescale ? pts : -+ out->context->no_pts_rescale ? pts : - pts == AV_NOPTS_VALUE ? 
0 : - av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); - out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; - out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; - } - --static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf, int no_rescale) -+static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) - { - /* convert pts back to encoder timebase */ - const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + - avbuf->buf.timestamp.tv_usec; - - return -- no_rescale ? v4l2_pts : -+ avbuf->context->no_pts_rescale ? v4l2_pts : - v4l2_pts == 0 ? AV_NOPTS_VALUE : - av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); - } -@@ -686,13 +686,13 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); - v4l2_set_color_range(out, frame->color_range); - // PTS & interlace are buffer vars -- v4l2_set_pts(out, frame->pts, 0); -+ v4l2_set_pts(out, frame->pts); - v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); - - return v4l2_buffer_swframe_to_buf(frame, out); - } - --int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_rescale_pts) -+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) - { - int ret; - V4L2Context * const ctx = avbuf->context; -@@ -710,7 +710,7 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_resc - frame->colorspace = v4l2_get_color_space(avbuf); - frame->color_range = v4l2_get_color_range(avbuf); - frame->color_trc = v4l2_get_color_trc(avbuf); -- frame->pts = v4l2_get_pts(avbuf, no_rescale_pts); -+ frame->pts = v4l2_get_pts(avbuf); - frame->pkt_dts = AV_NOPTS_VALUE; - frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf); - frame->top_field_first = v4l2_buf_is_top_first(avbuf); -@@ -757,13 +757,13 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) - pkt->flags |= AV_PKT_FLAG_CORRUPT; - } - -- 
pkt->dts = pkt->pts = v4l2_get_pts(avbuf, 0); -+ pkt->dts = pkt->pts = v4l2_get_pts(avbuf); - - return 0; - } - - int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -- const void *extdata, size_t extlen, int no_rescale_pts) -+ const void *extdata, size_t extlen) - { - int ret; - -@@ -777,7 +777,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - if (ret && ret != AVERROR(ENOMEM)) - return ret; - -- v4l2_set_pts(out, pkt->pts, no_rescale_pts); -+ v4l2_set_pts(out, pkt->pts); - - if (pkt->flags & AV_PKT_FLAG_KEY) - out->flags = V4L2_BUF_FLAG_KEYFRAME; -@@ -787,7 +787,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - - int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) - { -- return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); -+ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0); - } - - -diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 111526aee315..641e0e147b19 100644 ---- a/libavcodec/v4l2_buffers.h -+++ b/libavcodec/v4l2_buffers.h -@@ -83,12 +83,11 @@ typedef struct V4L2Buffer { - * - * @param[in] frame The AVFRame to push the information to - * @param[in] buf The V4L2Buffer to get the information from -- * @param[in] no_rescale_pts If non-zero do not rescale PTS - * - * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect, - * AVERROR(ENOMEM) if the AVBufferRef can't be created. 
- */ --int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf, int no_rescale_pts); -+int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf); - - /** - * Extracts the data from a V4L2Buffer to an AVPacket -@@ -113,7 +112,7 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); - int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); - - int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -- const void *extdata, size_t extlen, int no_rescale_pts); -+ const void *extdata, size_t extlen); - - /** - * Extracts the data from an AVFrame to a V4L2Buffer -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 440dfaaba551..64540a37b32e 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -808,7 +808,7 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) - } - - int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, -- const void * extdata, size_t extlen, int no_rescale_pts) -+ const void * extdata, size_t extlen) - { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); - V4L2Buffer* avbuf; -@@ -827,7 +827,7 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, - if (!avbuf) - return AVERROR(EAGAIN); - -- ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts); -+ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen); - if (ret == AVERROR(ENOMEM)) - av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", - __func__, pkt->size, avbuf->planes[0].length); -@@ -837,7 +837,7 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, - return ff_v4l2_buffer_enqueue(avbuf); - } - --int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout, int no_rescale_pts) -+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) - { - V4L2Buffer *avbuf; - -@@ -854,7 +854,7 @@ int 
ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout, - return AVERROR(EAGAIN); - } - -- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf, no_rescale_pts); -+ return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); - } - - int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 37b0431400d8..4cc164886c3b 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -102,6 +102,13 @@ typedef struct V4L2Context { - */ - int done; - -+ /** -+ * PTS rescale not wanted -+ * If the PTS is just a dummy frame count then rescale is -+ * actively harmful -+ */ -+ int no_pts_rescale; -+ - AVBufferRef *frames_ref; - int q_count; - int dq_count; -@@ -172,12 +179,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); - * @param[in] ctx The V4L2Context to dequeue from. - * @param[inout] f The AVFrame to dequeue to. - * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) -- * @param[in] no_rescale_pts (0 rescale pts, 1 use pts as -- * timestamp directly) - * - * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. - */ --int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int no_rescale_pts); -+int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); - - /** - * Enqueues a buffer to a V4L2Context from an AVPacket -@@ -189,7 +194,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int - * @param[in] pkt A pointer to an AVPacket. - * @return 0 in case of success, a negative error otherwise. 
- */ --int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size, int no_rescale_pts); -+int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); - - /** - * Enqueues a buffer to a V4L2Context from an AVFrame -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index b49f470c0a1e..36754b314a4b 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -360,7 +360,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - if (!s->draining) { - // Calling enqueue with an empty pkt starts drain - av_assert0(s->buf_pkt.size == 0); -- ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0, 1); -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); - if (ret) { - av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); - return ret; -@@ -381,8 +381,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - return ret; - - ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, -- avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size, -- 1); -+ avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size); - - if (ret == AVERROR(EAGAIN)) { - // Out of input buffers - keep packet -@@ -442,7 +441,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - // when discarding - // This returns AVERROR(EAGAIN) if there isn't a frame ready yet - // but there is room in the input Q -- dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 100 : -1, 1); -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 
100 : -1); - - if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) - av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", -@@ -569,10 +568,12 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - output->av_codec_id = avctx->codec_id; - output->av_pix_fmt = AV_PIX_FMT_NONE; - output->min_buf_size = max_coded_size(avctx); -+ output->no_pts_rescale = 1; - - capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; - capture->av_pix_fmt = avctx->pix_fmt; - capture->min_buf_size = 0; -+ capture->no_pts_rescale = 1; - - /* the client requests the codec to generate DRM frames: - * - data[0] will therefore point to the returned AVDRMFrameDescriptor - -From fdcdb8519c90f3d2038244b21abf165f56224f08 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 8 Dec 2021 15:00:37 +0000 -Subject: [PATCH 032/186] Use bitbuf min size for all streams - ---- - libavcodec/v4l2_m2m_dec.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 36754b314a4b..48a6810d18b6 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -507,15 +507,12 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } - #endif - -+// This heuristic is for H264 but use for everything - static uint32_t max_coded_size(const AVCodecContext * const avctx) - { - uint32_t wxh = avctx->coded_width * avctx->coded_height; - uint32_t size; - -- // Currently the only thing we try to set our own limits for is H264 -- if (avctx->codec_id != AV_CODEC_ID_H264) -- return 0; -- - size = wxh * 3 / 2; - // H.264 Annex A table A-1 gives minCR which is either 2 or 4 - // unfortunately that doesn't yield an actually useful limit - -From 6703c26d1e71bcb7a077b07e8cbef015a1204da9 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 3 Dec 2021 12:54:18 +0000 -Subject: [PATCH 033/186] Track pending frames in v4l2 stateful - -Track which frames are pending decode in the v4l2 stateful decoder. 
-This relies on DTS & PTS having some relationship to reality, so -any use of this code must cope with the results being wrong. - -Also moves the xlat state vars out of the main context and into their -own structure. ---- - libavcodec/v4l2_m2m.h | 15 ++++-- - libavcodec/v4l2_m2m_dec.c | 100 +++++++++++++++++++++++++++++--------- - 2 files changed, 89 insertions(+), 26 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 82feb0afdbe3..3f8680962342 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -44,8 +44,10 @@ - #define FF_V4L2_M2M_TRACK_SIZE 128 - typedef struct V4L2m2mTrackEl { - int discard; // If we see this buffer its been flushed, so discard -+ int pending; - int pkt_size; - int64_t pts; -+ int64_t dts; - int64_t reordered_opaque; - int64_t pkt_pos; - int64_t pkt_duration; -@@ -62,6 +64,14 @@ typedef struct pts_stats_s - int64_t guess; - } pts_stats_t; - -+typedef struct xlat_track_s { -+ unsigned int track_no; -+ int64_t last_pts; -+ int64_t last_pkt_dts; -+ int64_t last_opaque; -+ V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; -+} xlat_track_t; -+ - typedef struct V4L2m2mContext { - char devname[PATH_MAX]; - int fd; -@@ -96,10 +106,7 @@ typedef struct V4L2m2mContext { - int output_drm; - - /* Frame tracking */ -- int64_t last_pkt_dts; -- int64_t last_opaque; -- unsigned int track_no; -- V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; -+ xlat_track_t xlat; - - pts_stats_t pts_stat; - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 48a6810d18b6..d8ebb466cd56 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -242,22 +242,24 @@ static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts - // buffer of all the things we want preserved (including the original PTS) - // indexed by the tracking no. 
- static void --xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *const avpkt) -+xlat_pts_in(AVCodecContext *const avctx, xlat_track_t *const x, AVPacket *const avpkt) - { - int64_t track_pts; - - // Avoid 0 -- if (++s->track_no == 0) -- s->track_no = 1; -+ if (++x->track_no == 0) -+ x->track_no = 1; - -- track_pts = track_to_pts(avctx, s->track_no); -+ track_pts = track_to_pts(avctx, x->track_no); - -- av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, s->track_no); -- s->last_pkt_dts = avpkt->dts; -- s->track_els[s->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ -+ av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); -+ x->last_pkt_dts = avpkt->dts; -+ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ - .discard = 0, -+ .pending = 1, - .pkt_size = avpkt->size, - .pts = avpkt->pts, -+ .dts = avpkt->dts, - .reordered_opaque = avctx->reordered_opaque, - .pkt_pos = avpkt->pos, - .pkt_duration = avpkt->duration, -@@ -268,31 +270,36 @@ xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *cons - - // Returns -1 if we should discard the frame - static int --xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *const frame) -+xlat_pts_out(AVCodecContext *const avctx, -+ xlat_track_t * const x, -+ pts_stats_t * const ps, -+ AVFrame *const frame) - { - unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; -- const V4L2m2mTrackEl *const t = s->track_els + n; -+ V4L2m2mTrackEl *const t = x->track_els + n; - if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) - { - av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); - frame->pts = AV_NOPTS_VALUE; -- frame->pkt_dts = s->last_pkt_dts; -- frame->reordered_opaque = 
s->last_opaque; -+ frame->pkt_dts = x->last_pkt_dts; -+ frame->reordered_opaque = x->last_opaque; - frame->pkt_pos = -1; - frame->pkt_duration = 0; - frame->pkt_size = -1; - } - else if (!t->discard) - { -- frame->pts = t->pts; -- frame->pkt_dts = s->last_pkt_dts; -+ frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; -+ frame->pkt_dts = x->last_pkt_dts; - frame->reordered_opaque = t->reordered_opaque; - frame->pkt_pos = t->pkt_pos; - frame->pkt_duration = t->pkt_duration; - frame->pkt_size = t->pkt_size; - -- s->last_opaque = s->track_els[n].reordered_opaque; -- s->track_els[n].pts = AV_NOPTS_VALUE; // If we hit this again deny accurate knowledge of PTS -+ x->last_opaque = x->track_els[n].reordered_opaque; -+ if (frame->pts != AV_NOPTS_VALUE) -+ x->last_pts = frame->pts; -+ t->pending = 0; - } - else - { -@@ -300,14 +307,62 @@ xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *cons - return -1; - } - -- pts_stats_add(&s->pts_stat, frame->pts); -+ pts_stats_add(ps, frame->pts); - -- frame->best_effort_timestamp = pts_stats_guess(&s->pts_stat); -+ frame->best_effort_timestamp = pts_stats_guess(ps); - frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? 
- av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", frame->pts, frame->best_effort_timestamp, frame->pkt_dts); - return 0; - } - -+static void -+xlat_flush(xlat_track_t * const x) -+{ -+ unsigned int i; -+ for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { -+ x->track_els[i].pending = 0; -+ x->track_els[i].discard = 1; -+ } -+ x->last_pts = AV_NOPTS_VALUE; -+} -+ -+static void -+xlat_init(xlat_track_t * const x) -+{ -+ memset(x, 0, sizeof(*x)); -+ x->last_pts = AV_NOPTS_VALUE; -+} -+ -+static int -+xlat_pending(const xlat_track_t * const x) -+{ -+ unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; -+ unsigned int i; -+ int r = 0; -+ int64_t now = AV_NOPTS_VALUE; -+ -+ for (i = 0; i < 32; ++i, n = (n - 1) % FF_V4L2_M2M_TRACK_SIZE) { -+ const V4L2m2mTrackEl * const t = x->track_els + n; -+ -+ if (!t->pending) -+ continue; -+ -+ if (now == AV_NOPTS_VALUE) -+ now = t->dts; -+ -+ if (t->pts == AV_NOPTS_VALUE || -+ ((now == AV_NOPTS_VALUE || t->pts <= now) && -+ (x->last_pts == AV_NOPTS_VALUE || t->pts > x->last_pts))) -+ ++r; -+ } -+ -+ // If we never get any ideas about PTS vs DTS allow a lot more buffer -+ if (now == AV_NOPTS_VALUE) -+ r -= 16; -+ -+ return r; -+} -+ - static inline int stream_started(const V4L2m2mContext * const s) { - return s->capture.streamon && s->output.streamon; - } -@@ -374,7 +429,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - return ret; - } - -- xlat_pts_in(avctx, s, &s->buf_pkt); -+ xlat_pts_in(avctx, &s->xlat, &s->buf_pkt); - } - - if ((ret = check_output_streamon(avctx, s)) != 0) -@@ -417,6 +472,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - int dst_rv = 1; // Non-zero (done), non-negative (error) number - - do { -+ av_log(avctx, AV_LOG_INFO, "Pending=%d\n", xlat_pending(&s->xlat)); - src_rv = try_enqueue_src(avctx, s); - - // If we got a frame last time and we have nothing to enqueue then -@@ -451,7 +507,7 @@ static int 
v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - s->draining, s->capture.done, dst_rv); - - // Go again if we got a frame that we need to discard -- } while (dst_rv == 0 && xlat_pts_out(avctx, s, frame)); -+ } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame)); - } - - // Continue trying to enqueue packets if either -@@ -550,6 +606,7 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - if (ret < 0) - return ret; - -+ xlat_init(&s->xlat); - pts_stats_init(&s->pts_stat, avctx, "decoder"); - - capture = &s->capture; -@@ -632,7 +689,7 @@ static void v4l2_decode_flush(AVCodecContext *avctx) - V4L2m2mContext * const s = priv->context; - V4L2Context * const output = &s->output; - V4L2Context * const capture = &s->capture; -- int ret, i; -+ int ret; - - av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); - -@@ -646,8 +703,7 @@ static void v4l2_decode_flush(AVCodecContext *avctx) - - // V4L2 makes no guarantees about whether decoded frames are flushed or not - // so mark all frames we are tracking to be discarded if they appear -- for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) -- s->track_els[i].discard = 1; -+ xlat_flush(&s->xlat); - - // resend extradata - s->extdata_sent = 0; - -From 74854095e6aac7647a2a04d53110150dd83f3b09 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 15 Dec 2021 17:58:21 +0000 -Subject: [PATCH 034/186] Use pending tracking to reduce v4l2 latency - -If there are more than 5 pending decodes outstanding then add a small -timeout to the capture poll to reduce the rate at which frames are -added. 
---- - libavcodec/v4l2_m2m_dec.c | 58 ++++++++++++++++++++++++--------------- - 1 file changed, 36 insertions(+), 22 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index d8ebb466cd56..7e7e4729d08b 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -370,16 +370,19 @@ static inline int stream_started(const V4L2m2mContext * const s) { - #define NQ_OK 0 - #define NQ_Q_FULL 1 - #define NQ_SRC_EMPTY 2 --#define NQ_DRAINING 3 --#define NQ_DEAD 4 -+#define NQ_NONE 3 -+#define NQ_DRAINING 4 -+#define NQ_DEAD 5 - - #define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) -+#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) - - // AVERROR_EOF Flushing an already flushed stream - // -ve Error (all errors except EOF are unexpected) - // NQ_OK (0) OK - // NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) - // NQ_SRC_EMPTY Src empty (do not retry) -+// NQ_NONE Enqueue not attempted - // NQ_DRAINING At EOS, dQ dest until EOS there too - // NQ_DEAD Not running (do not retry, do not attempt capture dQ) - -@@ -468,23 +471,28 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - { - V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; -- int src_rv; -+ int src_rv = NQ_NONE; - int dst_rv = 1; // Non-zero (done), non-negative (error) number -+ unsigned int i = 0; - - do { -- av_log(avctx, AV_LOG_INFO, "Pending=%d\n", xlat_pending(&s->xlat)); -- src_rv = try_enqueue_src(avctx, s); -- -- // If we got a frame last time and we have nothing to enqueue then -- // return now. 
rv will be AVERROR(EAGAIN) indicating that we want more input -- // This should mean that once decode starts we enter a stable state where -- // we alternately ask for input and produce output -- if (s->req_pkt && src_rv == NQ_SRC_EMPTY) -- break; -- -- if (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) { -- av_log(avctx, AV_LOG_WARNING, "Poll says src Q has space but enqueue fail"); -- src_rv = NQ_SRC_EMPTY; // If we can't enqueue pretend that there is nothing to enqueue -+ const int pending = xlat_pending(&s->xlat); -+ const int prefer_dq = (pending > 5); -+ -+ // Enqueue another pkt for decode if -+ // (a) We don't have a lot of stuff in the buffer already OR -+ // (b) ... we (think we) do but we've failed to get a frame already OR -+ // (c) We've dequeued a lot of frames without asking for input -+ if (!prefer_dq || i != 0 || s->req_pkt > 2) { -+ src_rv = try_enqueue_src(avctx, s); -+ -+ // If we got a frame last time or we've already tried to get a frame and -+ // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) -+ // indicating that we want more input. -+ // This should mean that once decode starts we enter a stable state where -+ // we alternately ask for input and produce output -+ if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) -+ break; - } - - // Try to get a new frame if -@@ -495,9 +503,9 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - // Dequeue frame will unref any previous contents of frame - // if it returns success so we don't need an explicit unref - // when discarding -- // This returns AVERROR(EAGAIN) if there isn't a frame ready yet -- // but there is room in the input Q -- dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, src_rv == NQ_Q_FULL ? 100 : -1); -+ // This returns AVERROR(EAGAIN) on timeout or if -+ // there is room in the input Q and timeout == -1 -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, prefer_dq ? 
5 : -1); - - if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) - av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", -@@ -510,10 +518,16 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame)); - } - -+ ++i; -+ if (i >= 256) { -+ av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i); -+ src_rv = AVERROR(EIO); -+ } -+ - // Continue trying to enqueue packets if either - // (a) we succeeded last time OR -- // (b) enqueue failed due to input Q full AND there is now room -- } while (src_rv == NQ_OK || (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) ); -+ // (b) we didn't ret a frame and we can retry the input -+ } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv))); - - // Ensure that the frame contains nothing if we aren't returning a frame - // (might happen when discarding) -@@ -521,7 +535,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - av_frame_unref(frame); - - // If we got a frame this time ask for a pkt next time -- s->req_pkt = (dst_rv == 0); -+ s->req_pkt = (dst_rv == 0) ? 
s->req_pkt + 1 : 0; - - #if 0 - if (dst_rv == 0) - -From 584445b9041ff6faed2a9a1ad455f0ab30bf04c6 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 15 Dec 2021 12:23:54 +0000 -Subject: [PATCH 035/186] Allow logger() to take const ctx - ---- - libavcodec/v4l2_buffers.c | 2 +- - libavcodec/v4l2_context.c | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 22da6bd72234..39c0094aec10 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -46,7 +46,7 @@ static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) - container_of(buf->context, V4L2m2mContext, capture); - } - --static inline AVCodecContext *logger(V4L2Buffer *buf) -+static inline AVCodecContext *logger(const V4L2Buffer * const buf) - { - return buf_to_m2mctx(buf)->avctx; - } -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 64540a37b32e..d3df48aed499 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -43,14 +43,14 @@ struct v4l2_format_update { - int update_avfmt; - }; - --static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) -+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) - { - return V4L2_TYPE_IS_OUTPUT(ctx->type) ? 
- container_of(ctx, V4L2m2mContext, output) : - container_of(ctx, V4L2m2mContext, capture); - } - --static inline AVCodecContext *logger(V4L2Context *ctx) -+static inline AVCodecContext *logger(const V4L2Context *ctx) - { - return ctx_to_m2mctx(ctx)->avctx; - } - -From 80ff275f1183fe466edbdfaadf17b2c40a45fa3e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 15 Dec 2021 13:00:27 +0000 -Subject: [PATCH 036/186] Track numbere of bufs qed with an atomic - -Safer and faster than counting status ---- - libavcodec/v4l2_buffers.c | 6 +++--- - libavcodec/v4l2_context.c | 3 ++- - libavcodec/v4l2_context.h | 3 +-- - 3 files changed, 6 insertions(+), 6 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 39c0094aec10..2cf7be663263 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -922,6 +922,7 @@ fail: - int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) - { - int ret; -+ int qc; - - avbuf->buf.flags = avbuf->flags; - -@@ -941,11 +942,10 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) - return AVERROR(err); - } - -- ++avbuf->context->q_count; -+ qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; - av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", - avbuf->context->name, avbuf->buf.index, -- avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, -- avbuf->context->q_count); -+ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); - - avbuf->status = V4L2BUF_IN_DRIVER; - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index d3df48aed499..268a057e53cc 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -599,7 +599,7 @@ static int v4l2_release_buffers(V4L2Context* ctx) - " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... 
)\n"); - } - } -- ctx->q_count = 0; -+ atomic_store(&ctx->q_count, 0); - - return ret; - } -@@ -1019,6 +1019,7 @@ int ff_v4l2_context_init(V4L2Context* ctx) - } - - ff_mutex_init(&ctx->lock, NULL); -+ atomic_init(&ctx->q_count, 0); - - if (s->output_drm) { - AVHWFramesContext *hwframes; -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 4cc164886c3b..a4176448d595 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -110,8 +110,7 @@ typedef struct V4L2Context { - int no_pts_rescale; - - AVBufferRef *frames_ref; -- int q_count; -- int dq_count; -+ atomic_int q_count; - struct ff_weak_link_master *wl_master; - - AVMutex lock; - -From c103328f28905a96632afece258c14cb726a7c48 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 9 Dec 2021 12:01:25 +0000 -Subject: [PATCH 037/186] Clear pkt_buf on flush - ---- - libavcodec/v4l2_m2m_dec.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 7e7e4729d08b..09ec4963517b 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -715,6 +715,9 @@ static void v4l2_decode_flush(AVCodecContext *avctx) - if (ret < 0) - av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", output->name, ret); - -+ // Clear any buffered input packet -+ av_packet_unref(&s->buf_pkt); -+ - // V4L2 makes no guarantees about whether decoded frames are flushed or not - // so mark all frames we are tracking to be discarded if they appear - xlat_flush(&s->xlat); - -From b7552e6e913b0b894106f735465742dbba915bba Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 15 Dec 2021 12:52:56 +0000 -Subject: [PATCH 038/186] Rework v4l2 buffer dequeue - ---- - libavcodec/v4l2_context.c | 543 ++++++++++++++++++-------------------- - libavcodec/v4l2_context.h | 2 + - libavcodec/v4l2_m2m.c | 1 - - libavcodec/v4l2_m2m.h | 16 +- - libavcodec/v4l2_m2m_dec.c | 138 ++++------ - 5 files changed, 327 insertions(+), 373 deletions(-) - -diff 
--git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 268a057e53cc..d765181645fb 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -73,19 +73,27 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) - return sar; - } - --static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) -+static inline int ctx_buffers_alloced(const V4L2Context * const ctx) - { -- struct v4l2_format *fmt1 = &ctx->format; -- int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? -- fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || -- fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height -- : -- fmt1->fmt.pix.width != fmt2->fmt.pix.width || -- fmt1->fmt.pix.height != fmt2->fmt.pix.height; -+ return ctx->bufrefs != NULL; -+} -+ -+// Width/Height changed or we don't have an alloc in the first place? -+static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) -+{ -+ const struct v4l2_format *fmt1 = &ctx->format; -+ int ret = !ctx_buffers_alloced(ctx) || -+ (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
-+ fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || -+ fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height -+ : -+ fmt1->fmt.pix.width != fmt2->fmt.pix.width || -+ fmt1->fmt.pix.height != fmt2->fmt.pix.height); - - if (ret) -- av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", -+ av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n", - ctx->name, -+ ctx_buffers_alloced(ctx), - ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), - ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); - -@@ -167,10 +175,8 @@ static int do_source_change(V4L2m2mContext * const s) - - int ret; - int reinit; -- int full_reinit; - struct v4l2_format cap_fmt = s->capture.format; - -- s->resize_pending = 0; - s->capture.done = 0; - - ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); -@@ -179,15 +185,21 @@ static int do_source_change(V4L2m2mContext * const s) - return 0; - } - -- s->output.sample_aspect_ratio = v4l2_get_sar(&s->output); -- - get_default_selection(&s->capture, &s->capture.selection); - -- reinit = v4l2_resolution_changed(&s->capture, &cap_fmt); -+ reinit = ctx_resolution_changed(&s->capture, &cap_fmt); -+ s->capture.format = cap_fmt; - if (reinit) { - s->capture.height = ff_v4l2_get_format_height(&cap_fmt); - s->capture.width = ff_v4l2_get_format_width(&cap_fmt); - } -+ -+ // If we don't support selection (or it is bust) and we obviously have HD then kludge -+ if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) && -+ (s->capture.height == 1088 && s->capture.width == 1920)) { -+ s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080}; -+ } -+ - s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - - av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d\n", -@@ -195,11 +207,11 @@ static int do_source_change(V4L2m2mContext * const s) - s->capture.selection.width, s->capture.selection.height, - s->capture.selection.left, 
s->capture.selection.top); - -- s->reinit = 1; -- - if (reinit) { - if (avctx) -- ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); -+ ret = ff_set_dimensions(s->avctx, -+ s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, -+ s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height); - if (ret < 0) - av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); - -@@ -208,11 +220,22 @@ static int do_source_change(V4L2m2mContext * const s) - av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n"); - return AVERROR(EINVAL); - } -+ -+ // Update pixel format - should only actually do something on initial change -+ s->capture.av_pix_fmt = -+ ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); -+ if (s->output_drm) { -+ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -+ avctx->sw_pix_fmt = s->capture.av_pix_fmt; -+ } -+ else -+ avctx->pix_fmt = s->capture.av_pix_fmt; -+ - goto reinit_run; - } - - /* Buffers are OK so just stream off to ack */ -- av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only\n", __func__); -+ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); - - ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); - if (ret) -@@ -225,54 +248,6 @@ reinit_run: - return 1; - } - --static int ctx_done(V4L2Context * const ctx) --{ -- int rv = 0; -- V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -- -- ctx->done = 1; -- -- if (s->resize_pending && !V4L2_TYPE_IS_OUTPUT(ctx->type)) -- rv = do_source_change(s); -- -- return rv; --} -- --/** -- * handle resolution change event and end of stream event -- * returns 1 if reinit was successful, negative if it failed -- * returns 0 if reinit was not executed -- */ --static int v4l2_handle_event(V4L2Context *ctx) --{ -- V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -- struct v4l2_event evt = { 0 }; -- int ret; -- -- ret = ioctl(s->fd, VIDIOC_DQEVENT, 
&evt); -- if (ret < 0) { -- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name); -- return 0; -- } -- -- av_log(logger(ctx), AV_LOG_INFO, "Dq event %d\n", evt.type); -- -- if (evt.type == V4L2_EVENT_EOS) { --// ctx->done = 1; -- av_log(logger(ctx), AV_LOG_TRACE, "%s VIDIOC_EVENT_EOS\n", ctx->name); -- return 0; -- } -- -- if (evt.type != V4L2_EVENT_SOURCE_CHANGE) -- return 0; -- -- s->resize_pending = 1; -- if (!ctx->done) -- return 0; -- -- return do_source_change(s); --} -- - static int v4l2_stop_decode(V4L2Context *ctx) - { - struct v4l2_decoder_cmd cmd = { -@@ -313,243 +288,252 @@ static int v4l2_stop_encode(V4L2Context *ctx) - return 0; - } - --static int count_in_driver(const V4L2Context * const ctx) -+// DQ a buffer -+// Amalgamates all the various ways there are of signalling EOS/Event to -+// generate a consistant EPIPE. -+// -+// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped) -+// -+// Returns: -+// 0 Success -+// AVERROR(EPIPE) Nothing more to read -+// * AVERROR(..) 
-+ -+ static int -+dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) - { -- int i; -- int n = 0; -+ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); -+ AVCodecContext * const avctx = m->avctx; -+ V4L2Buffer * avbuf; -+ const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type); - -- if (!ctx->bufrefs) -- return -1; -- -- for (i = 0; i < ctx->num_buffers; ++i) { -- V4L2Buffer *const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -- if (avbuf->status == V4L2BUF_IN_DRIVER) -- ++n; -- } -- return n; --} -+ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; - --static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) --{ -- V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -- const int is_capture = !V4L2_TYPE_IS_OUTPUT(ctx->type); -- struct v4l2_plane planes[VIDEO_MAX_PLANES]; -- struct v4l2_buffer buf = { 0 }; -- V4L2Buffer *avbuf; -- struct pollfd pfd = { -- .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */ -- .fd = ctx_to_m2mctx(ctx)->fd, -+ struct v4l2_buffer buf = { -+ .type = ctx->type, -+ .memory = V4L2_MEMORY_MMAP, - }; -- int i, ret; -- int no_rx_means_done = 0; -- -- if (is_capture && ctx->bufrefs) { -- for (i = 0; i < ctx->num_buffers; i++) { -- avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -- if (avbuf->status == V4L2BUF_IN_DRIVER) -- break; -- } -- if (i == ctx->num_buffers) -- av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers (%d) returned to " -- "userspace. 
Increase num_capture_buffers " -- "to prevent device deadlock or dropped " -- "packets/frames.\n", i); -+ -+ *ppavbuf = NULL; -+ -+ if (ctx->flag_last) -+ return AVERROR(EPIPE); -+ -+ if (is_mp) { -+ buf.length = VIDEO_MAX_PLANES; -+ buf.m.planes = planes; - } - --#if 0 -- // I think this is true but pointless -- // we will get some other form of EOF signal -- -- /* if we are draining and there are no more capture buffers queued in the driver we are done */ -- if (is_capture && ctx_to_m2mctx(ctx)->draining) { -- for (i = 0; i < ctx->num_buffers; i++) { -- /* capture buffer initialization happens during decode hence -- * detection happens at runtime -- */ -- if (!ctx->bufrefs) -- break; -- -- avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -- if (avbuf->status == V4L2BUF_IN_DRIVER) -- goto start; -+ while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) { -+ const int err = errno; -+ av_assert0(AVERROR(err) < 0); -+ if (err != EINTR) { -+ av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", -+ ctx->name, av_err2str(AVERROR(err))); -+ -+ if (err == EPIPE) -+ ctx->flag_last = 1; -+ -+ return AVERROR(err); - } -- ctx->done = 1; -- return NULL; - } --#endif -- --start: -- if (is_capture) { -- /* no need to listen to requests for more input while draining */ -- if (ctx_to_m2mctx(ctx)->draining || timeout > 0) -- pfd.events = POLLIN | POLLRDNORM | POLLPRI; -- } else { -- pfd.events = POLLOUT | POLLWRNORM; -+ atomic_fetch_sub(&ctx->q_count, 1); -+ -+ avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; -+ avbuf->status = V4L2BUF_AVAILABLE; -+ avbuf->buf = buf; -+ if (is_mp) { -+ memcpy(avbuf->planes, planes, sizeof(planes)); -+ avbuf->buf.m.planes = avbuf->planes; - } -- no_rx_means_done = s->resize_pending && is_capture; - -- for (;;) { -- // If we have a resize pending then all buffers should be Qed -- // With a resize pending we should be in drain but evidence suggests -- // that not all decoders do this so poll to clear -- int t2 = no_rx_means_done ? 0 : timeout < 0 ? 
3000 : timeout; -- const int e = pfd.events; -- -- ret = poll(&pfd, 1, t2); -+ if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { -+ // Zero length cap buffer return == EOS -+ if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) { -+ av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n"); - -- if (ret > 0) -- break; -+ // Must reQ so we don't leak -+ // May not matter if the next thing we do is release all the -+ // buffers but better to be tidy. -+ ff_v4l2_buffer_enqueue(avbuf); - -- if (ret < 0) { -- int err = errno; -- if (err == EINTR) -- continue; -- av_log(logger(ctx), AV_LOG_ERROR, "=== poll error %d (%s): events=%#x, cap buffers=%d\n", -- err, strerror(err), -- e, count_in_driver(ctx)); -- return NULL; -+ ctx->flag_last = 1; -+ return AVERROR(EPIPE); - } - -- // ret == 0 (timeout) -- if (no_rx_means_done) { -- av_log(logger(ctx), AV_LOG_DEBUG, "Ctx done on timeout\n"); -- ret = ctx_done(ctx); -- if (ret > 0) -- goto start; -- } -- if (timeout == -1) -- av_log(logger(ctx), AV_LOG_ERROR, "=== poll unexpected TIMEOUT: events=%#x, cap buffers=%d\n", e, count_in_driver(ctx));; -- return NULL; -+#ifdef V4L2_BUF_FLAG_LAST -+ // If flag_last set then this contains data but is the last frame -+ // so remember that but return OK -+ if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0) -+ ctx->flag_last = 1; -+#endif - } - -- /* 0. 
handle errors */ -- if (pfd.revents & POLLERR) { -- /* if we are trying to get free buffers but none have been queued yet -- no need to raise a warning */ -- if (timeout == 0) { -- for (i = 0; i < ctx->num_buffers; i++) { -- avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -- if (avbuf->status != V4L2BUF_AVAILABLE) -- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); -- } -- } -- else -- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); -+ *ppavbuf = avbuf; -+ return 0; -+} - -- return NULL; -- } -+/** -+ * handle resolution change event and end of stream event -+ * Expects to be called after the stream has stopped -+ * -+ * returns 1 if reinit was successful, negative if it failed -+ * returns 0 if reinit was not executed -+ */ -+static int -+get_event(V4L2m2mContext * const m) -+{ -+ AVCodecContext * const avctx = m->avctx; -+ struct v4l2_event evt = { 0 }; - -- /* 1. handle resolution changes */ -- if (pfd.revents & POLLPRI) { -- ret = v4l2_handle_event(ctx); -- if (ret < 0) { -- /* if re-init failed, abort */ -- ctx->done = 1; -- return NULL; -+ while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { -+ const int rv = AVERROR(errno); -+ if (rv == AVERROR(EINTR)) -+ continue; -+ if (rv == AVERROR(EAGAIN)) { -+ av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n"); -+ return AVERROR_EOF; - } -- if (ret > 0) -- goto start; -+ av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv)); -+ return rv; -+ } -+ -+ av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type); -+ -+ if (evt.type == V4L2_EVENT_EOS) { -+ av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n"); -+ return AVERROR_EOF; - } - -- /* 2. 
dequeue the buffer */ -- if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { -+ if (evt.type == V4L2_EVENT_SOURCE_CHANGE) -+ return do_source_change(m); - -- if (is_capture) { -- /* there is a capture buffer ready */ -- if (pfd.revents & (POLLIN | POLLRDNORM)) -- goto dequeue; -+ return 0; -+} - -- // CAPTURE Q drained -- if (no_rx_means_done) { -- if (ctx_done(ctx) > 0) -- goto start; -- return NULL; -- } - -- /* the driver is ready to accept more input; instead of waiting for the capture -- * buffer to complete we return NULL so input can proceed (we are single threaded) -- */ -- if (pfd.revents & (POLLOUT | POLLWRNORM)) -- return NULL; -+// Get a buffer -+// If output then just gets the buffer in the expected way -+// If capture then runs the capture state m/c to deal with res change etc. -+// If return value == 0 then *ppavbuf != NULL -+ -+static int -+get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout) -+{ -+ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); -+ AVCodecContext * const avctx = m->avctx; -+ const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type); -+ -+ const unsigned int poll_cap = (POLLIN | POLLRDNORM); -+ const unsigned int poll_out = (POLLOUT | POLLWRNORM); -+ const unsigned int poll_event = POLLPRI; -+ -+ *ppavbuf = NULL; -+ -+ for (;;) { -+ struct pollfd pfd = { -+ .fd = m->fd, -+ // If capture && stream not started then assume we are waiting for the initial event -+ .events = !is_cap ? poll_out : -+ !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? 
poll_cap : -+ poll_event, -+ }; -+ int ret; -+ -+ if (ctx->done) { -+ av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name); -+ return AVERROR_EOF; - } - --dequeue: -- memset(&buf, 0, sizeof(buf)); -- buf.memory = V4L2_MEMORY_MMAP; -- buf.type = ctx->type; -- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { -- memset(planes, 0, sizeof(planes)); -- buf.length = VIDEO_MAX_PLANES; -- buf.m.planes = planes; -+ // If capture && timeout == -1 then also wait for rx buffer free -+ if (is_cap && timeout == -1 && m->output.streamon && !m->draining) -+ pfd.events |= poll_out; -+ -+ // If nothing Qed all we will get is POLLERR - avoid that -+ if ((pfd.events == poll_out && atomic_load(&m->output.q_count) == 0) || -+ (pfd.events == poll_cap && atomic_load(&m->capture.q_count) == 0) || -+ (pfd.events == (poll_cap | poll_out) && atomic_load(&m->capture.q_count) == 0 && atomic_load(&m->output.q_count) == 0)) { -+ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); -+ return AVERROR(EAGAIN); - } - -- while ((ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf)) == -1) { -- const int err = errno; -- if (err == EINTR) -+ // Timeout kludged s.t. "forever" eventually gives up & produces logging -+ // If waiting for an event when we have seen a last_frame then we expect -+ // it to be ready already so force a short timeout -+ ret = poll(&pfd, 1, -+ ff_v4l2_ctx_eos(ctx) ? 10 : -+ timeout == -1 ? 3000 : timeout); -+ if (ret < 0) { -+ ret = AVERROR(errno); // Remember errno before logging etc. 
-+ av_assert0(ret < 0); -+ } -+ -+ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n", -+ ctx->name, ret, timeout, pfd.events, pfd.revents); -+ -+ if (ret < 0) { -+ if (ret == AVERROR(EINTR)) - continue; -- if (err != EAGAIN) { -- // EPIPE on CAPTURE can be used instead of BUF_FLAG_LAST -- if (err != EPIPE || !is_capture) -- av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", -- ctx->name, av_err2str(AVERROR(err))); -- if (ctx_done(ctx) > 0) -- goto start; -+ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret)); -+ return ret; -+ } -+ -+ if (ret == 0) { -+ if (timeout == -1) -+ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events); -+ if (ff_v4l2_ctx_eos(ctx)) { -+ av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name); -+ ret = get_event(m); -+ if (ret < 0) { -+ ctx->done = 1; -+ return ret; -+ } - } -- return NULL; -+ return AVERROR(EAGAIN); - } -- --ctx->q_count; -- av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d field=%d\n", -- ctx->name, buf.index, -- buf.timestamp.tv_sec, buf.timestamp.tv_usec, -- ctx->q_count, ++ctx->dq_count, buf.field); -- -- avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; -- avbuf->status = V4L2BUF_AVAILABLE; -- avbuf->buf = buf; -- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { -- memcpy(avbuf->planes, planes, sizeof(planes)); -- avbuf->buf.m.planes = avbuf->planes; -+ -+ if ((pfd.revents & POLLERR) != 0) { -+ av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name); -+ return AVERROR_UNKNOWN; - } - -- if (ctx_to_m2mctx(ctx)->draining && is_capture) { -- int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? 
-- buf.m.planes[0].bytesused : buf.bytesused; -- if (bytesused == 0) { -- av_log(logger(ctx), AV_LOG_DEBUG, "Buffer empty - reQ\n"); -+ if ((pfd.revents & poll_event) != 0) { -+ ret = get_event(m); -+ if (ret < 0) { -+ ctx->done = 1; -+ return ret; -+ } -+ continue; -+ } - -- // Must reQ so we don't leak -- // May not matter if the next thing we do is release all the -- // buffers but better to be tidy. -- ff_v4l2_buffer_enqueue(avbuf); -+ if ((pfd.revents & poll_cap) != 0) { -+ ret = dq_buf(ctx, ppavbuf); -+ if (ret == AVERROR(EPIPE)) -+ continue; -+ return ret; -+ } - -- if (ctx_done(ctx) > 0) -- goto start; -- return NULL; -- } --#ifdef V4L2_BUF_FLAG_LAST -- if (buf.flags & V4L2_BUF_FLAG_LAST) { -- av_log(logger(ctx), AV_LOG_TRACE, "FLAG_LAST set\n"); -- avbuf->status = V4L2BUF_IN_USE; // Avoid flushing this buffer -- ctx_done(ctx); -- } --#endif -+ if ((pfd.revents & poll_out) != 0) { -+ if (is_cap) -+ return AVERROR(EAGAIN); -+ return dq_buf(ctx, ppavbuf); - } - -- return avbuf; -+ av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents); -+ return AVERROR_UNKNOWN; - } -- -- return NULL; - } - - static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) - { -- int timeout = 0; /* return when no more buffers to dequeue */ - int i; - - /* get back as many output buffers as possible */ - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { -- do { -- } while (v4l2_dequeue_v4l2buf(ctx, timeout)); -+ V4L2Buffer * avbuf; -+ do { -+ get_qbuf(ctx, &avbuf, 0); -+ } while (avbuf); - } - - for (i = 0; i < ctx->num_buffers; i++) { -@@ -722,7 +706,7 @@ static void flush_all_buffers_status(V4L2Context* const ctx) - if (buf->status == V4L2BUF_IN_DRIVER) - buf->status = V4L2BUF_AVAILABLE; - } -- ctx->q_count = 0; -+ atomic_store(&ctx->q_count, 0); - } - - static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) -@@ -755,6 +739,10 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) - int ret; - AVCodecContext * const 
avctx = logger(ctx); - -+ // Avoid doing anything if there is nothing we can do -+ if (cmd == VIDIOC_STREAMOFF && !ctx_buffers_alloced(ctx) && !ctx->streamon) -+ return 0; -+ - ff_mutex_lock(&ctx->lock); - - if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type)) -@@ -777,6 +765,9 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) - cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF"); - } - -+ // Both stream off & on effectively clear flag_last -+ ctx->flag_last = 0; -+ - ff_mutex_unlock(&ctx->lock); - - return ret; -@@ -840,19 +831,10 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, - int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) - { - V4L2Buffer *avbuf; -+ int rv; - -- /* -- * timeout=-1 blocks until: -- * 1. decoded frame available -- * 2. an input buffer is ready to be dequeued -- */ -- avbuf = v4l2_dequeue_v4l2buf(ctx, timeout); -- if (!avbuf) { -- if (ctx->done) -- return AVERROR_EOF; -- -- return AVERROR(EAGAIN); -- } -+ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) -+ return rv; - - return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); - } -@@ -860,19 +842,10 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) - int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) - { - V4L2Buffer *avbuf; -+ int rv; - -- /* -- * blocks until: -- * 1. encoded packet available -- * 2. 
an input buffer ready to be dequeued -- */ -- avbuf = v4l2_dequeue_v4l2buf(ctx, -1); -- if (!avbuf) { -- if (ctx->done) -- return AVERROR_EOF; -- -- return AVERROR(EAGAIN); -- } -+ if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) -+ return rv; - - return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); - } -@@ -956,6 +929,8 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers - int ret; - int i; - -+ av_assert0(ctx->bufrefs == NULL); -+ - memset(&req, 0, sizeof(req)); - req.count = req_buffers; - req.memory = V4L2_MEMORY_MMAP; -@@ -1033,8 +1008,8 @@ int ff_v4l2_context_init(V4L2Context* ctx) - hwframes = (AVHWFramesContext*)ctx->frames_ref->data; - hwframes->format = AV_PIX_FMT_DRM_PRIME; - hwframes->sw_format = ctx->av_pix_fmt; -- hwframes->width = ctx->width; -- hwframes->height = ctx->height; -+ hwframes->width = ctx->width != 0 ? ctx->width : s->avctx->width; -+ hwframes->height = ctx->height != 0 ? ctx->height : s->avctx->height; - ret = av_hwframe_ctx_init(ctx->frames_ref); - if (ret < 0) - goto fail_unref_hwframes; -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index a4176448d595..565858a1ed17 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -102,6 +102,8 @@ typedef struct V4L2Context { - */ - int done; - -+ int flag_last; -+ - /** - * PTS rescale not wanted - * If the PTS is just a dummy frame count then rescale is -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index 516e6d98583d..e26bd74c3e9a 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -235,7 +235,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) - - /* 5. 
complete reinit */ - s->draining = 0; -- s->reinit = 0; - - return 0; - } -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 3f8680962342..d71f6b721c94 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -84,8 +84,6 @@ typedef struct V4L2m2mContext { - AVCodecContext *avctx; - sem_t refsync; - atomic_uint refcount; -- int reinit; -- int resize_pending; - - /* null frame/packet received */ - int draining; -@@ -180,15 +178,25 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); - int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); - - --static inline unsigned int ff_v4l2_get_format_width(struct v4l2_format *fmt) -+static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt) - { - return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; - } - --static inline unsigned int ff_v4l2_get_format_height(struct v4l2_format *fmt) -+static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt) - { - return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; - } - -+static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt) -+{ -+ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? 
fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; -+} -+ -+static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx) -+{ -+ return ctx->flag_last; -+} -+ - - #endif /* AVCODEC_V4L2_M2M_H */ -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 09ec4963517b..e4b6569ba5bd 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -113,9 +113,6 @@ static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *co - if (ret < 0) - av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context\n"); - -- if (!s->capture.streamon || ret < 0) -- return ret; -- - ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd); - if (ret < 0) - av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno); -@@ -127,69 +124,12 @@ static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *co - - static int v4l2_try_start(AVCodecContext *avctx) - { -- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; -- V4L2Context *const capture = &s->capture; -- struct v4l2_selection selection = { 0 }; -+ V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; - int ret; - - /* 1. start the output process */ - if ((ret = check_output_streamon(avctx, s)) != 0) - return ret; -- -- if (capture->streamon) -- return 0; -- -- /* 2. get the capture format */ -- capture->format.type = capture->type; -- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); -- if (ret) { -- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); -- return ret; -- } -- -- /* 2.1 update the AVCodecContext */ -- capture->av_pix_fmt = -- ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); -- if (s->output_drm) { -- avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -- avctx->sw_pix_fmt = capture->av_pix_fmt; -- } -- else -- avctx->pix_fmt = capture->av_pix_fmt; -- -- /* 3. 
set the crop parameters */ --#if 1 -- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -- selection.target = V4L2_SEL_TGT_CROP_DEFAULT; -- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); -- av_log(avctx, AV_LOG_INFO, "Post G selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height); --#else -- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -- selection.r.height = avctx->coded_height; -- selection.r.width = avctx->coded_width; -- av_log(avctx, AV_LOG_INFO, "Try selection %dx%d\n", avctx->coded_width, avctx->coded_height); -- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); -- av_log(avctx, AV_LOG_INFO, "Post S selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height); -- if (1) { -- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); -- if (ret) { -- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); -- } else { -- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); -- /* update the size of the resulting frame */ -- capture->height = selection.r.height; -- capture->width = selection.r.width; -- } -- } --#endif -- -- /* 5. 
start the capture process */ -- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); -- if (ret) { -- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); -- return ret; -- } -- - return 0; - } - -@@ -364,7 +304,7 @@ xlat_pending(const xlat_track_t * const x) - } - - static inline int stream_started(const V4L2m2mContext * const s) { -- return s->capture.streamon && s->output.streamon; -+ return s->output.streamon; - } - - #define NQ_OK 0 -@@ -377,6 +317,9 @@ static inline int stream_started(const V4L2m2mContext * const s) { - #define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) - #define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) - -+// do_not_get If true then no new packet will be got but status will -+// be set appropriately -+ - // AVERROR_EOF Flushing an already flushed stream - // -ve Error (all errors except EOF are unexpected) - // NQ_OK (0) OK -@@ -386,14 +329,14 @@ static inline int stream_started(const V4L2m2mContext * const s) { - // NQ_DRAINING At EOS, dQ dest until EOS there too - // NQ_DEAD Not running (do not retry, do not attempt capture dQ) - --static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s) -+static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get) - { - int ret; - - // If we don't already have a coded packet - get a new one - // We will already have a coded pkt if the output Q was full last time we - // tried to Q it -- if (!s->buf_pkt.size) { -+ if (!s->buf_pkt.size && !do_not_get) { - ret = ff_decode_get_packet(avctx, &s->buf_pkt); - - if (ret == AVERROR(EAGAIN)) { -@@ -435,6 +378,17 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - xlat_pts_in(avctx, &s->xlat, &s->buf_pkt); - } - -+ if (s->draining) { -+ if (s->buf_pkt.size) { -+ av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); -+ av_packet_unref(&s->buf_pkt); -+ } -+ return 
NQ_DRAINING; -+ } -+ -+ if (!s->buf_pkt.size) -+ return NQ_NONE; -+ - if ((ret = check_output_streamon(avctx, s)) != 0) - return ret; - -@@ -471,7 +425,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - { - V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; -- int src_rv = NQ_NONE; -+ int src_rv; - int dst_rv = 1; // Non-zero (done), non-negative (error) number - unsigned int i = 0; - -@@ -483,31 +437,40 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - // (a) We don't have a lot of stuff in the buffer already OR - // (b) ... we (think we) do but we've failed to get a frame already OR - // (c) We've dequeued a lot of frames without asking for input -- if (!prefer_dq || i != 0 || s->req_pkt > 2) { -- src_rv = try_enqueue_src(avctx, s); -- -- // If we got a frame last time or we've already tried to get a frame and -- // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) -- // indicating that we want more input. -- // This should mean that once decode starts we enter a stable state where -- // we alternately ask for input and produce output -- if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) -- break; -- } -+ src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2)); -+ -+ // If we got a frame last time or we've already tried to get a frame and -+ // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) -+ // indicating that we want more input. 
-+ // This should mean that once decode starts we enter a stable state where -+ // we alternately ask for input and produce output -+ if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) -+ break; - - // Try to get a new frame if - // (a) we haven't already got one AND - // (b) enqueue returned a status indicating that decode should be attempted - if (dst_rv != 0 && TRY_DQ(src_rv)) { -+ // Pick a timeout depending on state -+ const int t = -+ src_rv == NQ_DRAINING ? 300 : -+ prefer_dq ? 5 : -+ src_rv == NQ_Q_FULL ? -1 : 0; -+ - do { - // Dequeue frame will unref any previous contents of frame - // if it returns success so we don't need an explicit unref - // when discarding - // This returns AVERROR(EAGAIN) on timeout or if - // there is room in the input Q and timeout == -1 -- dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, prefer_dq ? 5 : -1); -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); - -- if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) -+ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { -+ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); -+ dst_rv = AVERROR_EOF; -+ s->capture.done = 1; -+ } -+ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) - av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", - s->draining, s->capture.done); - else if (dst_rv && dst_rv != AVERROR(EAGAIN)) -@@ -630,8 +593,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - * by the v4l2 driver; this event will trigger a full pipeline reconfig and - * the proper values will be retrieved from the kernel driver. 
- */ -- output->height = capture->height = avctx->coded_height; -- output->width = capture->width = avctx->coded_width; -+// output->height = capture->height = avctx->coded_height; -+// output->width = capture->width = avctx->coded_width; -+ output->height = capture->height = 0; -+ output->width = capture->width = 0; - - output->av_codec_id = avctx->codec_id; - output->av_pix_fmt = AV_PIX_FMT_NONE; -@@ -703,7 +668,6 @@ static void v4l2_decode_flush(AVCodecContext *avctx) - V4L2m2mContext * const s = priv->context; - V4L2Context * const output = &s->output; - V4L2Context * const capture = &s->capture; -- int ret; - - av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); - -@@ -711,13 +675,19 @@ static void v4l2_decode_flush(AVCodecContext *avctx) - // states like EOS processing so don't try to optimize out (having got it - // wrong once) - -- ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); -- if (ret < 0) -- av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", output->name, ret); -+ ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); - - // Clear any buffered input packet - av_packet_unref(&s->buf_pkt); - -+ // Clear a pending EOS -+ if (ff_v4l2_ctx_eos(capture)) { -+ // Arguably we could delay this but this is easy and doesn't require -+ // thought or extra vars -+ ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); -+ ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); -+ } -+ - // V4L2 makes no guarantees about whether decoded frames are flushed or not - // so mark all frames we are tracking to be discarded if they appear - xlat_flush(&s->xlat); - -From 935dad1739bafaa8bf8e24d9461207b71af0d617 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 9 Dec 2021 18:51:00 +0000 -Subject: [PATCH 039/186] Honor result of ff_get_format if possible - ---- - libavcodec/v4l2_m2m_dec.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 
e4b6569ba5bd..c9655bcc3b43 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -615,15 +615,19 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - * check the v4l2_get_drm_frame function. - */ - -+ avctx->sw_pix_fmt = avctx->pix_fmt; - gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); - av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s); get_format requested=%d (%s)\n", - avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); - -- s->output_drm = 0; - if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { - avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; - s->output_drm = 1; - } -+ else { -+ capture->av_pix_fmt = gf_pix_fmt; -+ s->output_drm = 0; -+ } - - s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); - if (!s->device_ref) { - -From 18e485cf1252bba30cfd5feef626ad9d90fcde6a Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 14 Dec 2021 16:11:10 +0000 -Subject: [PATCH 040/186] Add an always-reinit quirk - ---- - libavcodec/v4l2_context.c | 7 +++++-- - libavcodec/v4l2_m2m.h | 5 +++++ - libavcodec/v4l2_m2m_dec.c | 33 ++++++++++++++++++++++++++++++++- - 3 files changed, 42 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index d765181645fb..c11b5e68637d 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -188,6 +188,9 @@ static int do_source_change(V4L2m2mContext * const s) - get_default_selection(&s->capture, &s->capture.selection); - - reinit = ctx_resolution_changed(&s->capture, &cap_fmt); -+ if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0) -+ reinit = 1; -+ - s->capture.format = cap_fmt; - if (reinit) { - s->capture.height = ff_v4l2_get_format_height(&cap_fmt); -@@ -202,10 +205,10 @@ static int do_source_change(V4L2m2mContext * const s) - - s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - -- av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, 
crop %dx%d @ %d,%d\n", -+ av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d, reinit=%d\n", - s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, - s->capture.selection.width, s->capture.selection.height, -- s->capture.selection.left, s->capture.selection.top); -+ s->capture.selection.left, s->capture.selection.top, reinit); - - if (reinit) { - if (avctx) -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index d71f6b721c94..f1923bb26d57 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -113,6 +113,11 @@ typedef struct V4L2m2mContext { - - /* Ext data sent */ - int extdata_sent; -+ -+#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 -+ /* Quirks */ -+ unsigned int quirks; -+ - } V4L2m2mContext; - - typedef struct V4L2m2mPriv { -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index c9655bcc3b43..e2b10f5e3ac3 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -540,6 +540,34 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } - #endif - -+static int -+get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) -+{ -+ struct v4l2_capability cap; -+ -+ memset(&cap, 0, sizeof(cap)); -+ while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) { -+ int err = errno; -+ if (err == EINTR) -+ continue; -+ av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err)); -+ return AVERROR(err); -+ } -+ -+ // Could be made table driven if we have a few more but right now there -+ // seems no point -+ -+ // Meson (amlogic) always gives a resolution changed event after output -+ // streamon and userspace must (re)allocate capture buffers and streamon -+ // capture to clear the event even if the capture buffers were the right -+ // size in the first place. 
-+ if (strcmp(cap.driver, "meson-vdec") == 0) -+ s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS; -+ -+ av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); -+ return 0; -+} -+ - // This heuristic is for H264 but use for everything - static uint32_t max_coded_size(const AVCodecContext * const avctx) - { -@@ -646,7 +674,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - return ret; - } - -- return v4l2_prepare_decoder(s); -+ if ((ret = v4l2_prepare_decoder(s)) < 0) -+ return ret; -+ -+ return get_quirks(avctx, s); - } - - static av_cold int v4l2_decode_close(AVCodecContext *avctx) - -From 9536a97e9a1119192cdb67b33799e68f39ce7630 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jan 2022 16:58:31 +0000 -Subject: [PATCH 041/186] v4l2_buffers: rework flags for keyframe - -Previously flags could become confused and keyframe info could be lost. -This fixes that and removes the duplicate flags field in V4L2Buffer. ---- - libavcodec/v4l2_buffers.c | 15 ++++++++++----- - libavcodec/v4l2_buffers.h | 1 - - libavcodec/v4l2_context.c | 18 +++++++++++++++++- - 3 files changed, 27 insertions(+), 7 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 2cf7be663263..62d1c2605363 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -680,7 +680,9 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - - int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - { -- out->buf.flags = frame->key_frame ? (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME) : (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME); -+ out->buf.flags = frame->key_frame ? 
-+ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : -+ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); - // Beware that colour info is held in format rather than the actual - // v4l2 buffer struct so this may not be as useful as you might hope - v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); -@@ -706,6 +708,10 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) - - /* 2. get frame information */ - frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); -+ frame->pict_type = frame->key_frame ? AV_PICTURE_TYPE_I : -+ (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P : -+ (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? AV_PICTURE_TYPE_B : -+ AV_PICTURE_TYPE_NONE; - frame->color_primaries = v4l2_get_color_primaries(avbuf); - frame->colorspace = v4l2_get_color_space(avbuf); - frame->color_range = v4l2_get_color_range(avbuf); -@@ -779,8 +785,9 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - - v4l2_set_pts(out, pkt->pts); - -- if (pkt->flags & AV_PKT_FLAG_KEY) -- out->flags = V4L2_BUF_FLAG_KEYFRAME; -+ out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? 
-+ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : -+ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); - - return ret; - } -@@ -924,8 +931,6 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) - int ret; - int qc; - -- avbuf->buf.flags = avbuf->flags; -- - if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { - av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", - avbuf->context->name, avbuf->buf.index, -diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 641e0e147b19..3b7ca4d99e1e 100644 ---- a/libavcodec/v4l2_buffers.h -+++ b/libavcodec/v4l2_buffers.h -@@ -73,7 +73,6 @@ typedef struct V4L2Buffer { - struct v4l2_buffer buf; - struct v4l2_plane planes[VIDEO_MAX_PLANES]; - -- int flags; - enum V4L2Buffer_status status; - - } V4L2Buffer; -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index c11b5e68637d..53b522d43e09 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -527,6 +527,22 @@ get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout - } - } - -+// Clear out flags and timestamps that should should be set by the user -+// Returns the passed avbuf -+static V4L2Buffer * -+clean_v4l2_buffer(V4L2Buffer * const avbuf) -+{ -+ struct v4l2_buffer *const buf = &avbuf->buf; -+ -+ buf->flags = 0; -+ buf->field = V4L2_FIELD_ANY; -+ buf->timestamp = (struct timeval){0}; -+ buf->timecode = (struct v4l2_timecode){0}; -+ buf->sequence = 0; -+ -+ return avbuf; -+} -+ - static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) - { - int i; -@@ -542,7 +558,7 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) - for (i = 0; i < ctx->num_buffers; i++) { - V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; - if (avbuf->status == V4L2BUF_AVAILABLE) -- return avbuf; -+ return clean_v4l2_buffer(avbuf); - } - - return NULL; - -From a1280d98cefbf5ef7d92a51261ad3485e0a2ca74 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 22 
Mar 2022 11:44:30 +0000 -Subject: [PATCH 042/186] v4l2m2m: Rework decode to wait for missing buffer, - add dynamic pending - -Previously receive_frame exited with EAGAIN if no capture buffer -availble in the Q. Now it waits in the hope that another thread will -post one. - -The prefer dQ logic is now dynamic to help with cases where PTS/DTS -lies. If it looks like we are never getting a frame then the -threshold is increased. It then slowly decays over time to cope with -false alarms. ---- - libavcodec/v4l2_buffers.c | 6 +++-- - libavcodec/v4l2_context.c | 7 +++-- - libavcodec/v4l2_context.h | 3 +++ - libavcodec/v4l2_m2m.h | 2 ++ - libavcodec/v4l2_m2m_dec.c | 57 +++++++++++++++++++++++++++++++++++++-- - 5 files changed, 69 insertions(+), 6 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 62d1c2605363..8c4f18dbede2 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -947,12 +947,14 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) - return AVERROR(err); - } - -+ // Lock not wanted - if called from buffer free then lock already obtained - qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; -+ avbuf->status = V4L2BUF_IN_DRIVER; -+ pthread_cond_broadcast(&avbuf->context->cond); -+ - av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", - avbuf->context->name, avbuf->buf.index, - avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); - -- avbuf->status = V4L2BUF_IN_DRIVER; -- - return 0; - } -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 53b522d43e09..7ddb7598109c 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -300,6 +300,7 @@ static int v4l2_stop_encode(V4L2Context *ctx) - // Returns: - // 0 Success - // AVERROR(EPIPE) Nothing more to read -+// AVERROR(ENOSPC) No buffers in Q to put result in - // * AVERROR(..) 
- - static int -@@ -457,7 +458,7 @@ get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout - (pfd.events == poll_cap && atomic_load(&m->capture.q_count) == 0) || - (pfd.events == (poll_cap | poll_out) && atomic_load(&m->capture.q_count) == 0 && atomic_load(&m->output.q_count) == 0)) { - av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); -- return AVERROR(EAGAIN); -+ return AVERROR(ENOSPC); - } - - // Timeout kludged s.t. "forever" eventually gives up & produces logging -@@ -864,7 +865,7 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) - int rv; - - if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) -- return rv; -+ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC - - return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); - } -@@ -938,6 +939,7 @@ void ff_v4l2_context_release(V4L2Context* ctx) - av_buffer_unref(&ctx->frames_ref); - - ff_mutex_destroy(&ctx->lock); -+ pthread_cond_destroy(&ctx->cond); - } - - -@@ -1013,6 +1015,7 @@ int ff_v4l2_context_init(V4L2Context* ctx) - } - - ff_mutex_init(&ctx->lock, NULL); -+ pthread_cond_init(&ctx->cond, NULL); - atomic_init(&ctx->q_count, 0); - - if (s->output_drm) { -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 565858a1ed17..0efff58f1892 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -116,6 +116,7 @@ typedef struct V4L2Context { - struct ff_weak_link_master *wl_master; - - AVMutex lock; -+ pthread_cond_t cond; - } V4L2Context; - - /** -@@ -182,6 +183,8 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); - * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) - * - * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. 
-+ * AVERROR(ENOSPC) if no buffer availible to put -+ * the frame in - */ - int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index f1923bb26d57..9a20447030e2 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -105,6 +105,8 @@ typedef struct V4L2m2mContext { - - /* Frame tracking */ - xlat_track_t xlat; -+ int pending_hw; -+ int pending_n; - - pts_stats_t pts_stat; - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index e2b10f5e3ac3..2e30449dfc1b 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -251,7 +251,8 @@ xlat_pts_out(AVCodecContext *const avctx, - - frame->best_effort_timestamp = pts_stats_guess(ps); - frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? -- av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", frame->pts, frame->best_effort_timestamp, frame->pkt_dts); -+ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", -+ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); - return 0; - } - -@@ -422,6 +423,36 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - return ret; - } - -+static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) -+{ -+ int rv = 0; -+ -+ ff_mutex_lock(&ctx->lock); -+ -+ while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) { -+ if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) { -+ rv = AVERROR(errno); -+ av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv)); -+ break; -+ } -+ } -+ -+ ff_mutex_unlock(&ctx->lock); -+ return rv; -+} -+ -+// Number of frames over what xlat_pending returns that we keep *16 -+// This is a min value - if it appears to be too small the threshold should -+// adjust dynamically. 
-+#define PENDING_HW_MIN (3 * 16) -+// Offset to use when setting dynamically -+// Set to %16 == 15 to avoid the threshold changing immediately as we relax -+#define PENDING_HW_OFFSET (PENDING_HW_MIN - 1) -+// Number of consecutive times we've failed to get a frame when we prefer it -+// before we increase the prefer threshold (5ms * N = max expected decode -+// time) -+#define PENDING_N_THRESHOLD 6 -+ - static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - { - V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; -@@ -431,7 +462,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - - do { - const int pending = xlat_pending(&s->xlat); -- const int prefer_dq = (pending > 5); -+ const int prefer_dq = (pending > s->pending_hw / 16); - - // Enqueue another pkt for decode if - // (a) We don't have a lot of stuff in the buffer already OR -@@ -465,6 +496,27 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - // there is room in the input Q and timeout == -1 - dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); - -+ // Failure due to no buffer in Q? 
-+ if (dst_rv == AVERROR(ENOSPC)) { -+ // Wait & retry -+ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); -+ } -+ } -+ -+ // Adjust dynamic pending threshold -+ if (dst_rv == 0) { -+ if (--s->pending_hw < PENDING_HW_MIN) -+ s->pending_hw = PENDING_HW_MIN; -+ s->pending_n = 0; -+ } -+ else if (dst_rv == AVERROR(EAGAIN)) { -+ if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { -+ s->pending_hw = pending * 16 + PENDING_HW_OFFSET; -+ s->pending_n = 0; -+ } -+ } -+ - if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { - av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); - dst_rv = AVERROR_EOF; -@@ -613,6 +665,7 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - - xlat_init(&s->xlat); - pts_stats_init(&s->pts_stat, avctx, "decoder"); -+ s->pending_hw = PENDING_HW_MIN; - - capture = &s->capture; - output = &s->output; - -From b88b2c555f42688db681aab4d612c29d862246f7 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 25 Mar 2022 15:37:58 +0000 -Subject: [PATCH 043/186] v4l2_m2m2_dec: Avoid loop if unable to resize buffers - -If source change signals a buffer size that cannot be honored give up -rather than looping indefinitely. This happens on Pi if (say) a -2560x1440 h264 stream is presented to the decode. 
---- - libavcodec/v4l2_context.c | 13 +++++++++++-- - 1 file changed, 11 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 7ddb7598109c..007a58c8f1db 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -205,8 +205,9 @@ static int do_source_change(V4L2m2mContext * const s) - - s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - -- av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d, reinit=%d\n", -+ av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", - s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, -+ s->capture.width, s->capture.height, - s->capture.selection.width, s->capture.selection.height, - s->capture.selection.left, s->capture.selection.top, reinit); - -@@ -224,9 +225,17 @@ static int do_source_change(V4L2m2mContext * const s) - return AVERROR(EINVAL); - } - -+ if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || -+ s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { -+ av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", -+ s->capture.width, s->capture.height, -+ ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); -+ return AVERROR(EINVAL); -+ } -+ - // Update pixel format - should only actually do something on initial change - s->capture.av_pix_fmt = -- ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); -+ ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); - if (s->output_drm) { - avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; - avctx->sw_pix_fmt = s->capture.av_pix_fmt; - -From 0ec92a29d4cd4baa7820f20918d78cf117200a3b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 25 Mar 2022 18:14:40 +0000 -Subject: [PATCH 044/186] v4l2dec: Improve size/format validation on 
init - ---- - libavcodec/v4l2_m2m_dec.c | 84 ++++++++++++++++++++++++++++++++-- - libavcodec/v4l2_request_hevc.c | 11 +++++ - 2 files changed, 92 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 2e30449dfc1b..8dcadf461bb8 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -592,6 +592,76 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } - #endif - -+static int -+check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) -+{ -+ unsigned int i; -+ const uint32_t fcc = ff_v4l2_get_format_pixelformat(&s->capture.format); -+ const uint32_t w = avctx->coded_width; -+ const uint32_t h = avctx->coded_height; -+ -+ if (w == 0 || h == 0 || fcc == 0) { -+ av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); -+ return 0; -+ } -+ -+ for (i = 0;; ++i) { -+ struct v4l2_frmsizeenum fs = { -+ .index = i, -+ .pixel_format = fcc, -+ }; -+ -+ while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) { -+ const int err = AVERROR(errno); -+ if (err == AVERROR(EINTR)) -+ continue; -+ if (i == 0 && err == AVERROR(ENOTTY)) { -+ av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n"); -+ return 0; -+ } -+ if (err != AVERROR(EINVAL)) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); -+ return err; -+ } -+ av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in frame size enums\n", -+ w, h, av_fourcc2str(fcc)); -+ return err; -+ } -+ -+ switch (fs.type) { -+ case V4L2_FRMSIZE_TYPE_DISCRETE: -+ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: %dx%d\n", __func__, i, -+ fs.discrete.width,fs.discrete.height); -+ if (w == fs.discrete.width && h == fs.discrete.height) -+ return 0; -+ break; -+ case V4L2_FRMSIZE_TYPE_STEPWISE: -+ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, -+ fs.stepwise.min_width, fs.stepwise.min_height, -+ 
fs.stepwise.max_width, fs.stepwise.max_height, -+ fs.stepwise.step_width,fs.stepwise.step_height); -+ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && -+ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height && -+ (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 && -+ (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0) -+ return 0; -+ break; -+ case V4L2_FRMSIZE_TYPE_CONTINUOUS: -+ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, -+ fs.stepwise.min_width, fs.stepwise.min_height, -+ fs.stepwise.max_width, fs.stepwise.max_height, -+ fs.stepwise.step_width,fs.stepwise.step_height); -+ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && -+ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height) -+ return 0; -+ break; -+ default: -+ av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type); -+ return AVERROR(EINVAL); -+ } -+ } -+} -+ - static int - get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) - { -@@ -698,8 +768,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - - avctx->sw_pix_fmt = avctx->pix_fmt; - gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); -- av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s); get_format requested=%d (%s)\n", -- avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); -+ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", -+ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), -+ avctx->coded_width, avctx->coded_height, -+ gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); - - if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { - avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -@@ -730,7 +802,13 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - if ((ret = v4l2_prepare_decoder(s)) < 0) - return ret; - -- return get_quirks(avctx, s); -+ if ((ret = get_quirks(avctx, 
s)) != 0) -+ return ret; -+ -+ if ((ret = check_size(avctx, s)) != 0) -+ return ret; -+ -+ return 0; - } - - static av_cold int v4l2_decode_close(AVCodecContext *avctx) -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index b0a5930844a8..76ab0916cd6a 100644 ---- a/libavcodec/v4l2_request_hevc.c -+++ b/libavcodec/v4l2_request_hevc.c -@@ -147,6 +147,17 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - - av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); - -+ // Give up immediately if this is something that we have no code to deal with -+ if (h->ps.sps->chroma_format_idc != 1) { -+ av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", h->ps.sps->chroma_format_idc); -+ return AVERROR_PATCHWELCOME; -+ } -+ if (!(h->ps.sps->bit_depth == 10 || h->ps.sps->bit_depth == 8) || -+ h->ps.sps->bit_depth != h->ps.sps->bit_depth_chroma) { -+ av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", h->ps.sps->bit_depth, h->ps.sps->bit_depth_chroma); -+ return AVERROR_PATCHWELCOME; -+ } -+ - if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) { - av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n"); - return (AVERROR(-ret)); - -From 5d3752246afe17f69f896bfdee6faa61162c948a Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 13 Apr 2022 16:05:56 +0000 -Subject: [PATCH 045/186] v4l2 stateless hevc: Add another API variation for - linux 5.18 - -This is probably going to be a short lived variation and may end up -being reverted if no release using it ever ends up in the wild. 
---- - libavcodec/Makefile | 2 +- - libavcodec/hevc-ctrls-v3.h | 255 +++++++++++++++++++++++++++++++++ - libavcodec/v4l2_req_hevc_v3.c | 3 + - libavcodec/v4l2_req_hevc_vx.c | 17 +++ - libavcodec/v4l2_req_media.c | 15 +- - libavcodec/v4l2_req_media.h | 3 + - libavcodec/v4l2_request_hevc.c | 6 +- - libavcodec/v4l2_request_hevc.h | 1 + - 8 files changed, 295 insertions(+), 7 deletions(-) - create mode 100644 libavcodec/hevc-ctrls-v3.h - create mode 100644 libavcodec/v4l2_req_hevc_v3.c - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index e1aa0ba014ed..2b3c16185d75 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -1000,7 +1000,7 @@ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o - OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o - OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o - OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o\ -- v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o -+ v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o - OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o - OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o - OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o diff --git a/libavcodec/hevc-ctrls-v3.h b/libavcodec/hevc-ctrls-v3.h new file mode 100644 index 000000000000..4e35bd583d58 @@ -20101,3363 +1080,12 @@ index 000000000000..4e35bd583d58 +#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) + +#endif -diff --git a/libavcodec/v4l2_req_hevc_v3.c b/libavcodec/v4l2_req_hevc_v3.c -new file mode 100644 -index 000000000000..dcc8d9563209 ---- /dev/null -+++ b/libavcodec/v4l2_req_hevc_v3.c -@@ -0,0 +1,3 @@ -+#define HEVC_CTRLS_VERSION 3 -+#include "v4l2_req_hevc_vx.c" -+ -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -index 0ae03b10c4a8..611fa21cc319 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -16,6 +16,8 @@ - - #elif HEVC_CTRLS_VERSION == 2 - #include 
"hevc-ctrls-v2.h" -+#elif HEVC_CTRLS_VERSION == 3 -+#include "hevc-ctrls-v3.h" - #else - #error Unknown HEVC_CTRLS_VERSION - #endif -@@ -147,6 +149,7 @@ static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_t - } - } - -+#if HEVC_CTRLS_VERSION <= 2 - static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) - { - const HEVCFrame *frame; -@@ -172,6 +175,7 @@ static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) - - return 0; - } -+#endif - - static unsigned int - get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame, -@@ -247,7 +251,12 @@ fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const - struct v4l2_hevc_dpb_entry * const entry = entries + n++; - - entry->timestamp = frame_capture_dpb(frame->frame); -+#if HEVC_CTRLS_VERSION <= 2 - entry->rps = find_frame_rps_type(h, entry->timestamp); -+#else -+ entry->flags = (frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 : -+ V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE; -+#endif - entry->field_pic = frame->frame->interlaced_frame; - - /* TODO: Interleaved: Get the POC for each field. 
*/ -@@ -1011,6 +1020,14 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - }; - const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc); - -+#if HEVC_CTRLS_VERSION == 2 -+ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0)) -+ return AVERROR(EINVAL); -+#elif HEVC_CTRLS_VERSION == 3 -+ if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0)) -+ return AVERROR(EINVAL); -+#endif -+ - if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) { - av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION); - return AVERROR(EINVAL); -diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c -index eb00ecb40623..980b306b8a72 100644 ---- a/libavcodec/v4l2_req_media.c -+++ b/libavcodec/v4l2_req_media.c -@@ -604,6 +604,7 @@ struct mediabufs_ctl { - - struct v4l2_format src_fmt; - struct v4l2_format dst_fmt; -+ struct v4l2_capability capability; - }; - - static int qe_v4l2_queue(struct qent_base *const be, -@@ -1498,20 +1499,24 @@ void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc) - mediabufs_ctl_delete(mbc); - } - -+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc) -+{ -+ return mbc->capability.version; -+} -+ - static int set_capabilities(struct mediabufs_ctl *const mbc) - { -- struct v4l2_capability capability = { 0 }; - uint32_t caps; - -- if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &capability)) { -+ if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) { - int err = errno; - request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err)); - return -err; - } - -- caps = (capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? -- capability.device_caps : -- capability.capabilities; -+ caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? 
-+ mbc->capability.device_caps : -+ mbc->capability.capabilities; - - if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) { - mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; -diff --git a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h -index 2f826cfb14e7..0307a831defd 100644 ---- a/libavcodec/v4l2_req_media.h -+++ b/libavcodec/v4l2_req_media.h -@@ -142,6 +142,9 @@ MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, - struct dmabufs_ctl * const dbsc, - unsigned int n); - -+#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c)) -+unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc); -+ - struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, - const char *vpath, struct pollqueue *const pq); - void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc); -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index 76ab0916cd6a..20e4e0ab1559 100644 ---- a/libavcodec/v4l2_request_hevc.c -+++ b/libavcodec/v4l2_request_hevc.c -@@ -210,7 +210,11 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - goto fail4; - } - -- if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) { -+ if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { -+ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); -+ ctx->fns = &V2(ff_v4l2_req_hevc, 3); -+ } -+ else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) { - av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n"); - ctx->fns = &V2(ff_v4l2_req_hevc, 2); - } -diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h -index f14f594564d7..ed48d62e2d53 100644 ---- a/libavcodec/v4l2_request_hevc.h -+++ b/libavcodec/v4l2_request_hevc.h -@@ -98,5 +98,6 @@ typedef struct v4l2_req_decode_fns { - - extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); - extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); -+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); - - #endif - -From 
a9773d356f79d719d96e2c59434647c0cb1295fd Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 3 May 2022 12:44:42 +0000 -Subject: [PATCH 046/186] Remove V4l2 frame size check for meson-vdec - ---- - libavcodec/v4l2_m2m.h | 3 ++- - libavcodec/v4l2_m2m_dec.c | 10 +++++++--- - 2 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 9a20447030e2..6bd5e8eda76a 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -116,7 +116,8 @@ typedef struct V4L2m2mContext { - /* Ext data sent */ - int extdata_sent; - --#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 -+#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 -+#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 - /* Quirks */ - unsigned int quirks; - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 8dcadf461bb8..888ba67fea8c 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -604,6 +604,10 @@ check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) - av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); - return 0; - } -+ if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) { -+ av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc)); -+ return 0; -+ } - - for (i = 0;; ++i) { - struct v4l2_frmsizeenum fs = { -@@ -623,8 +627,8 @@ check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) - av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); - return err; - } -- av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in frame size enums\n", -- w, h, av_fourcc2str(fcc)); -+ av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n", -+ w, h, av_fourcc2str(fcc), i); - return err; - } - -@@ -684,7 +688,7 @@ get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) - // capture to clear the event even if the capture buffers were 
the right - // size in the first place. - if (strcmp(cap.driver, "meson-vdec") == 0) -- s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS; -+ s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN; - - av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); - return 0; - -From 2e80c1992c272b3b23d47e7afae817f45b0b2a88 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 23 May 2022 18:05:20 +0100 -Subject: [PATCH 047/186] v4l2m2m_dec: Make some error rturns a bit more robust - ---- - libavcodec/v4l2_context.c | 5 ++--- - libavcodec/v4l2_m2m_dec.c | 23 ++++++++++++++--------- - 2 files changed, 16 insertions(+), 12 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 007a58c8f1db..b3662aedaa9f 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -765,7 +765,7 @@ static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) - int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) - { - int type = ctx->type; -- int ret; -+ int ret = 0; - AVCodecContext * const avctx = logger(ctx); - - // Avoid doing anything if there is nothing we can do -@@ -777,8 +777,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) - if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type)) - stuff_all_buffers(avctx, ctx); - -- ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); -- if (ret < 0) { -+ if (ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type) < 0) { - const int err = errno; - av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name, - cmd, (cmd == VIDIOC_STREAMON) ? 
"ON" : "OFF", err); -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 888ba67fea8c..88a341aae2c2 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -110,16 +110,21 @@ static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *co - return 0; - - ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); -- if (ret < 0) -- av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context\n"); -- -- ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd); -- if (ret < 0) -- av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno); -- else -- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_DECODER_CMD start OK\n"); -+ if (ret != 0) { -+ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); -+ return ret; -+ } - -- return ret; -+ // STREAMON should do implicit START so this just for those that don't. -+ // It is optional so don't worry if it fails -+ if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { -+ ret = AVERROR(errno); -+ av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); -+ } -+ else { -+ av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); -+ } -+ return 0; - } - - static int v4l2_try_start(AVCodecContext *avctx) - -From 1c9856de210cdf151f53ce249cb4781722adfb3d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 24 May 2022 17:02:58 +0000 -Subject: [PATCH 048/186] v4l2m2m_dec: Support in-pkt AV_PKT_DATA_NEW_EXTRADATA - -Support packet side-data containing AV_PKT_DATA_NEW_EXTRADATA. Should -also detect and complain about unexpected streams of empty packets. - -This functionality untested as I haven't yet found anything that creates -NEW_EXTRADATA side data. 
---- - libavcodec/v4l2_m2m.c | 1 + - libavcodec/v4l2_m2m.h | 3 +++ - libavcodec/v4l2_m2m_dec.c | 49 ++++++++++++++++++++++++++++++++++++--- - 3 files changed, 50 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index e26bd74c3e9a..6dd01e2e0085 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -251,6 +251,7 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) - av_frame_unref(s->frame); - av_frame_free(&s->frame); - av_packet_unref(&s->buf_pkt); -+ av_freep(&s->extdata_data); - - av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 6bd5e8eda76a..19d618698dd7 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -115,6 +115,9 @@ typedef struct V4L2m2mContext { - - /* Ext data sent */ - int extdata_sent; -+ /* Ext data sent in packet - overrides ctx */ -+ uint8_t * extdata_data; -+ size_t extdata_size; - - #define FF_V4L2_QUIRK_REINIT_ALWAYS 1 - #define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 88a341aae2c2..392a68f0c7d2 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -343,7 +343,46 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - // We will already have a coded pkt if the output Q was full last time we - // tried to Q it - if (!s->buf_pkt.size && !do_not_get) { -- ret = ff_decode_get_packet(avctx, &s->buf_pkt); -+ unsigned int i; -+ -+ for (i = 0; i < 256; ++i) { -+ uint8_t * side_data; -+ size_t side_size; -+ -+ ret = ff_decode_get_packet(avctx, &s->buf_pkt); -+ if (ret != 0) -+ break; -+ -+ // New extradata is the only side-data we undertand -+ side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); -+ if (side_data) { -+ av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); -+ av_freep(&s->extdata_data); -+ if ((s->extdata_data = 
av_malloc(side_size ? side_size : 1)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to alloc %zd bytes of extra data\n", side_size); -+ return AVERROR(ENOMEM); -+ } -+ memcpy(s->extdata_data, side_data, side_size); -+ s->extdata_size = side_size; -+ s->extdata_sent = 0; -+ } -+ -+ if (s->buf_pkt.size != 0) -+ break; -+ -+ if (s->buf_pkt.side_data_elems == 0) { -+ av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n"); -+ ret = AVERROR_EOF; -+ break; -+ } -+ -+ // Retry a side-data only pkt -+ } -+ // If i >= 256 something has gone wrong -+ if (i >= 256) { -+ av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n"); -+ return AVERROR(EIO); -+ } - - if (ret == AVERROR(EAGAIN)) { - if (!stream_started(s)) { -@@ -398,8 +437,12 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - if ((ret = check_output_streamon(avctx, s)) != 0) - return ret; - -- ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, -- avctx->extradata, s->extdata_sent ? 
0 : avctx->extradata_size); -+ if (s->extdata_sent) -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); -+ else if (s->extdata_data) -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); -+ else -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, avctx->extradata, avctx->extradata_size); - - if (ret == AVERROR(EAGAIN)) { - // Out of input buffers - keep packet - -From f128df5af073fb4cece2393c1fd0a19defb5675e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 24 May 2022 20:02:48 +0000 -Subject: [PATCH 049/186] v4l2m2m_dec: Catch repeated Q fulls - ---- - libavcodec/v4l2_m2m_dec.c | 8 +++++++- - 1 file changed, 7 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 392a68f0c7d2..7e170447064a 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -504,13 +504,14 @@ static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) - static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - { - V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; -- int src_rv; -+ int src_rv = NQ_OK; - int dst_rv = 1; // Non-zero (done), non-negative (error) number - unsigned int i = 0; - - do { - const int pending = xlat_pending(&s->xlat); - const int prefer_dq = (pending > s->pending_hw / 16); -+ const int last_src_rv = src_rv; - - // Enqueue another pkt for decode if - // (a) We don't have a lot of stuff in the buffer already OR -@@ -526,6 +527,11 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) - break; - -+ if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) { -+ av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n"); -+ break; -+ } -+ - // Try to get a new frame if - // (a) we haven't already got one AND - // (b) enqueue returned a status indicating that decode should be attempted - 
-From 3997265fa559c38feaf4458625e3ec0e353e505b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 25 May 2022 15:22:12 +0000 -Subject: [PATCH 050/186] Remove requirement for epoxy & libudev config options - ---- - configure | 26 +++++++++++++++++--------- - pi-util/conf_native.sh | 2 -- - 2 files changed, 17 insertions(+), 11 deletions(-) - -diff --git a/configure b/configure -index a4ffd8797690..f3991452e4a5 100755 ---- a/configure -+++ b/configure -@@ -205,6 +205,7 @@ External library support: - --disable-bzlib disable bzlib [autodetect] - --disable-coreimage disable Apple CoreImage framework [autodetect] - --enable-chromaprint enable audio fingerprinting with chromaprint [no] -+ --disable-epoxy disable epoxy [autodetect] - --enable-frei0r enable frei0r video filtering [no] - --enable-gcrypt enable gcrypt, needed for rtmp(t)e support - if openssl, librtmp or gmp is not used [no] -@@ -281,7 +282,7 @@ External library support: - if openssl, gnutls or mbedtls is not used [no] - --enable-libtwolame enable MP2 encoding via libtwolame [no] - --enable-libuavs3d enable AVS3 decoding via libuavs3d [no] -- --enable-libudev enable libudev [no] -+ --disable-libudev disable libudev [autodetect] - --enable-libv4l2 enable libv4l2/v4l-utils [no] - --enable-libvidstab enable video stabilization using vid.stab [no] - --enable-libvmaf enable vmaf filter via libvmaf [no] -@@ -1747,7 +1748,9 @@ EXTERNAL_AUTODETECT_LIBRARY_LIST=" - avfoundation - bzlib - coreimage -+ epoxy - iconv -+ libudev - libxcb - libxcb_shm - libxcb_shape -@@ -1819,7 +1822,6 @@ EXTERNAL_LIBRARY_LIST=" - libdav1d - libdc1394 - libdrm -- epoxy - libflite - libfontconfig - libfreetype -@@ -1863,7 +1865,6 @@ EXTERNAL_LIBRARY_LIST=" - libtheora - libtwolame - libuavs3d -- libudev - libv4l2 - libvmaf - libvorbis -@@ -3567,9 +3568,8 @@ v4l2_indev_suggest="libv4l2" - v4l2_outdev_deps="libdrm" - v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h" - v4l2_outdev_suggest="libv4l2" --vout_drm_outdev_deps="libdrm 
vout_drm" --vout_egl_outdev_deps="xlib" --vout_egl_outdev_select="epoxy" -+vout_drm_outdev_deps="libdrm" -+vout_egl_outdev_deps="xlib epoxy" - vfwcap_indev_deps="vfw32 vfwcap_defines" - xcbgrab_indev_deps="libxcb" - xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes" -@@ -6355,6 +6355,12 @@ if enabled xlib; then - disable xlib - fi - -+enabled libudev && -+ check_pkg_config libudev libudev libudev.h udev_new -+ -+enabled epoxy && -+ check_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version -+ - check_headers direct.h - check_headers dirent.h - check_headers dxgidebug.h -@@ -6601,7 +6607,6 @@ enabled libdav1d && require_pkg_config libdav1d "dav1d >= 0.5.0" "dav1d - enabled libdavs2 && require_pkg_config libdavs2 "davs2 >= 1.6.0" davs2.h davs2_decoder_open - enabled libdc1394 && require_pkg_config libdc1394 libdc1394-2 dc1394/dc1394.h dc1394_new - enabled libdrm && require_pkg_config libdrm libdrm xf86drm.h drmGetVersion --enabled epoxy && require_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version - enabled libfdk_aac && { check_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen || - { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac && - warn "using libfdk without pkg-config"; } } -@@ -6713,7 +6718,6 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame - { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame || - die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } - enabled libuavs3d && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uavs3d.h uavs3d_decode --enabled libudev && require_pkg_config libudev libudev libudev.h udev_new - enabled libv4l2 && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl - enabled libvidstab && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit - enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init -@@ -6819,9 +6823,13 @@ enabled rkmpp && { 
require_pkg_config rkmpp rockchip_mpp rockchip/r - enabled v4l2_request && { enabled libdrm || - die "ERROR: v4l2-request requires --enable-libdrm"; } && - { enabled libudev || -- die "ERROR: v4l2-request requires --enable-libudev"; } -+ die "ERROR: v4l2-request requires libudev"; } - enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init - -+enabled vout_drm && { enabled libdrm || die "ERROR: vout_drm requires --enable-libdrm"; } -+ -+enabled vout_egl && { enabled epoxy || die "ERROR: vout_egl requires epoxy"; } && -+ { enabled xlib || die "ERROR: vout_egl requires xlib"; } - - if enabled gcrypt; then - GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" -diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh -index 65576846e81f..37cea71756ae 100755 ---- a/pi-util/conf_native.sh -+++ b/pi-util/conf_native.sh -@@ -91,8 +91,6 @@ $FFSRC/configure \ - --disable-thumb\ - --enable-v4l2-request\ - --enable-libdrm\ -- --enable-epoxy\ -- --enable-libudev\ - --enable-vout-egl\ - --enable-vout-drm\ - $SHARED_LIBS\ - -From bcc9de26a54ab85a5f225706f6de36c885d7cb4c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 27 May 2022 09:36:51 +0000 -Subject: [PATCH 051/186] hevc: If hwaccel avoid creation of s/w only vars - ---- - libavcodec/hevc_refs.c | 35 +++++++++++++++++++++-------------- - libavcodec/hevcdec.c | 42 +++++++++++++++++++++++++++++------------- - 2 files changed, 50 insertions(+), 27 deletions(-) - -diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c -index 811e8feff8a1..f7cf14eabccc 100644 ---- a/libavcodec/hevc_refs.c -+++ b/libavcodec/hevc_refs.c -@@ -98,18 +98,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s) - if (!frame->rpl_buf) - goto fail; - -- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); -- if (!frame->tab_mvf_buf) -- goto fail; -- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; -+ if (s->tab_mvf_pool) { -+ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); -+ if 
(!frame->tab_mvf_buf) -+ goto fail; -+ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; -+ } - -- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); -- if (!frame->rpl_tab_buf) -- goto fail; -- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; -- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; -- for (j = 0; j < frame->ctb_count; j++) -- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; -+ if (s->rpl_tab_pool) { -+ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); -+ if (!frame->rpl_tab_buf) -+ goto fail; -+ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; -+ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; -+ for (j = 0; j < frame->ctb_count; j++) -+ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; -+ } - - frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD; - frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD); -@@ -297,14 +301,17 @@ static int init_slice_rpl(HEVCContext *s) - int ctb_count = frame->ctb_count; - int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; - int i; -+ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; - - if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab)) - return AVERROR_INVALIDDATA; - -- for (i = ctb_addr_ts; i < ctb_count; i++) -- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; -+ if (frame->rpl_tab) { -+ for (i = ctb_addr_ts; i < ctb_count; i++) -+ frame->rpl_tab[i] = tab; -+ } - -- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; -+ frame->refPicList = tab->refPicList; - - return 0; - } -diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c -index e892436f9405..a2c29a611c5a 100644 ---- a/libavcodec/hevcdec.c -+++ b/libavcodec/hevcdec.c -@@ -536,6 
+536,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps, - if (!sps) - return 0; - -+ // If hwaccel then we don't need all the s/w decode helper arrays -+ if (s->avctx->hwaccel) { -+ export_stream_params(s, sps); -+ -+ s->avctx->pix_fmt = pix_fmt; -+ s->ps.sps = sps; -+ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data; -+ return 0; -+ } -+ - ret = pic_arrays_init(s, sps); - if (ret < 0) - goto fail; -@@ -2893,11 +2903,13 @@ static int hevc_frame_start(HEVCContext *s) - ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); - int ret; - -- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); -- memset(s->vertical_bs, 0, s->bs_width * s->bs_height); -- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); -- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); -- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); -+ if (s->horizontal_bs) { -+ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); -+ memset(s->vertical_bs, 0, s->bs_width * s->bs_height); -+ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); -+ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); -+ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); -+ } - - s->is_decoded = 0; - s->first_nal_type = s->nal_unit_type; -@@ -3441,15 +3453,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src) - dst->needs_fg = 1; - } - -- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); -- if (!dst->tab_mvf_buf) -- goto fail; -- dst->tab_mvf = src->tab_mvf; -+ if (src->tab_mvf_buf) { -+ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); -+ if (!dst->tab_mvf_buf) -+ goto fail; -+ dst->tab_mvf = src->tab_mvf; -+ } - -- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); -- if (!dst->rpl_tab_buf) -- goto fail; -- dst->rpl_tab = src->rpl_tab; -+ if (src->rpl_tab_buf) { -+ dst->rpl_tab_buf = 
av_buffer_ref(src->rpl_tab_buf); -+ if (!dst->rpl_tab_buf) -+ goto fail; -+ dst->rpl_tab = src->rpl_tab; -+ } - - dst->rpl_buf = av_buffer_ref(src->rpl_buf); - if (!dst->rpl_buf) - -From af130585ebdfcda7ee01819b6869aa6eb6a0172d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 30 May 2022 17:51:44 +0100 -Subject: [PATCH 052/186] rpi_sand: Add SAND30->NV12 conversion - -C code only. Reworks the hwcontext_drm conversion to use the -rpi_sand_fns generic frame convert fn rather than calling the -individual conversion functions directly. This keeps all teh stride and -size logic in a single place. ---- - libavutil/hwcontext_drm.c | 46 ++++++++------------ - libavutil/rpi_sand_fns.c | 89 +++++++++++++++++++++++++++++++++++++++ - libavutil/rpi_sand_fns.h | 5 +++ - 3 files changed, 111 insertions(+), 29 deletions(-) - -diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c -index baf18920fa14..137a952d2c3c 100644 ---- a/libavutil/hwcontext_drm.c -+++ b/libavutil/hwcontext_drm.c -@@ -234,14 +234,14 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, - enum AVHWFrameTransferDirection dir, - enum AVPixelFormat **formats) - { -- enum AVPixelFormat *pix_fmts; -+ enum AVPixelFormat *p; - -- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); -- if (!pix_fmts) -+ p = *formats = av_malloc_array(3, sizeof(*p)); -+ if (!p) - return AVERROR(ENOMEM); - - // **** Offer native sand too ???? -- pix_fmts[0] = -+ *p++ = - #if CONFIG_SAND - ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? 
- AV_PIX_FMT_YUV420P : -@@ -249,9 +249,14 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, - AV_PIX_FMT_YUV420P10LE : - #endif - ctx->sw_format; -- pix_fmts[1] = AV_PIX_FMT_NONE; - -- *formats = pix_fmts; -+#if CONFIG_SAND -+ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || -+ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) -+ *p++ = AV_PIX_FMT_NV12; -+#endif -+ -+ *p = AV_PIX_FMT_NONE; - return 0; - } - -@@ -294,29 +299,12 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, - const unsigned int w = FFMIN(dst->width, map->width); - const unsigned int h = FFMIN(dst->height, map->height); - -- if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) { -- av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], -- map->data[0], -- 128, stride2, -- 0, 0, w, h); -- av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], -- dst->data[2], dst->linesize[2], -- map->data[1], -- 128, stride2, -- 0, 0, w / 2, h / 2); -- } -- else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) { -- av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], -- map->data[0], -- 128, stride2, -- 0, 0, w, h); -- av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], -- dst->data[2], dst->linesize[2], -- map->data[1], -- 128, stride2, -- 0, 0, w / 2, h / 2); -- } -- else -+ map->crop_top = 0; -+ map->crop_bottom = 0; -+ map->crop_left = 0; -+ map->crop_right = 0; -+ -+ if (av_rpi_sand_to_planar_frame(dst, map) != 0) - { - av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); - err = AVERROR(EINVAL); -diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c -index 1f543e935701..256c3d532f38 100644 ---- a/libavutil/rpi_sand_fns.c -+++ b/libavutil/rpi_sand_fns.c -@@ -229,6 +229,75 @@ void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_ - } - } - -+// Fetches a single patch - offscreen fixup not done here -+// w <= stride1 -+// single 
lose bottom 2 bits truncation -+// _x & _w in pixels, strides in bytes -+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h) -+{ -+ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word -+ const unsigned int xskip0 = _x - (x0 >> 2) * 3; -+ const unsigned int x1 = ((_x + _w) / 3) * 4; -+ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; -+ const unsigned int mask = stride1 - 1; -+ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; -+ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words -+ -+#if HAVE_SAND_ASM && 0 -+ if (_x == 0) { -+ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); -+ return; -+ } -+#endif -+ -+ if (x0 == x1) { -+ // ******************* -+ // Partial single word xfer -+ return; -+ } -+ -+ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) -+ { -+ unsigned int x = x0; -+ const uint32_t * p = (const uint32_t *)p0; -+ uint8_t * d = dst; -+ -+ if (xskip0 != 0) { -+ const uint32_t p3 = *p++; -+ -+ if (xskip0 == 1) -+ *d++ = (p3 >> 12) & 0xff; -+ *d++ = (p3 >> 22) & 0xff; -+ -+ if (((x += 4) & mask) == 0) -+ p += slice_inc; -+ } -+ -+ while (x != x1) { -+ const uint32_t p3 = *p++; -+ *d++ = (p3 >> 2) & 0xff; -+ *d++ = (p3 >> 12) & 0xff; -+ *d++ = (p3 >> 22) & 0xff; -+ -+ if (((x += 4) & mask) == 0) -+ p += slice_inc; -+ } -+ -+ if (xrem1 != 0) { -+ const uint32_t p3 = *p; -+ -+ *d++ = (p3 >> 2) & 0xff; -+ if (xrem1 == 2) -+ *d++ = (p3 >> 12) & 0xff; -+ } -+ } -+} -+ -+ - - // w/h in pixels - void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, -@@ -310,6 +379,16 @@ int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) - av_rpi_sand_frame_stride1(src), 
av_rpi_sand_frame_stride2(src), - x/2, y/2, w/2, h/2); - break; -+ case AV_PIX_FMT_NV12: -+ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], -+ src->data[0], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x, y, w, h); -+ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], -+ src->data[1], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x/2, y/2, w, h/2); -+ break; - default: - return -1; - } -@@ -344,6 +423,16 @@ int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) - av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), - x/2, y/2, w/2, h/2); - break; -+ case AV_PIX_FMT_NV12: -+ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], -+ src->data[0], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x, y, w, h); -+ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], -+ src->data[1], -+ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), -+ x/2, y/2, w, h/2); -+ break; - default: - return -1; - } -diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h -index 634b55e800dc..462ccb8abd18 100644 ---- a/libavutil/rpi_sand_fns.h -+++ b/libavutil/rpi_sand_fns.h -@@ -85,6 +85,11 @@ void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_ - unsigned int _x, unsigned int y, - unsigned int _w, unsigned int h); - -+void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, -+ const uint8_t * src, -+ unsigned int stride1, unsigned int stride2, -+ unsigned int _x, unsigned int y, -+ unsigned int _w, unsigned int h); - - // w/h in pixels - void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, - -From 474c3010278bb385614f536968681bd5043e81ae Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 1 Jun 2022 17:49:26 +0000 -Subject: [PATCH 053/186] rpi_sand: Add SAND30->NV12 asm for Armv7 & Armv8 - -Also reworks the previous Armv8 SAND30->Y16 
function in a slightly more -efficient way that makes it look more like the Armv7 version. ---- - libavutil/aarch64/rpi_sand_neon.S | 549 ++++++++++++++++++------------ - libavutil/aarch64/rpi_sand_neon.h | 4 + - libavutil/arm/rpi_sand_neon.S | 239 ++++++++++--- - libavutil/arm/rpi_sand_neon.h | 11 + - libavutil/rpi_sand_fns.c | 2 +- - 5 files changed, 541 insertions(+), 264 deletions(-) - -diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S -index cdcf71ee6740..2f07d9674c9f 100644 ---- a/libavutil/aarch64/rpi_sand_neon.S -+++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -248,228 +248,6 @@ incomplete_block_loop_end_c8: - ret - endfunc - --//void ff_rpi_sand30_lines_to_planar_y16( --// uint8_t * dest, // [x0] --// unsigned int dst_stride, // [w1] -> assumed to be equal to _w --// const uint8_t * src, // [x2] --// unsigned int src_stride1, // [w3] -> 128 --// unsigned int src_stride2, // [w4] --// unsigned int _x, // [w5] --// unsigned int y, // [w6] --// unsigned int _w, // [w7] --// unsigned int h); // [sp, #0] -- --function ff_rpi_sand30_lines_to_planar_y16, export=1 -- stp x19, x20, [sp, #-48]! 
-- stp x21, x22, [sp, #16] -- stp x23, x24, [sp, #32] -- -- // w6 = argument h -- ldr w6, [sp, #48] -- -- // slice_inc = ((stride2 - 1) * stride1) -- mov w5, w4 -- sub w5, w5, #1 -- lsl w5, w5, #7 -- -- // total number of bytes per row = (width / 3) * 4 -- mov w8, w7 -- mov w9, #3 -- udiv w8, w8, w9 -- lsl w8, w8, #2 -- -- // number of full 128 byte blocks to be processed -- mov w9, #96 -- udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96 -- -- // w10 = number of full integers to process (4 bytes) -- // w11 = remaning zero to two 10bit values still to copy over -- mov w12, #96 -- mul w12, w9, w12 -- sub w12, w7, w12 // width - blocks*96 = remaining points per row -- mov w11, #3 -- udiv w10, w12, w11 // full integers to process = w12 / 3 -- mul w11, w10, w11 // #integers *3 -- sub w11, w12, w11 // remaining 0-2 points = remaining points - integers*3 -- -- // increase w9 by one if w10+w11 is not zero, and decrease the row count by one -- // this is to efficiently copy incomplete blocks at the end of the rows -- // the last row is handled explicitly to avoid writing out of bounds -- add w22, w10, w11 -- cmp w22, #0 -- cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise -- add w9, w9, w22 -- sub w6, w6, #1 -- -- // store the number of bytes in w20 which we copy too much for every row -- // when the width of the frame is not a multiple of 96 (128bytes storing 96 10bit values) -- mov w20, #96*2 -- mul w20, w20, w9 -- sub w20, w1, w20 -- -- mov w23, #0 // flag to check whether the last line had already been processed -- -- // bitmask to clear the uppper 6bits of the result values -- mov x19, #0x03ff03ff03ff03ff -- dup v22.2d, x19 -- -- // row counter = 0 -- eor w12, w12, w12 --row_loop_y16: -- cmp w12, w6 // jump to row_loop_y16_fin if we processed all rows -- bge row_loop_y16_fin -- -- mov x13, x2 // row src -- eor w14, w14, w14 // full block counter --block_loop_y16: -- cmp w14, w9 -- bge block_loop_y16_fin -- -- // load 64 bytes -- ld1 { v0.4s, v1.4s, v2.4s, v3.4s 
}, [x13], #64 -- -- // process v0 and v1 -- xtn v16.4h, v0.4s -- ushr v0.4s, v0.4s, #10 -- xtn v17.4h, v0.4s -- ushr v0.4s, v0.4s, #10 -- xtn v18.4h, v0.4s -- -- xtn2 v16.8h, v1.4s -- and v16.16b, v16.16b, v22.16b -- ushr v1.4s, v1.4s, #10 -- xtn2 v17.8h, v1.4s -- and v17.16b, v17.16b, v22.16b -- ushr v1.4s, v1.4s, #10 -- xtn2 v18.8h, v1.4s -- and v18.16b, v18.16b, v22.16b -- -- st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -- -- // process v2 and v3 -- xtn v23.4h, v2.4s -- ushr v2.4s, v2.4s, #10 -- xtn v24.4h, v2.4s -- ushr v2.4s, v2.4s, #10 -- xtn v25.4h, v2.4s -- -- xtn2 v23.8h, v3.4s -- and v23.16b, v23.16b, v22.16b -- ushr v3.4s, v3.4s, #10 -- xtn2 v24.8h, v3.4s -- and v24.16b, v24.16b, v22.16b -- ushr v3.4s, v3.4s, #10 -- xtn2 v25.8h, v3.4s -- and v25.16b, v25.16b, v22.16b -- -- st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -- -- // load the second half of the block -> 64 bytes into registers v4-v7 -- ld1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x13], #64 -- -- // process v4 and v5 -- xtn v16.4h, v4.4s -- ushr v4.4s, v4.4s, #10 -- xtn v17.4h, v4.4s -- ushr v4.4s, v4.4s, #10 -- xtn v18.4h, v4.4s -- -- xtn2 v16.8h, v5.4s -- and v16.16b, v16.16b, v22.16b -- ushr v5.4s, v5.4s, #10 -- xtn2 v17.8h, v5.4s -- and v17.16b, v17.16b, v22.16b -- ushr v5.4s, v5.4s, #10 -- xtn2 v18.8h, v5.4s -- and v18.16b, v18.16b, v22.16b -- -- st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -- -- // v6 and v7 -- xtn v23.4h, v6.4s -- ushr v6.4s, v6.4s, #10 -- xtn v24.4h, v6.4s -- ushr v6.4s, v6.4s, #10 -- xtn v25.4h, v6.4s -- -- xtn2 v23.8h, v7.4s -- and v23.16b, v23.16b, v22.16b -- ushr v7.4s, v7.4s, #10 -- xtn2 v24.8h, v7.4s -- and v24.16b, v24.16b, v22.16b -- ushr v7.4s, v7.4s, #10 -- xtn2 v25.8h, v7.4s -- and v25.16b, v25.16b, v22.16b -- -- st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -- -- add x13, x13, x5 // row src += slice_inc -- add w14, w14, #1 -- b block_loop_y16 --block_loop_y16_fin: -- -- -- -- -- add x2, x2, #128 // src += stride1 (start of the next row) -- add x0, x0, w20, sxtw // subtract the 
bytes we copied too much from dst -- add w12, w12, #1 -- b row_loop_y16 --row_loop_y16_fin: -- -- // check whether we have incomplete blocks at the end of every row -- // in that case decrease row block count by one -- // change height back to it's original value (meaning increase it by 1) -- // and jump back to another iteration of row_loop_y16 -- -- cmp w23, #1 -- beq row_loop_y16_fin2 // don't continue here if we already processed the last row -- add w6, w6, #1 // increase height to the original value -- sub w9, w9, w22 // block count - 1 or 0, depending on the remaining bytes count -- mov w23, #1 -- b row_loop_y16 --row_loop_y16_fin2: -- -- sub x0, x0, w20, sxtw // with the last row we didn't actually move the dst ptr to far ahead, therefore readd the diference -- -- // now we've got to handle the last block in the last row -- eor w12, w12, w12 // w12 = 0 = counter --integer_loop_y16: -- cmp w12, w10 -- bge integer_loop_y16_fin -- ldr w14, [x13], #4 -- and w15, w14, #0x3ff -- strh w15, [x0], #2 -- lsr w14, w14, #10 -- and w15, w14, #0x3ff -- strh w15, [x0], #2 -- lsr w14, w14, #10 -- and w15, w14, #0x3ff -- strh w15, [x0], #2 -- add w12, w12, #1 -- b integer_loop_y16 --integer_loop_y16_fin: -- --final_values_y16: -- // remaining point count = w11 -- ldr w14, [x13], #4 -- cmp w11, #0 -- beq final_values_y16_fin -- and w15, w14, #0x3ff -- strh w15, [x0], #2 -- cmp w11, #1 -- beq final_values_y16_fin -- lsr w14, w14, #10 -- and w15, w14, #0x3ff -- strh w15, [x0], #2 --final_values_y16_fin: -- -- ldp x23, x24, [sp, #32] -- ldp x21, x22, [sp, #16] -- ldp x19, x20, [sp], #48 -- ret --endfunc -- - //void ff_rpi_sand30_lines_to_planar_c16( - // uint8_t * dst_u, // [x0] - // unsigned int dst_stride_u, // [w1] == _w*2 -@@ -674,3 +452,330 @@ endfunc - // unsigned int _w, - // unsigned int h); - -+// void ff_rpi_sand30_lines_to_planar_y8( -+// uint8_t * dest, : x0 -+// unsigned int dst_stride, : w1 -+// const uint8_t * src, : x2 -+// unsigned int src_stride1, : w3, always 
128 -+// unsigned int src_stride2, : w4 -+// unsigned int _x, : w5 -+// unsigned int y, : w6 -+// unsigned int _w, : w7 -+// unsigned int h); : [sp, #0] -+// -+// Assumes that we are starting on a stripe boundary and that overreading -+// within the stripe is OK. However it does respect the dest size for wri -+ -+function ff_rpi_sand30_lines_to_planar_y16, export=1 -+ lsl w4, w4, #7 -+ sub w4, w4, #64 -+ sub w1, w1, w7, lsl #1 -+ uxtw x6, w6 -+ add x8, x2, x6, lsl #7 -+ ldr w6, [sp, #0] -+ -+10: -+ mov x2, x8 -+ mov w5, w7 -+1: -+ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 -+ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 -+ -+ subs w5, w5, #96 -+ -+ // v0, v1 -+ -+ shrn v18.4h, v0.4s, #14 -+ xtn v16.4h, v0.4s -+ shrn v17.4h, v0.4s, #10 -+ -+ shrn2 v18.8h, v1.4s, #14 -+ xtn2 v16.8h, v1.4s -+ shrn2 v17.8h, v1.4s, #10 -+ -+ ushr v18.8h, v18.8h, #6 -+ bic v16.8h, #0xfc, lsl #8 -+ bic v17.8h, #0xfc, lsl #8 -+ -+ // v2, v3 -+ -+ shrn v21.4h, v2.4s, #14 -+ xtn v19.4h, v2.4s -+ shrn v20.4h, v2.4s, #10 -+ -+ shrn2 v21.8h, v3.4s, #14 -+ xtn2 v19.8h, v3.4s -+ shrn2 v20.8h, v3.4s, #10 -+ -+ ushr v21.8h, v21.8h, #6 -+ bic v19.8h, #0xfc, lsl #8 -+ bic v20.8h, #0xfc, lsl #8 -+ -+ // v4, v5 -+ -+ shrn v24.4h, v4.4s, #14 -+ xtn v22.4h, v4.4s -+ shrn v23.4h, v4.4s, #10 -+ -+ shrn2 v24.8h, v5.4s, #14 -+ xtn2 v22.8h, v5.4s -+ shrn2 v23.8h, v5.4s, #10 -+ -+ ushr v24.8h, v24.8h, #6 -+ bic v22.8h, #0xfc, lsl #8 -+ bic v23.8h, #0xfc, lsl #8 -+ -+ // v6, v7 -+ -+ shrn v27.4h, v6.4s, #14 -+ xtn v25.4h, v6.4s -+ shrn v26.4h, v6.4s, #10 -+ -+ shrn2 v27.8h, v7.4s, #14 -+ xtn2 v25.8h, v7.4s -+ shrn2 v26.8h, v7.4s, #10 -+ -+ ushr v27.8h, v27.8h, #6 -+ bic v25.8h, #0xfc, lsl #8 -+ bic v26.8h, #0xfc, lsl #8 -+ -+ blt 2f -+ -+ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 -+ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 -+ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 -+ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 -+ -+ bne 1b -+ -+11: -+ subs w6, w6, #1 -+ add x0, x0, w1, uxtw -+ add x8, x8, #128 -+ bne 10b -+ -+ ret -+ 
-+// Partial final write -+2: -+ cmp w5, #48-96 -+ blt 1f -+ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 -+ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 -+ beq 11b -+ mov v16.16b, v22.16b -+ mov v17.16b, v23.16b -+ sub w5, w5, #48 -+ mov v18.16b, v24.16b -+ mov v19.16b, v25.16b -+ mov v20.16b, v26.16b -+ mov v21.16b, v27.16b -+1: -+ cmp w5, #24-96 -+ blt 1f -+ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 -+ beq 11b -+ mov v16.16b, v19.16b -+ mov v17.16b, v20.16b -+ sub w5, w5, #24 -+ mov v18.16b, v21.16b -+1: -+ cmp w5, #12-96 -+ blt 1f -+ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 -+ beq 11b -+ mov v16.2d[0], v16.2d[1] -+ sub w5, w5, #12 -+ mov v17.2d[0], v17.2d[1] -+ mov v18.2d[0], v18.2d[1] -+1: -+ cmp w5, #6-96 -+ blt 1f -+ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 -+ st3 {v16.h, v17.h, v18.h}[1], [x0], #6 -+ beq 11b -+ mov v16.2s[0], v16.2s[1] -+ sub w5, w5, #6 -+ mov v17.2s[0], v17.2s[1] -+ mov v18.2s[0], v18.2s[1] -+1: -+ cmp w5, #3-96 -+ blt 1f -+ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 -+ beq 11b -+ mov v16.4h[0], v16.4h[1] -+ sub w5, w5, #3 -+ mov v17.4h[0], v17.4h[1] -+1: -+ cmp w5, #2-96 -+ blt 1f -+ st2 {v16.h, v17.h}[0], [x0], #4 -+ b 11b -+1: -+ st1 {v16.h}[0], [x0], #2 -+ b 11b -+ -+endfunc -+ -+// void ff_rpi_sand30_lines_to_planar_y8( -+// uint8_t * dest, : x0 -+// unsigned int dst_stride, : w1 -+// const uint8_t * src, : x2 -+// unsigned int src_stride1, : w3, always 128 -+// unsigned int src_stride2, : w4 -+// unsigned int _x, : w5 -+// unsigned int y, : w6 -+// unsigned int _w, : w7 -+// unsigned int h); : [sp, #0] -+// -+// Assumes that we are starting on a stripe boundary and that overreading -+// within the stripe is OK. 
However it does respect the dest size for wri -+ -+function ff_rpi_sand30_lines_to_planar_y8, export=1 -+ lsl w4, w4, #7 -+ sub w4, w4, #64 -+ sub w1, w1, w7 -+ uxtw x6, w6 -+ add x8, x2, x6, lsl #7 -+ ldr w6, [sp, #0] -+ -+10: -+ mov x2, x8 -+ mov w5, w7 -+1: -+ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 -+ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 -+ -+ subs w5, w5, #96 -+ -+ // v0, v1 -+ -+ shrn v18.4h, v0.4s, #16 -+ xtn v16.4h, v0.4s -+ shrn v17.4h, v0.4s, #12 -+ -+ shrn2 v18.8h, v1.4s, #16 -+ xtn2 v16.8h, v1.4s -+ shrn2 v17.8h, v1.4s, #12 -+ -+ shrn v18.8b, v18.8h, #6 -+ shrn v16.8b, v16.8h, #2 -+ xtn v17.8b, v17.8h -+ -+ // v2, v3 -+ -+ shrn v21.4h, v2.4s, #16 -+ xtn v19.4h, v2.4s -+ shrn v20.4h, v2.4s, #12 -+ -+ shrn2 v21.8h, v3.4s, #16 -+ xtn2 v19.8h, v3.4s -+ shrn2 v20.8h, v3.4s, #12 -+ -+ shrn2 v18.16b, v21.8h, #6 -+ shrn2 v16.16b, v19.8h, #2 -+ xtn2 v17.16b, v20.8h -+ -+ // v4, v5 -+ -+ shrn v24.4h, v4.4s, #16 -+ xtn v22.4h, v4.4s -+ shrn v23.4h, v4.4s, #12 -+ -+ shrn2 v24.8h, v5.4s, #16 -+ xtn2 v22.8h, v5.4s -+ shrn2 v23.8h, v5.4s, #12 -+ -+ shrn v21.8b, v24.8h, #6 -+ shrn v19.8b, v22.8h, #2 -+ xtn v20.8b, v23.8h -+ -+ // v6, v7 -+ -+ shrn v27.4h, v6.4s, #16 -+ xtn v25.4h, v6.4s -+ shrn v26.4h, v6.4s, #12 -+ -+ shrn2 v27.8h, v7.4s, #16 -+ xtn2 v25.8h, v7.4s -+ shrn2 v26.8h, v7.4s, #12 -+ -+ shrn2 v21.16b, v27.8h, #6 -+ shrn2 v19.16b, v25.8h, #2 -+ xtn2 v20.16b, v26.8h -+ -+ blt 2f -+ -+ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 -+ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 -+ -+ bne 1b -+ -+11: -+ subs w6, w6, #1 -+ add x0, x0, w1, uxtw -+ add x8, x8, #128 -+ bne 10b -+ -+ ret -+ -+// Partial final write -+2: -+ cmp w5, #48-96 -+ blt 1f -+ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 -+ beq 11b -+ mov v16.16b, v22.16b -+ mov v17.16b, v23.16b -+ sub w5, w5, #48 -+ mov v18.16b, v24.16b -+1: -+ cmp w5, #24-96 -+ blt 1f -+ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 -+ beq 11b -+ mov v16.2d[0], v16.2d[1] -+ sub w5, w5, #24 -+ mov v17.2d[0], v17.2d[1] -+ mov 
v18.2d[0], v18.2d[1] -+1: -+ cmp w5, #12-96 -+ blt 1f -+ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 -+ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 -+ st3 {v16.b, v17.b, v18.b}[2], [x0], #3 -+ st3 {v16.b, v17.b, v18.b}[3], [x0], #3 -+ beq 11b -+ mov v16.2s[0], v16.2s[1] -+ sub w5, w5, #12 -+ mov v17.2s[0], v17.2s[1] -+ mov v18.2s[0], v18.2s[1] -+1: -+ cmp w5, #6-96 -+ blt 1f -+ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 -+ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 -+ beq 11b -+ mov v16.4h[0], v16.4h[1] -+ sub w5, w5, #6 -+ mov v17.4h[0], v17.4h[1] -+ mov v18.4h[0], v18.4h[1] -+1: -+ cmp w5, #3-96 -+ blt 1f -+ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 -+ beq 11b -+ mov v16.8b[0], v16.8b[1] -+ sub w5, w5, #3 -+ mov v17.8b[0], v17.8b[1] -+1: -+ cmp w5, #2-96 -+ blt 1f -+ st2 {v16.b, v17.b}[0], [x0], #2 -+ b 11b -+1: -+ st1 {v16.b}[0], [x0], #1 -+ b 11b -+ -+endfunc -+ -diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h -index b3aa481ea497..2a56135bc327 100644 ---- a/libavutil/aarch64/rpi_sand_neon.h -+++ b/libavutil/aarch64/rpi_sand_neon.h -@@ -49,6 +49,10 @@ void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_ - uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, - unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); - -+void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, -+ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, -+ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); -+ - #ifdef __cplusplus - } - #endif -diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S -index 80890fe9854b..60e697f6819b 100644 ---- a/libavutil/arm/rpi_sand_neon.S -+++ b/libavutil/arm/rpi_sand_neon.S -@@ -360,7 +360,6 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1 - ldr r6, [sp, #36] - ldr r7, [sp, #32] @ y - mov r12, #48 -- vmov.u16 q15, #0x3ff - sub r3, #1 - lsl r3, #7 - sub r1, r1, 
r6, lsl #1 -@@ -376,37 +375,33 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1 - vldm r2!, {q10-q13} - add lr, #64 - -- vshr.u32 q14, q10, #20 @ Cannot vshrn.u32 #20! -+ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! - ands lr, #127 - vshrn.u32 d2, q10, #10 - vmovn.u32 d0, q10 -- vmovn.u32 d4, q14 - -- vshr.u32 q14, q11, #20 -+ vshrn.u32 d5, q11, #14 - it eq - addeq r2, r3 - vshrn.u32 d3, q11, #10 - vmovn.u32 d1, q11 -- vmovn.u32 d5, q14 - - subs r5, #48 -- vand q0, q15 -- vand q1, q15 -- vand q2, q15 -+ vshr.u16 q2, #6 -+ vbic.u16 q0, #0xfc00 -+ vbic.u16 q1, #0xfc00 - -- vshr.u32 q14, q12, #20 -+ vshrn.u32 d20, q12, #14 - vshrn.u32 d18, q12, #10 - vmovn.u32 d16, q12 -- vmovn.u32 d20, q14 - -- vshr.u32 q14, q13, #20 -+ vshrn.u32 d21, q13, #14 - vshrn.u32 d19, q13, #10 - vmovn.u32 d17, q13 -- vmovn.u32 d21, q14 - -- vand q8, q15 -- vand q9, q15 -- vand q10, q15 -+ vshr.u16 q10, #6 -+ vbic.u16 q8, #0xfc00 -+ vbic.u16 q9 , #0xfc00 - blt 2f - - vst3.16 {d0, d2, d4}, [r0], r12 -@@ -499,7 +494,6 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1 - ldr r7, [sp, #48] - ldr r9, [sp, #52] - mov r12, #48 -- vmov.u16 q15, #0x3ff - sub r8, #1 - lsl r8, #7 - add r5, r5, r7, lsl #7 -@@ -515,48 +509,44 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1 - add lr, #64 - - @ N.B. 
unpack [0,1,2] -> (reg order) 1, 0, 2 -- vshr.u32 q14, q0, #20 -- vshrn.u32 d16, q0, #10 -+ vshrn.u32 d20, q0, #14 - vmovn.u32 d18, q0 -+ vshrn.u32 d0, q0, #10 - ands lr, #127 -- vmovn.u32 d20, q14 - -- vshr.u32 q14, q1, #20 -- vshrn.u32 d17, q1, #10 -+ vshrn.u32 d21, q1, #14 - vmovn.u32 d19, q1 -- vmovn.u32 d21, q14 -+ vshrn.u32 d1, q1, #10 - -- vshr.u32 q14, q2, #20 - vshrn.u32 d22, q2, #10 -- vmovn.u32 d24, q2 -- vmovn.u32 d26, q14 -+ vmovn.u32 d2, q2 -+ vshrn.u32 d4, q2, #14 - -- vshr.u32 q14, q3, #20 -- vshrn.u32 d23, q3, #10 -- vmovn.u32 d25, q3 - add r10, r0, #24 -- vmovn.u32 d27, q14 -+ vshrn.u32 d23, q3, #10 -+ vmovn.u32 d3, q3 -+ vshrn.u32 d5, q3, #14 - - it eq - addeq r4, r8 -- vuzp.16 q8, q11 -- vuzp.16 q9, q12 -- vuzp.16 q10, q13 -+ vuzp.16 q0, q11 -+ vuzp.16 q9, q1 -+ vuzp.16 q10, q2 - -- @ q8 V0, V3,.. -> q0 -+ @ q0 V0, V3,.. - @ q9 U0, U3... - @ q10 U1, U4... - @ q11 U2, U5,.. -- @ q12 V1, V4,.. -> q1 -- @ q13 V2, V5,.. -> q2 -+ @ q1 V1, V4, -+ @ q2 V2, V5,.. - - subs r6, #24 -- vand q11, q15 -- vand q9, q15 -- vand q10, q15 -- vand q0, q8, q15 -- vand q1, q12, q15 -- vand q2, q13, q15 -+ vbic.u16 q11, #0xfc00 -+ vbic.u16 q9, #0xfc00 -+ vshr.u16 q10, #6 -+ vshr.u16 q2, #6 -+ vbic.u16 q0, #0xfc00 -+ vbic.u16 q1, #0xfc00 - - blt 2f - -@@ -765,4 +755,171 @@ function ff_rpi_sand30_lines_to_planar_p010, export=1 - endfunc - - -+@ void ff_rpi_sand30_lines_to_planar_y8( -+@ uint8_t * dest, // [r0] -+@ unsigned int dst_stride, // [r1] -+@ const uint8_t * src, // [r2] -+@ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+@ unsigned int src_stride2, // [sp, #0] -> r3 -+@ unsigned int _x, // [sp, #4] Ignored - 0 -+@ unsigned int y, // [sp, #8] (r7 in prefix) -+@ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+@ unsigned int h); // [sp, #16] -> r7 -+@ -+@ Assumes that we are starting on a stripe boundary and that overreading -+@ within the stripe is OK. 
However it does respect the dest size for wri -+ -+function ff_rpi_sand30_lines_to_planar_y8, export=1 -+ push {r4-r8, lr} @ +24 -+ ldr r3, [sp, #24] -+ ldr r6, [sp, #36] -+ ldr r7, [sp, #32] @ y -+ mov r12, #48 -+ lsl r3, #7 -+ sub r1, r1, r6 -+ add r8, r2, r7, lsl #7 -+ ldr r7, [sp, #40] -+ -+10: -+ mov r2, r8 -+ add r4, r0, #24 -+ mov r5, r6 -+1: -+ vldm r2, {q8-q15} -+ -+ subs r5, #96 -+ -+ vmovn.u32 d0, q8 -+ vshrn.u32 d2, q8, #12 -+ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! -+ -+ add r2, r3 -+ -+ vmovn.u32 d1, q9 -+ vshrn.u32 d3, q9, #12 -+ vshrn.u32 d5, q9, #16 -+ -+ pld [r2, #0] -+ -+ vshrn.u16 d0, q0, #2 -+ vmovn.u16 d1, q1 -+ vshrn.u16 d2, q2, #6 -+ -+ vmovn.u32 d16, q10 -+ vshrn.u32 d18, q10, #12 -+ vshrn.u32 d20, q10, #16 -+ -+ vmovn.u32 d17, q11 -+ vshrn.u32 d19, q11, #12 -+ vshrn.u32 d21, q11, #16 -+ -+ pld [r2, #64] -+ -+ vshrn.u16 d4, q8, #2 -+ vmovn.u16 d5, q9 -+ vshrn.u16 d6, q10, #6 -+ -+ vmovn.u32 d16, q12 -+ vshrn.u32 d18, q12, #12 -+ vshrn.u32 d20, q12, #16 -+ -+ vmovn.u32 d17, q13 -+ vshrn.u32 d19, q13, #12 -+ vshrn.u32 d21, q13, #16 -+ -+ vshrn.u16 d16, q8, #2 -+ vmovn.u16 d17, q9 -+ vshrn.u16 d18, q10, #6 -+ -+ vmovn.u32 d20, q14 -+ vshrn.u32 d22, q14, #12 -+ vshrn.u32 d24, q14, #16 -+ -+ vmovn.u32 d21, q15 -+ vshrn.u32 d23, q15, #12 -+ vshrn.u32 d25, q15, #16 -+ -+ vshrn.u16 d20, q10, #2 -+ vmovn.u16 d21, q11 -+ vshrn.u16 d22, q12, #6 -+ -+ blt 2f -+ -+ vst3.8 {d0, d1, d2}, [r0], r12 -+ vst3.8 {d4, d5, d6}, [r4], r12 -+ vst3.8 {d16, d17, d18}, [r0], r12 -+ vst3.8 {d20, d21, d22}, [r4], r12 -+ -+ bne 1b -+ -+11: -+ subs r7, #1 -+ add r0, r1 -+ add r8, #128 -+ bne 10b -+ -+ pop {r4-r8, pc} -+ -+@ Partial final write -+2: -+ cmp r5, #48-96 -+ blt 1f -+ vst3.8 {d0, d1, d2}, [r0], r12 -+ vst3.8 {d4, d5, d6}, [r4], r12 -+ beq 11b -+ vmov q0, q8 -+ vmov q2, q10 -+ sub r5, #48 -+ vmov d2, d18 -+ vmov d6, d22 -+1: -+ cmp r5, #24-96 -+ blt 1f -+ vst3.8 {d0, d1, d2}, [r0]! 
-+ beq 11b -+ vmov q0, q2 -+ sub r5, #24 -+ vmov d2, d6 -+1: -+ cmp r5, #12-96 -+ blt 1f -+ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! -+ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! -+ vst3.8 {d0[2], d1[2], d2[2]}, [r0]! -+ vst3.8 {d0[3], d1[3], d2[3]}, [r0]! -+ beq 11b -+ vmov s0, s1 -+ sub r5, #12 -+ vmov s2, s3 -+ vmov s4, s5 -+1: -+ cmp r5, #6-96 -+ blt 1f -+ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! -+ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! -+ add r0, #12 -+ beq 11b -+ vshr.u32 d0, #16 -+ sub r5, #6 -+ vshr.u32 d1, #16 -+ vshr.u32 d2, #16 -+1: -+ cmp r5, #3-96 -+ blt 1f -+ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! -+ beq 11b -+ sub r5, #3 -+ vshr.u32 d0, #8 -+ vshr.u32 d1, #8 -+1: -+ cmp r5, #2-96 -+ blt 1f -+ vst2.8 {d0[0], d1[0]}, [r0]! -+ b 11b -+1: -+ vst1.8 {d0[0]}, [r0]! -+ b 11b -+ -+endfunc -+ - -diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h -index 447f367bea8f..d457c1087082 100644 ---- a/libavutil/arm/rpi_sand_neon.h -+++ b/libavutil/arm/rpi_sand_neon.h -@@ -95,5 +95,16 @@ void ff_rpi_sand30_lines_to_planar_p010( - unsigned int _w, // [sp, #12] -> r6 (cur r5) - unsigned int h); // [sp, #16] -> r7 - -+void ff_rpi_sand30_lines_to_planar_y8( -+ uint8_t * dest, // [r0] -+ unsigned int dst_stride, // [r1] -+ const uint8_t * src, // [r2] -+ unsigned int src_stride1, // [r3] Ignored - assumed 128 -+ unsigned int src_stride2, // [sp, #0] -> r3 -+ unsigned int _x, // [sp, #4] Ignored - 0 -+ unsigned int y, // [sp, #8] (r7 in prefix) -+ unsigned int _w, // [sp, #12] -> r6 (cur r5) -+ unsigned int h); // [sp, #16] -> r7 -+ - #endif // AVUTIL_ARM_SAND_NEON_H - -diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c -index 256c3d532f38..b6071e2928f7 100644 ---- a/libavutil/rpi_sand_fns.c -+++ b/libavutil/rpi_sand_fns.c -@@ -247,7 +247,7 @@ void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, - const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; - const unsigned int slice_inc = ((stride2 - 1) * stride1) 
>> 2; // RHS of a stripe to LHS of next in words - --#if HAVE_SAND_ASM && 0 -+#if HAVE_SAND_ASM - if (_x == 0) { - ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); - return; - -From 76556eb5987c43cc46decc0d642ed7d762113613 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 7 Jun 2022 14:46:12 +0000 -Subject: [PATCH 054/186] v4l2_m2m_enc: Add the ability to encode DRM_PRIME - frames - ---- - libavcodec/v4l2_buffers.c | 100 +++++++++++--- - libavcodec/v4l2_buffers.h | 20 ++- - libavcodec/v4l2_context.c | 212 +++++++++++++++++++++++++--- - libavcodec/v4l2_context.h | 15 +- - libavcodec/v4l2_m2m.c | 37 +++-- - libavcodec/v4l2_m2m.h | 3 + - libavcodec/v4l2_m2m_dec.c | 171 ++++++----------------- - libavcodec/v4l2_m2m_enc.c | 283 +++++++++++++++++++++++++++++++++++++- - 8 files changed, 643 insertions(+), 198 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 8c4f18dbede2..9ef2f40e3991 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -29,6 +29,8 @@ - #include - #include - #include "libavcodec/avcodec.h" -+#include "libavcodec/internal.h" -+#include "libavutil/avassert.h" - #include "libavutil/pixdesc.h" - #include "libavutil/hwcontext.h" - #include "v4l2_context.h" -@@ -60,27 +62,39 @@ static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) - return tb.num && tb.den ? tb : v4l2_timebase; - } - -+static inline struct timeval tv_from_int(const int64_t t) -+{ -+ return (struct timeval){ -+ .tv_usec = t % USEC_PER_SEC, -+ .tv_sec = t / USEC_PER_SEC -+ }; -+} -+ -+static inline int64_t int_from_tv(const struct timeval t) -+{ -+ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; -+} -+ - static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) - { - /* convert pts to v4l2 timebase */ - const int64_t v4l2_pts = -- out->context->no_pts_rescale ? pts : - pts == AV_NOPTS_VALUE ? 
0 : - av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); -- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; -- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; -+ out->buf.timestamp = tv_from_int(v4l2_pts); - } - - static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) - { -+ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); -+ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; -+#if 0 - /* convert pts back to encoder timebase */ -- const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + -- avbuf->buf.timestamp.tv_usec; -- - return - avbuf->context->no_pts_rescale ? v4l2_pts : - v4l2_pts == 0 ? AV_NOPTS_VALUE : - av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); -+#endif - } - - static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) -@@ -435,7 +449,7 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data) - - ff_mutex_lock(&ctx->lock); - -- avbuf->status = V4L2BUF_AVAILABLE; -+ ff_v4l2_buffer_set_avail(avbuf); - - if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) { - av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); -@@ -599,6 +613,38 @@ static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) - return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); - } - -+static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) -+{ -+ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; -+ -+ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) -+ return AVERROR(EINVAL); -+ -+ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { -+ // Only currently cope with single buffer types -+ if (out->buf.length != 1) -+ return AVERROR_PATCHWELCOME; -+ if (src->nb_objects != 1) -+ return AVERROR(EINVAL); -+ -+ out->planes[0].m.fd = src->objects[0].fd; -+ } -+ else { -+ if (src->nb_objects != 1) -+ 
return AVERROR(EINVAL); -+ -+ out->buf.m.fd = src->objects[0].fd; -+ } -+ -+ // No need to copy src AVDescriptor and if we did then we may confuse -+ // fd close on free -+ out->ref_buf = av_buffer_ref(frame->buf[0]); -+ -+ return 0; -+} -+ - static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - { - int i; -@@ -678,7 +724,7 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - * - ******************************************************************************/ - --int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) -+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) - { - out->buf.flags = frame->key_frame ? - (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : -@@ -688,10 +734,15 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) - v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); - v4l2_set_color_range(out, frame->color_range); - // PTS & interlace are buffer vars -- v4l2_set_pts(out, frame->pts); -+ if (track_ts) -+ out->buf.timestamp = tv_from_int(track_ts); -+ else -+ v4l2_set_pts(out, frame->pts); - v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); - -- return v4l2_buffer_swframe_to_buf(frame, out); -+ return frame->format == AV_PIX_FMT_DRM_PRIME ? -+ v4l2_buffer_primeframe_to_buf(frame, out) : -+ v4l2_buffer_swframe_to_buf(frame, out); - } - - int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) -@@ -754,6 +805,7 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) - - pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? 
avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; - pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; -+ pkt->flags = 0; - - if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) - pkt->flags |= AV_PKT_FLAG_KEY; -@@ -768,8 +820,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) - return 0; - } - --int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -- const void *extdata, size_t extlen) -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, -+ const void *extdata, size_t extlen, -+ const int64_t timestamp) - { - int ret; - -@@ -783,7 +836,10 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - if (ret && ret != AVERROR(ENOMEM)) - return ret; - -- v4l2_set_pts(out, pkt->pts); -+ if (timestamp) -+ out->buf.timestamp = tv_from_int(timestamp); -+ else -+ v4l2_set_pts(out, pkt->pts); - - out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? - (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : -@@ -794,7 +850,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - - int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) - { -- return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0); -+ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); - } - - -@@ -814,13 +870,15 @@ static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) - close(avbuf->drm_frame.objects[i].fd); - } - -+ av_buffer_unref(&avbuf->ref_buf); -+ - ff_weak_link_unref(&avbuf->context_wl); - - av_free(avbuf); - } - - --int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx) -+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) - { - int ret, i; - V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); -@@ -837,7 +895,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - } - - avbuf->context = ctx; -- 
avbuf->buf.memory = V4L2_MEMORY_MMAP; -+ avbuf->buf.memory = mem; - avbuf->buf.type = ctx->type; - avbuf->buf.index = index; - -@@ -867,6 +925,8 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - avbuf->num_planes = 1; - - for (i = 0; i < avbuf->num_planes; i++) { -+ const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && -+ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); - - avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? - ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : -@@ -875,21 +935,17 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; - -- if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -- !buf_to_m2mctx(avbuf)->output_drm) { -+ if (want_mmap) - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); -- } - } else { - avbuf->plane_info[i].length = avbuf->buf.length; - -- if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -- !buf_to_m2mctx(avbuf)->output_drm) { -+ if (want_mmap) - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); -- } - } - - if (avbuf->plane_info[i].mm_addr == MAP_FAILED) { -diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 3b7ca4d99e1e..1ac32c5989f1 100644 ---- a/libavcodec/v4l2_buffers.h -+++ b/libavcodec/v4l2_buffers.h -@@ -59,6 +59,10 @@ typedef struct V4L2Buffer { - - /* DRM descriptor */ - AVDRMFrameDescriptor drm_frame; -+ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we -+ * are done -+ */ -+ AVBufferRef * ref_buf; - - /* keep track of the mmap address and mmap length */ - struct 
V4L2Plane_info { -@@ -110,8 +114,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); - */ - int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); - --int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -- const void *extdata, size_t extlen); -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, -+ const void *extdata, size_t extlen, -+ const int64_t timestamp); - - /** - * Extracts the data from an AVFrame to a V4L2Buffer -@@ -121,7 +126,7 @@ int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, - * - * @returns 0 in case of success, a negative AVERROR code otherwise - */ --int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); -+int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); - - /** - * Initializes a V4L2Buffer -@@ -131,7 +136,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); - * - * @returns 0 in case of success, a negative AVERROR code otherwise - */ --int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx); -+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); - - /** - * Enqueues a V4L2Buffer -@@ -142,5 +147,12 @@ int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context - */ - int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); - -+static inline void -+ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) -+{ -+ avbuf->status = V4L2BUF_AVAILABLE; -+ av_buffer_unref(&avbuf->ref_buf); -+} -+ - - #endif // AVCODEC_V4L2_BUFFERS_H -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index b3662aedaa9f..7a707d21fc7a 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -43,6 +43,160 @@ struct v4l2_format_update { - int update_avfmt; - }; - -+ -+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) 
-+{ -+ return (int64_t)n; -+} -+ -+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) -+{ -+ return (unsigned int)pts; -+} -+ -+// FFmpeg requires us to propagate a number of vars from the coded pkt into -+// the decoded frame. The only thing that tracks like that in V4L2 stateful -+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no -+// guarantees about PTS being unique or specified for every frame so replace -+// the supplied PTS with a simple incrementing number and keep a circular -+// buffer of all the things we want preserved (including the original PTS) -+// indexed by the tracking no. -+static int64_t -+xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) -+{ -+ int64_t track_pts; -+ -+ // Avoid 0 -+ if (++x->track_no == 0) -+ x->track_no = 1; -+ -+ track_pts = track_to_pts(avctx, x->track_no); -+ -+ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); -+ x->last_pkt_dts = avpkt->dts; -+ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ -+ .discard = 0, -+ .pending = 1, -+ .pkt_size = avpkt->size, -+ .pts = avpkt->pts, -+ .dts = avpkt->dts, -+ .reordered_opaque = avctx->reordered_opaque, -+ .pkt_pos = avpkt->pos, -+ .pkt_duration = avpkt->duration, -+ .track_pts = track_pts -+ }; -+ return track_pts; -+} -+ -+static int64_t -+xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) -+{ -+ int64_t track_pts; -+ -+ // Avoid 0 -+ if (++x->track_no == 0) -+ x->track_no = 1; -+ -+ track_pts = track_to_pts(avctx, x->track_no); -+ -+ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); -+ x->last_pkt_dts = frame->pkt_dts; -+ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ -+ .discard = 0, -+ .pending = 1, -+ .pkt_size = 0, -+ 
.pts = frame->pts, -+ .dts = AV_NOPTS_VALUE, -+ .reordered_opaque = frame->reordered_opaque, -+ .pkt_pos = frame->pkt_pos, -+ .pkt_duration = frame->pkt_duration, -+ .track_pts = track_pts -+ }; -+ return track_pts; -+} -+ -+ -+// Returns -1 if we should discard the frame -+static int -+xlat_pts_frame_out(AVCodecContext *const avctx, -+ xlat_track_t * const x, -+ AVFrame *const frame) -+{ -+ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; -+ V4L2m2mTrackEl *const t = x->track_els + n; -+ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) -+ { -+ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, -+ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ frame->pts = AV_NOPTS_VALUE; -+ frame->pkt_dts = x->last_pkt_dts; -+ frame->reordered_opaque = x->last_opaque; -+ frame->pkt_pos = -1; -+ frame->pkt_duration = 0; -+ frame->pkt_size = -1; -+ } -+ else if (!t->discard) -+ { -+ frame->pts = t->pending ? 
t->pts : AV_NOPTS_VALUE; -+ frame->pkt_dts = x->last_pkt_dts; -+ frame->reordered_opaque = t->reordered_opaque; -+ frame->pkt_pos = t->pkt_pos; -+ frame->pkt_duration = t->pkt_duration; -+ frame->pkt_size = t->pkt_size; -+ -+ x->last_opaque = x->track_els[n].reordered_opaque; -+ if (frame->pts != AV_NOPTS_VALUE) -+ x->last_pts = frame->pts; -+ t->pending = 0; -+ } -+ else -+ { -+ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ return -1; -+ } -+ -+ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", -+ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); -+ return 0; -+} -+ -+// Returns -1 if we should discard the frame -+static int -+xlat_pts_pkt_out(AVCodecContext *const avctx, -+ xlat_track_t * const x, -+ AVPacket *const pkt) -+{ -+ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; -+ V4L2m2mTrackEl *const t = x->track_els + n; -+ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) -+ { -+ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, -+ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); -+ pkt->pts = AV_NOPTS_VALUE; -+ } -+ else if (!t->discard) -+ { -+ pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; -+ -+ x->last_opaque = x->track_els[n].reordered_opaque; -+ if (pkt->pts != AV_NOPTS_VALUE) -+ x->last_pts = pkt->pts; -+ t->pending = 0; -+ } -+ else -+ { -+ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); -+ return -1; -+ } -+ -+ // * Would like something much better than this...xlat(offset + out_count)? 
-+ pkt->dts = pkt->pts; -+ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", -+ pkt->pts, t->track_pts, n); -+ return 0; -+} -+ -+ - static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) - { - return V4L2_TYPE_IS_OUTPUT(ctx->type) ? -@@ -353,12 +507,14 @@ dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) - atomic_fetch_sub(&ctx->q_count, 1); - - avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; -- avbuf->status = V4L2BUF_AVAILABLE; -+ ff_v4l2_buffer_set_avail(avbuf); - avbuf->buf = buf; - if (is_mp) { - memcpy(avbuf->planes, planes, sizeof(planes)); - avbuf->buf.m.planes = avbuf->planes; - } -+ // Done with any attached buffer -+ av_buffer_unref(&avbuf->ref_buf); - - if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { - // Zero length cap buffer return == EOS -@@ -733,7 +889,7 @@ static void flush_all_buffers_status(V4L2Context* const ctx) - for (i = 0; i < ctx->num_buffers; ++i) { - struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; - if (buf->status == V4L2BUF_IN_DRIVER) -- buf->status = V4L2BUF_AVAILABLE; -+ ff_v4l2_buffer_set_avail(buf); - } - atomic_store(&ctx->q_count, 0); - } -@@ -787,6 +943,8 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) - { - if (cmd == VIDIOC_STREAMOFF) - flush_all_buffers_status(ctx); -+ else -+ ctx->first_buf = 1; - - ctx->streamon = (cmd == VIDIOC_STREAMON); - av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name, -@@ -803,14 +961,16 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) - - int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) - { -- V4L2m2mContext *s = ctx_to_m2mctx(ctx); -+ V4L2m2mContext *const s = ctx_to_m2mctx(ctx); -+ AVCodecContext *const avctx = s->avctx; -+ int64_t track_ts; - V4L2Buffer* avbuf; - int ret; - - if (!frame) { - ret = v4l2_stop_encode(ctx); - if (ret) -- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); -+ av_log(avctx, AV_LOG_ERROR, "%s 
stop_encode\n", ctx->name); - s->draining= 1; - return 0; - } -@@ -819,7 +979,9 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) - if (!avbuf) - return AVERROR(EAGAIN); - -- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); -+ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); -+ -+ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); - if (ret) - return ret; - -@@ -830,14 +992,16 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, - const void * extdata, size_t extlen) - { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); -+ AVCodecContext *const avctx = s->avctx; - V4L2Buffer* avbuf; - int ret; -+ int64_t track_ts; - - if (!pkt->size) { - ret = v4l2_stop_decode(ctx); - // Log but otherwise ignore stop failure - if (ret) -- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); -+ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); - s->draining = 1; - return 0; - } -@@ -846,7 +1010,9 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, - if (!avbuf) - return AVERROR(EAGAIN); - -- ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen); -+ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); -+ -+ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); - if (ret == AVERROR(ENOMEM)) - av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", - __func__, pkt->size, avbuf->planes[0].length); -@@ -858,24 +1024,36 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, - - int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) - { -+ V4L2m2mContext *s = ctx_to_m2mctx(ctx); -+ AVCodecContext *const avctx = s->avctx; - V4L2Buffer *avbuf; - int rv; - -- if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) -- return rv; -+ do { -+ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) -+ return rv; -+ if ((rv = 
ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) -+ return rv; -+ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); - -- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); -+ return 0; - } - - int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) - { -+ V4L2m2mContext *s = ctx_to_m2mctx(ctx); -+ AVCodecContext *const avctx = s->avctx; - V4L2Buffer *avbuf; - int rv; - -- if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) -- return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC -+ do { -+ if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) -+ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC -+ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) -+ return rv; -+ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); - -- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); -+ return 0; - } - - int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) -@@ -951,7 +1129,7 @@ void ff_v4l2_context_release(V4L2Context* ctx) - } - - --static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers) -+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) - { - V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - struct v4l2_requestbuffers req; -@@ -962,7 +1140,7 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers - - memset(&req, 0, sizeof(req)); - req.count = req_buffers; -- req.memory = V4L2_MEMORY_MMAP; -+ req.memory = mem; - req.type = ctx->type; - while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { - if (errno != EINTR) { -@@ -986,7 +1164,7 @@ static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers - } - - for (i = 0; i < ctx->num_buffers; i++) { -- ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx); -+ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); - if (ret) { - av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] 
initialization (%s)\n", ctx->name, i, av_err2str(ret)); - goto fail_release; -@@ -1052,7 +1230,7 @@ int ff_v4l2_context_init(V4L2Context* ctx) - goto fail_unref_hwframes; - } - -- ret = create_buffers(ctx, ctx->num_buffers); -+ ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); - if (ret < 0) - goto fail_unref_hwframes; - -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 0efff58f1892..21265f1bd77b 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -91,11 +91,19 @@ typedef struct V4L2Context { - */ - int num_buffers; - -+ /** -+ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF -+ */ -+ enum v4l2_memory buf_mem; -+ - /** - * Whether the stream has been started (VIDIOC_STREAMON has been sent). - */ - int streamon; - -+ /* 1st buffer after stream on */ -+ int first_buf; -+ - /** - * Either no more buffers available or an unrecoverable error was notified - * by the V4L2 kernel driver: once set the context has to be exited. 
-@@ -105,11 +113,10 @@ typedef struct V4L2Context { - int flag_last; - - /** -- * PTS rescale not wanted -- * If the PTS is just a dummy frame count then rescale is -- * actively harmful -+ * If NZ then when Qing frame/pkt use this rather than the -+ * "real" PTS - */ -- int no_pts_rescale; -+ uint64_t track_ts; - - AVBufferRef *frames_ref; - atomic_int q_count; -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index 6dd01e2e0085..1e30d15fd866 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -35,6 +35,14 @@ - #include "v4l2_fmt.h" - #include "v4l2_m2m.h" - -+static void -+xlat_init(xlat_track_t * const x) -+{ -+ memset(x, 0, sizeof(*x)); -+ x->last_pts = AV_NOPTS_VALUE; -+} -+ -+ - static inline int v4l2_splane_video(struct v4l2_capability *cap) - { - if (cap->capabilities & (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT) && -@@ -67,7 +75,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) - - s->capture.done = s->output.done = 0; - s->capture.name = "capture"; -+ s->capture.buf_mem = V4L2_MEMORY_MMAP; - s->output.name = "output"; -+ s->output.buf_mem = s->input_drm ? 
V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; - atomic_init(&s->refcount, 0); - sem_init(&s->refsync, 0, 0); - -@@ -334,35 +344,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv) - return v4l2_configure_contexts(s); - } - --int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) -+int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps) - { -- *s = av_mallocz(sizeof(V4L2m2mContext)); -- if (!*s) -+ V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext)); -+ -+ *pps = NULL; -+ if (!s) - return AVERROR(ENOMEM); - -- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext), -+ priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s), - &v4l2_m2m_destroy_context, NULL, 0); - if (!priv->context_ref) { -- av_freep(s); -+ av_free(s); - return AVERROR(ENOMEM); - } - - /* assign the context */ -- priv->context = *s; -- (*s)->priv = priv; -+ priv->context = s; -+ s->priv = priv; - - /* populate it */ -- priv->context->capture.num_buffers = priv->num_capture_buffers; -- priv->context->output.num_buffers = priv->num_output_buffers; -- priv->context->self_ref = priv->context_ref; -- priv->context->fd = -1; -+ s->capture.num_buffers = priv->num_capture_buffers; -+ s->output.num_buffers = priv->num_output_buffers; -+ s->self_ref = priv->context_ref; -+ s->fd = -1; -+ xlat_init(&s->xlat); - - priv->context->frame = av_frame_alloc(); - if (!priv->context->frame) { - av_buffer_unref(&priv->context_ref); -- *s = NULL; /* freed when unreferencing context_ref */ - return AVERROR(ENOMEM); - } - -+ *pps = s; - return 0; - } -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 19d618698dd7..d6cdaf65e183 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -103,6 +103,9 @@ typedef struct V4L2m2mContext { - /* generate DRM frames */ - int output_drm; - -+ /* input frames are drmprime */ -+ int input_drm; -+ - /* Frame tracking */ - xlat_track_t xlat; - int pending_hw; -diff --git a/libavcodec/v4l2_m2m_dec.c 
b/libavcodec/v4l2_m2m_dec.c -index 7e170447064a..fbbfc81342d5 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -169,96 +169,17 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) - return 0; - } - --static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) --{ -- return (int64_t)n; --} -- --static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) --{ -- return (unsigned int)pts; --} -- --// FFmpeg requires us to propagate a number of vars from the coded pkt into --// the decoded frame. The only thing that tracks like that in V4L2 stateful --// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no --// guarantees about PTS being unique or specified for every frame so replace --// the supplied PTS with a simple incrementing number and keep a circular --// buffer of all the things we want preserved (including the original PTS) --// indexed by the tracking no. - static void --xlat_pts_in(AVCodecContext *const avctx, xlat_track_t *const x, AVPacket *const avpkt) --{ -- int64_t track_pts; -- -- // Avoid 0 -- if (++x->track_no == 0) -- x->track_no = 1; -- -- track_pts = track_to_pts(avctx, x->track_no); -- -- av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); -- x->last_pkt_dts = avpkt->dts; -- x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ -- .discard = 0, -- .pending = 1, -- .pkt_size = avpkt->size, -- .pts = avpkt->pts, -- .dts = avpkt->dts, -- .reordered_opaque = avctx->reordered_opaque, -- .pkt_pos = avpkt->pos, -- .pkt_duration = avpkt->duration, -- .track_pts = track_pts -- }; -- avpkt->pts = track_pts; --} -- --// Returns -1 if we should discard the frame --static int --xlat_pts_out(AVCodecContext *const avctx, -- xlat_track_t * const x, -+set_best_effort_pts(AVCodecContext *const avctx, - pts_stats_t * const ps, - AVFrame *const frame) - { -- unsigned int n = 
pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; -- V4L2m2mTrackEl *const t = x->track_els + n; -- if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) -- { -- av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -- frame->pts = AV_NOPTS_VALUE; -- frame->pkt_dts = x->last_pkt_dts; -- frame->reordered_opaque = x->last_opaque; -- frame->pkt_pos = -1; -- frame->pkt_duration = 0; -- frame->pkt_size = -1; -- } -- else if (!t->discard) -- { -- frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; -- frame->pkt_dts = x->last_pkt_dts; -- frame->reordered_opaque = t->reordered_opaque; -- frame->pkt_pos = t->pkt_pos; -- frame->pkt_duration = t->pkt_duration; -- frame->pkt_size = t->pkt_size; -- -- x->last_opaque = x->track_els[n].reordered_opaque; -- if (frame->pts != AV_NOPTS_VALUE) -- x->last_pts = frame->pts; -- t->pending = 0; -- } -- else -- { -- av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -- return -1; -- } -- - pts_stats_add(ps, frame->pts); - - frame->best_effort_timestamp = pts_stats_guess(ps); - frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? 
-- av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", -- frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); -- return 0; -+ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", -+ frame->pts, frame->best_effort_timestamp, frame->pkt_dts); - } - - static void -@@ -272,13 +193,6 @@ xlat_flush(xlat_track_t * const x) - x->last_pts = AV_NOPTS_VALUE; - } - --static void --xlat_init(xlat_track_t * const x) --{ -- memset(x, 0, sizeof(*x)); -- x->last_pts = AV_NOPTS_VALUE; --} -- - static int - xlat_pending(const xlat_track_t * const x) - { -@@ -419,8 +333,6 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); - return ret; - } -- -- xlat_pts_in(avctx, &s->xlat, &s->buf_pkt); - } - - if (s->draining) { -@@ -542,49 +454,47 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - prefer_dq ? 5 : - src_rv == NQ_Q_FULL ? -1 : 0; - -- do { -- // Dequeue frame will unref any previous contents of frame -- // if it returns success so we don't need an explicit unref -- // when discarding -- // This returns AVERROR(EAGAIN) on timeout or if -- // there is room in the input Q and timeout == -1 -- dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); -- -- // Failure due to no buffer in Q? -- if (dst_rv == AVERROR(ENOSPC)) { -- // Wait & retry -- if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { -- dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); -- } -+ // Dequeue frame will unref any previous contents of frame -+ // if it returns success so we don't need an explicit unref -+ // when discarding -+ // This returns AVERROR(EAGAIN) on timeout or if -+ // there is room in the input Q and timeout == -1 -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); -+ -+ // Failure due to no buffer in Q? 
-+ if (dst_rv == AVERROR(ENOSPC)) { -+ // Wait & retry -+ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); - } -+ } -+ -+ // Adjust dynamic pending threshold -+ if (dst_rv == 0) { -+ if (--s->pending_hw < PENDING_HW_MIN) -+ s->pending_hw = PENDING_HW_MIN; -+ s->pending_n = 0; - -- // Adjust dynamic pending threshold -- if (dst_rv == 0) { -- if (--s->pending_hw < PENDING_HW_MIN) -- s->pending_hw = PENDING_HW_MIN; -+ set_best_effort_pts(avctx, &s->pts_stat, frame); -+ } -+ else if (dst_rv == AVERROR(EAGAIN)) { -+ if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { -+ s->pending_hw = pending * 16 + PENDING_HW_OFFSET; - s->pending_n = 0; - } -- else if (dst_rv == AVERROR(EAGAIN)) { -- if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { -- s->pending_hw = pending * 16 + PENDING_HW_OFFSET; -- s->pending_n = 0; -- } -- } -+ } - -- if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { -- av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); -- dst_rv = AVERROR_EOF; -- s->capture.done = 1; -- } -- else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) -- av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", -- s->draining, s->capture.done); -- else if (dst_rv && dst_rv != AVERROR(EAGAIN)) -- av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", -- s->draining, s->capture.done, dst_rv); -- -- // Go again if we got a frame that we need to discard -- } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame)); -+ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { -+ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); -+ dst_rv = AVERROR_EOF; -+ s->capture.done = 1; -+ } -+ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) -+ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", -+ s->draining, s->capture.done); -+ else if (dst_rv && dst_rv != 
AVERROR(EAGAIN)) -+ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", -+ s->draining, s->capture.done, dst_rv); - } - - ++i; -@@ -791,7 +701,6 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - if (ret < 0) - return ret; - -- xlat_init(&s->xlat); - pts_stats_init(&s->pts_stat, avctx, "decoder"); - s->pending_hw = PENDING_HW_MIN; - -@@ -810,12 +719,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - output->av_codec_id = avctx->codec_id; - output->av_pix_fmt = AV_PIX_FMT_NONE; - output->min_buf_size = max_coded_size(avctx); -- output->no_pts_rescale = 1; - - capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; - capture->av_pix_fmt = avctx->pix_fmt; - capture->min_buf_size = 0; -- capture->no_pts_rescale = 1; - - /* the client requests the codec to generate DRM frames: - * - data[0] will therefore point to the returned AVDRMFrameDescriptor -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index 9a0837ecf3d7..05ff6ba72655 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -24,6 +24,8 @@ - #include - #include - #include -+#include -+ - #include "encode.h" - #include "libavcodec/avcodec.h" - #include "libavutil/pixdesc.h" -@@ -38,6 +40,34 @@ - #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x - #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x - -+// P030 should be defined in drm_fourcc.h and hopefully will be sometime -+// in the future but until then... 
-+#ifndef DRM_FORMAT_P030 -+#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') -+#endif -+ -+#ifndef DRM_FORMAT_NV15 -+#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') -+#endif -+ -+#ifndef DRM_FORMAT_NV20 -+#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') -+#endif -+ -+#ifndef V4L2_CID_CODEC_BASE -+#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE -+#endif -+ -+// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined -+// in videodev2.h hopefully will be sometime in the future but until then... -+#ifndef V4L2_PIX_FMT_NV12_10_COL128 -+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') -+#endif -+ -+#ifndef V4L2_PIX_FMT_NV12_COL128 -+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ -+#endif -+ - static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) - { - struct v4l2_streamparm parm = { 0 }; -@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p) - static int v4l2_check_b_frame_support(V4L2m2mContext *s) - { - if (s->avctx->max_b_frames) -- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); -+ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); - -- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); -+ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); - v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); - if (s->avctx->max_b_frames == 0) - return 0; - - avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); -- - return AVERROR_PATCHWELCOME; - } - -@@ -271,13 +300,184 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s) - return 0; - } - -+static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) -+{ -+ const AVDRMFrameDescriptor *const src = (const 
AVDRMFrameDescriptor *)frame->data[0]; -+ -+ const uint32_t drm_fmt = src->layers[0].format; -+ // Treat INVALID as LINEAR -+ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? -+ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; -+ uint32_t pix_fmt = 0; -+ uint32_t w = 0; -+ uint32_t h = 0; -+ uint32_t bpl = src->layers[0].planes[0].pitch; -+ -+ // We really don't expect multiple layers -+ // All formats that we currently cope with are single object -+ -+ if (src->nb_layers != 1 || src->nb_objects != 1) -+ return AVERROR(EINVAL); -+ -+ switch (drm_fmt) { -+ case DRM_FORMAT_YUV420: -+ if (mod == DRM_FORMAT_MOD_LINEAR) { -+ if (src->layers[0].nb_planes != 3) -+ break; -+ pix_fmt = V4L2_PIX_FMT_YUV420; -+ h = src->layers[0].planes[1].offset / bpl; -+ w = bpl; -+ } -+ break; -+ -+ case DRM_FORMAT_NV12: -+ if (mod == DRM_FORMAT_MOD_LINEAR) { -+ if (src->layers[0].nb_planes != 2) -+ break; -+ pix_fmt = V4L2_PIX_FMT_NV12; -+ h = src->layers[0].planes[1].offset / bpl; -+ w = bpl; -+ } -+ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { -+ if (src->layers[0].nb_planes != 2) -+ break; -+ pix_fmt = V4L2_PIX_FMT_NV12_COL128; -+ w = bpl; -+ h = src->layers[0].planes[1].offset / 128; -+ bpl = fourcc_mod_broadcom_param(mod); -+ } -+ break; -+ -+ case DRM_FORMAT_P030: -+ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { -+ if (src->layers[0].nb_planes != 2) -+ break; -+ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; -+ w = bpl / 2; // Matching lie to how we construct this -+ h = src->layers[0].planes[1].offset / 128; -+ bpl = fourcc_mod_broadcom_param(mod); -+ } -+ break; -+ -+ default: -+ break; -+ } -+ -+ if (!pix_fmt) -+ return AVERROR(EINVAL); -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { -+ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; -+ -+ pix->width = w; -+ pix->height = h; -+ pix->pixelformat = pix_fmt; -+ pix->plane_fmt[0].bytesperline = bpl; -+ pix->num_planes = 1; -+ } 
-+ else { -+ struct v4l2_pix_format *const pix = &format->fmt.pix; -+ -+ pix->width = w; -+ pix->height = h; -+ pix->pixelformat = pix_fmt; -+ pix->bytesperline = bpl; -+ } -+ -+ return 0; -+} -+ -+// Do we have similar enough formats to be usable? -+static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) -+{ -+ if (a->type != b->type) -+ return 0; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { -+ const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; -+ const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; -+ unsigned int i; -+ if (pa->pixelformat != pb->pixelformat || -+ pa->num_planes != pb->num_planes) -+ return 0; -+ for (i = 0; i != pa->num_planes; ++i) { -+ if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) -+ return 0; -+ } -+ } -+ else { -+ const struct v4l2_pix_format *const pa = &a->fmt.pix; -+ const struct v4l2_pix_format *const pb = &b->fmt.pix; -+ if (pa->pixelformat != pb->pixelformat || -+ pa->bytesperline != pb->bytesperline) -+ return 0; -+ } -+ return 1; -+} -+ -+ - static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) - { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const output = &s->output; - -+ // Signal EOF if needed -+ if (!frame) { -+ return ff_v4l2_context_enqueue_frame(output, frame); -+ } -+ -+ if (s->input_drm && !output->streamon) { -+ int rv; -+ struct v4l2_format req_format = {.type = output->format.type}; -+ -+ // Set format when we first get a buffer -+ if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); -+ return rv; -+ } -+ -+ ff_v4l2_context_release(output); -+ -+ output->format = req_format; -+ -+ if ((rv = ff_v4l2_context_set_format(output)) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); -+ return rv; -+ } -+ -+ if (!fmt_eq(&req_format, &output->format)) { -+ av_log(avctx, AV_LOG_ERROR, "Format 
mismatch after setup\n"); -+ return AVERROR(EINVAL); -+ } -+ -+ output->selection.top = frame->crop_top; -+ output->selection.left = frame->crop_left; -+ output->selection.width = av_frame_cropped_width(frame); -+ output->selection.height = av_frame_cropped_height(frame); -+ -+ if ((rv = ff_v4l2_context_init(output)) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); -+ return rv; -+ } -+ -+ { -+ struct v4l2_selection selection = { -+ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, -+ .target = V4L2_SEL_TGT_CROP, -+ .r = output->selection -+ }; -+ if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { -+ av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", -+ selection.r.width, selection.r.height, selection.r.left, selection.r.top, -+ av_err2str(AVERROR(errno))); -+ } -+ av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", -+ selection.r.width, selection.r.height, selection.r.left, selection.r.top); -+ } -+ } -+ - #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME -- if (frame && frame->pict_type == AV_PICTURE_TYPE_I) -+ if (frame->pict_type == AV_PICTURE_TYPE_I) - v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); - #endif - -@@ -328,7 +528,70 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) - } - - dequeue: -- return ff_v4l2_context_dequeue_packet(capture, avpkt); -+ if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0) -+ return ret; -+ -+ if (capture->first_buf == 1) { -+ uint8_t * data; -+ const int len = avpkt->size; -+ -+ // 1st buffer after streamon should be SPS/PPS -+ capture->first_buf = 2; -+ -+ // Clear both possible stores so there is no chance of confusion -+ av_freep(&s->extdata_data); -+ s->extdata_size = 0; -+ av_freep(&avctx->extradata); -+ avctx->extradata_size = 0; -+ -+ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) != NULL) -+ memcpy(data, avpkt->data, len); -+ -+ av_packet_unref(avpkt); -+ -+ if (data == NULL) -+ return 
AVERROR(ENOMEM); -+ -+ // We need to copy the header, but keep local if not global -+ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { -+ avctx->extradata = data; -+ avctx->extradata_size = len; -+ } -+ else { -+ s->extdata_data = data; -+ s->extdata_size = len; -+ } -+ -+ if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0) -+ return ret; -+ } -+ -+ // First frame must be key so mark as such even if encoder forgot -+ if (capture->first_buf == 2) -+ avpkt->flags |= AV_PKT_FLAG_KEY; -+ -+ // Add SPS/PPS to the start of every key frame if non-global headers -+ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { -+ const size_t newlen = s->extdata_size + avpkt->size; -+ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); -+ -+ if (buf == NULL) { -+ av_packet_unref(avpkt); -+ return AVERROR(ENOMEM); -+ } -+ -+ memcpy(buf->data, s->extdata_data, s->extdata_size); -+ memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); -+ -+ av_buffer_unref(&avpkt->buf); -+ avpkt->buf = buf; -+ avpkt->data = buf->data; -+ avpkt->size = newlen; -+ } -+ -+// av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret); -+ capture->first_buf = 0; -+ return 0; - } - - static av_cold int v4l2_encode_init(AVCodecContext *avctx) -@@ -340,6 +603,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) - uint32_t v4l2_fmt_output; - int ret; - -+ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); -+ - ret = ff_v4l2_m2m_create_context(priv, &s); - if (ret < 0) - return ret; -@@ -347,13 +612,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) - capture = &s->capture; - output = &s->output; - -+ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); -+ - /* common settings output/capture */ - output->height = capture->height = avctx->height; - output->width = capture->width = avctx->width; - - /* output 
context */ - output->av_codec_id = AV_CODEC_ID_RAWVIDEO; -- output->av_pix_fmt = avctx->pix_fmt; -+ output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt : -+ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt : -+ AV_PIX_FMT_YUV420P; - - /* capture context */ - capture->av_codec_id = avctx->codec_id; -@@ -372,7 +641,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) - v4l2_fmt_output = output->format.fmt.pix.pixelformat; - - pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); -- if (pix_fmt_output != avctx->pix_fmt) { -+ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); - av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); - return AVERROR(EINVAL); - -From e073991161bc9fc70a5ea3079d6b25c9d9008a4b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 8 Jun 2022 16:13:31 +0000 -Subject: [PATCH 055/186] v4l2_m2m_dec: Use DTS for best effort PTS if PTS is - always NO_PTS - -If we do have DTS but don't have PTS then assume PTS=DTS. -Also get rid of last_dts from tracking as its info wasn't actually -useful in any way. 
---- - libavcodec/v4l2_context.c | 6 ++---- - libavcodec/v4l2_m2m.h | 1 - - libavcodec/v4l2_m2m_dec.c | 8 +++++++- - 3 files changed, 9 insertions(+), 6 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 7a707d21fc7a..6b97eab41ed7 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -73,7 +73,6 @@ xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPack - track_pts = track_to_pts(avctx, x->track_no); - - av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); -- x->last_pkt_dts = avpkt->dts; - x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ - .discard = 0, - .pending = 1, -@@ -100,7 +99,6 @@ xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFr - track_pts = track_to_pts(avctx, x->track_no); - - av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); -- x->last_pkt_dts = frame->pkt_dts; - x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ - .discard = 0, - .pending = 1, -@@ -129,7 +127,7 @@ xlat_pts_frame_out(AVCodecContext *const avctx, - av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, - "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); - frame->pts = AV_NOPTS_VALUE; -- frame->pkt_dts = x->last_pkt_dts; -+ frame->pkt_dts = AV_NOPTS_VALUE; - frame->reordered_opaque = x->last_opaque; - frame->pkt_pos = -1; - frame->pkt_duration = 0; -@@ -138,7 +136,7 @@ xlat_pts_frame_out(AVCodecContext *const avctx, - else if (!t->discard) - { - frame->pts = t->pending ? 
t->pts : AV_NOPTS_VALUE; -- frame->pkt_dts = x->last_pkt_dts; -+ frame->pkt_dts = t->dts; - frame->reordered_opaque = t->reordered_opaque; - frame->pkt_pos = t->pkt_pos; - frame->pkt_duration = t->pkt_duration; -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index d6cdaf65e183..ee72beb0522b 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -67,7 +67,6 @@ typedef struct pts_stats_s - typedef struct xlat_track_s { - unsigned int track_no; - int64_t last_pts; -- int64_t last_pkt_dts; - int64_t last_opaque; - V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; - } xlat_track_t; -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index fbbfc81342d5..485a96f4b487 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -177,7 +177,13 @@ set_best_effort_pts(AVCodecContext *const avctx, - pts_stats_add(ps, frame->pts); - - frame->best_effort_timestamp = pts_stats_guess(ps); -- frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? -+ // If we can't guess from just PTS - try DTS -+ if (frame->best_effort_timestamp == AV_NOPTS_VALUE) -+ frame->best_effort_timestamp = frame->pkt_dts; -+ -+ // We can't emulate what s/w does in a useful manner and using the -+ // "correct" answer seems to just confuse things. -+ frame->pkt_dts = frame->pts; - av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", - frame->pts, frame->best_effort_timestamp, frame->pkt_dts); - } - -From 256637631cb246663c310d0bc8c3769b4af70692 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 30 Jun 2022 15:59:23 +0000 -Subject: [PATCH 056/186] v4l2: Update H265 request for current API - -This works with v9 of the H265 patch set which hopefully will be the -last one. Hevc controls extracted from patched v4l2-controls into -hevc-ctrls-v4 - if HEVC controls found in the system v4l2-controls then -those will be used instead. 
---- - libavcodec/Makefile | 2 +- - libavcodec/hevc-ctrls-v4.h | 515 +++++++++++++++++++++++++++++++++ - libavcodec/v4l2_req_hevc_v4.c | 3 + - libavcodec/v4l2_req_hevc_vx.c | 81 ++++-- - libavcodec/v4l2_request_hevc.c | 6 +- - libavcodec/v4l2_request_hevc.h | 1 + - 6 files changed, 583 insertions(+), 25 deletions(-) - create mode 100644 libavcodec/hevc-ctrls-v4.h - create mode 100644 libavcodec/v4l2_req_hevc_v4.c - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 2b3c16185d75..d433a712366f 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -1000,7 +1000,7 @@ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o - OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o - OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o - OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o\ -- v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o -+ v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o v4l2_req_hevc_v4.o - OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o - OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o - OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h new file mode 100644 -index 000000000000..7e05f6e7c39b +index 000000000000..c02fdbe5a8e9 --- /dev/null +++ b/libavcodec/hevc-ctrls-v4.h -@@ -0,0 +1,515 @@ +@@ -0,0 +1,524 @@ +/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* + * Video for Linux Two controls header file @@ -23513,6 +1141,15 @@ index 000000000000..7e05f6e7c39b +#include +#include + ++#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS ++#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */ ++#endif ++#ifndef V4L2_CID_CODEC_STATELESS_BASE ++#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900) ++#endif ++ ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++ +#define 
V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) +#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) +#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) @@ -23973,1282 +1610,6382 @@ index 000000000000..7e05f6e7c39b +}; + +#endif -diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c -new file mode 100644 -index 000000000000..c35579d8e0ab ---- /dev/null -+++ b/libavcodec/v4l2_req_hevc_v4.c -@@ -0,0 +1,3 @@ -+#define HEVC_CTRLS_VERSION 4 -+#include "v4l2_req_hevc_vx.c" +diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c +index 0dc24f82f8a4..e27f480a8e5c 100644 +--- a/libavcodec/hevc/hevcdec.c ++++ b/libavcodec/hevc/hevcdec.c +@@ -366,6 +366,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps) + else + avctx->color_range = AVCOL_RANGE_MPEG; + ++ if (sps->chroma_format_idc == 1) { ++ avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ? ++ sps->vui.common.chroma_sample_loc_type_top_field + 1 : ++ AVCHROMA_LOC_LEFT; ++ } ++ else if (sps->chroma_format_idc == 2 || ++ sps->chroma_format_idc == 3) { ++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; ++ } ++ else { ++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; ++ } + -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -index 611fa21cc319..761c5b2dc74b 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -6,8 +6,6 @@ - #include "internal.h" - #include "thread.h" - --#include "v4l2_request_hevc.h" -- - #if HEVC_CTRLS_VERSION == 1 - #include "hevc-ctrls-v1.h" - -@@ -18,10 +16,37 @@ - #include "hevc-ctrls-v2.h" - #elif HEVC_CTRLS_VERSION == 3 - #include "hevc-ctrls-v3.h" -+#elif HEVC_CTRLS_VERSION == 4 -+#include -+#if !defined(V4L2_CID_STATELESS_HEVC_SPS) -+#include "hevc-ctrls-v4.h" -+#endif - #else - #error Unknown HEVC_CTRLS_VERSION + if (sps->vui.common.colour_description_present_flag) { + 
avctx->color_primaries = sps->vui.common.colour_primaries; + avctx->color_trc = sps->vui.common.transfer_characteristics; +@@ -527,6 +540,7 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) + CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \ + CONFIG_HEVC_D3D12VA_HWACCEL + \ + CONFIG_HEVC_NVDEC_HWACCEL + \ ++ CONFIG_HEVC_V4L2REQUEST_HWACCEL + \ + CONFIG_HEVC_VAAPI_HWACCEL + \ + CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ + CONFIG_HEVC_VDPAU_HWACCEL + \ +@@ -561,6 +575,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) #endif - -+#ifndef V4L2_CID_STATELESS_HEVC_SPS -+#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS -+#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS -+#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS -+#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX -+#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS -+#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE -+#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE -+ -+#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED -+#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED -+#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE -+#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B + #if CONFIG_HEVC_VULKAN_HWACCEL + *fmt++ = AV_PIX_FMT_VULKAN; +#endif -+ -+// Should be in videodev2 but we might not have a good enough one -+#ifndef V4L2_PIX_FMT_HEVC_SLICE -+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ -+#endif -+ -+#include "v4l2_request_hevc.h" -+ - #include "libavutil/hwcontext_drm.h" - - #include -@@ -259,9 +284,13 @@ fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * 
const ++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL ++ *fmt++ = AV_PIX_FMT_DRM_PRIME; #endif - entry->field_pic = frame->frame->interlaced_frame; - -+#if HEVC_CTRLS_VERSION <= 3 - /* TODO: Interleaved: Get the POC for each field. */ - entry->pic_order_cnt[0] = frame->poc; - entry->pic_order_cnt[1] = frame->poc; -+#else -+ entry->pic_order_cnt_val = frame->poc; + break; + case AV_PIX_FMT_YUV420P10: +@@ -588,6 +605,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) + #endif + #if CONFIG_HEVC_NVDEC_HWACCEL + *fmt++ = AV_PIX_FMT_CUDA; +#endif ++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL ++ *fmt++ = AV_PIX_FMT_DRM_PRIME; + #endif + break; + case AV_PIX_FMT_YUV444P: +@@ -682,13 +702,16 @@ static int set_sps(HEVCContext *s, HEVCLayerContext *l, const HEVCSPS *sps) + if (!sps) + return 0; + +- ret = pic_arrays_init(l, sps); +- if (ret < 0) +- goto fail; ++ // If hwaccel then we don't need all the s/w decode helper arrays ++ if (!s->avctx->hwaccel) { ++ ret = pic_arrays_init(l, sps); ++ if (ret < 0) ++ goto fail; + +- ff_hevc_pred_init(&s->hpc, sps->bit_depth); +- ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth); +- ff_videodsp_init (&s->vdsp, sps->bit_depth); ++ ff_hevc_pred_init(&s->hpc, sps->bit_depth); ++ ff_hevc_dsp_init (&s->hevcdsp, sps->bit_depth); ++ ff_videodsp_init (&s->vdsp, sps->bit_depth); ++ } + + l->sps = ff_refstruct_ref_c(sps); + s->vps = ff_refstruct_ref_c(sps->vps); +@@ -3179,11 +3202,13 @@ static int hevc_frame_start(HEVCContext *s, HEVCLayerContext *l, } } - return n; -@@ -287,8 +316,11 @@ static void fill_slice_params(const HEVCContext * const h, - *slice_params = (struct v4l2_ctrl_hevc_slice_params) { - .bit_size = bit_size, -+#if HEVC_CTRLS_VERSION <= 3 - .data_bit_offset = bit_offset, -- -+#else -+ .data_byte_offset = bit_offset / 8 + 1, -+#endif - /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ - .slice_segment_addr = sh->slice_segment_addr, - -@@ -376,8 +408,10 @@ static void fill_slice_params(const HEVCContext * const h, - av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); - } - -+#if HEVC_CTRLS_VERSION <= 3 - for (i = 0; i < slice_params->num_entry_point_offsets; i++) - slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; -+#endif - } - - #if HEVC_CTRLS_VERSION >= 2 -@@ -761,30 +795,30 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, - - struct v4l2_ext_control control[] = { - { -- .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS, -+ .id = V4L2_CID_STATELESS_HEVC_SPS, - .ptr = &controls->sps, - .size = sizeof(controls->sps), - }, - { -- .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS, -+ .id = V4L2_CID_STATELESS_HEVC_PPS, - .ptr = &controls->pps, - .size = sizeof(controls->pps), - }, - #if HEVC_CTRLS_VERSION >= 2 - { -- .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS, -+ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, - .ptr = dec, - .size = sizeof(*dec), - }, - #endif - { -- .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, -+ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, - .ptr = slices + slice_no, - .size = sizeof(*slices) * slice_count, - }, - // Optional - { -- .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX, -+ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, - .ptr = &controls->scaling_matrix, - .size = sizeof(controls->scaling_matrix), - }, -@@ -1000,12 +1034,12 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - - // Check for var slice array - struct v4l2_query_ext_ctrl qc[] = { -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS }, -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS }, -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS }, -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX }, -+ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, -+ { .id = V4L2_CID_STATELESS_HEVC_SPS }, -+ { .id = 
V4L2_CID_STATELESS_HEVC_PPS }, -+ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, - #if HEVC_CTRLS_VERSION >= 2 -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS }, -+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS }, - #endif - }; - // Order & size must match! -@@ -1042,12 +1076,13 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - - fill_sps(&ctrl_sps, sps); - -- if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { -+ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { - av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); - return AVERROR(EINVAL); - } - - ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0; -+ av_log(avctx, AV_LOG_INFO, "%s SPS muti-slice\n", ctx->multi_slice ? "Has" : "No"); - return 0; - } - -@@ -1058,29 +1093,29 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - int ret; - - struct v4l2_query_ext_ctrl querys[] = { -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, }, -+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, -+ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, - }; - - struct v4l2_ext_control ctrls[] = { -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -- { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, -+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, - }; - - mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); - - ctx->decode_mode = querys[0].default_value; - -- if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED && -- ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) { -+ if (ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && -+ ctx->decode_mode != 
V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { - av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode); - return AVERROR(EINVAL); - } - - ctx->start_code = querys[1].default_value; -- if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE && -- ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { -+ if (ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_NONE && -+ ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { - av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code); - return AVERROR(EINVAL); - } -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index 20e4e0ab1559..cd79aad5631a 100644 ---- a/libavcodec/v4l2_request_hevc.c -+++ b/libavcodec/v4l2_request_hevc.c -@@ -210,7 +210,11 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - goto fail4; - } - -- if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { -+ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) { -+ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); -+ ctx->fns = &V2(ff_v4l2_req_hevc, 4); +- memset(l->horizontal_bs, 0, l->bs_width * l->bs_height); +- memset(l->vertical_bs, 0, l->bs_width * l->bs_height); +- memset(l->cbf_luma, 0, sps->min_tb_width * sps->min_tb_height); +- memset(l->is_pcm, 0, (sps->min_pu_width + 1) * (sps->min_pu_height + 1)); +- memset(l->tab_slice_address, -1, pic_size_in_ctb * sizeof(*l->tab_slice_address)); ++ if (l->horizontal_bs) { ++ memset(l->horizontal_bs, 0, l->bs_width * l->bs_height); ++ memset(l->vertical_bs, 0, l->bs_width * l->bs_height); ++ memset(l->cbf_luma, 0, sps->min_tb_width * sps->min_tb_height); ++ memset(l->is_pcm, 0, (sps->min_pu_width + 1) * (sps->min_pu_height + 1)); ++ memset(l->tab_slice_address, -1, pic_size_in_ctb * sizeof(*l->tab_slice_address)); + } -+ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { - av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); - 
ctx->fns = &V2(ff_v4l2_req_hevc, 3); + + if (IS_IDR(s)) + ff_hevc_clear_refs(l); +@@ -3750,8 +3775,13 @@ static int hevc_receive_frame(AVCodecContext *avctx, AVFrame *frame) } -diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h -index ed48d62e2d53..d4adb3f812a6 100644 ---- a/libavcodec/v4l2_request_hevc.h -+++ b/libavcodec/v4l2_request_hevc.h -@@ -99,5 +99,6 @@ typedef struct v4l2_req_decode_fns { - extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); - extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); - extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); -+extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); - #endif - -From d6457f1d161c7430750b1cc925e370314ba9b83c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sun, 3 Jul 2022 13:40:41 +0000 -Subject: [PATCH 057/186] v4l2_req: Observe limit on size of slice_array - -This in fact provides some minor simplifications by combing the -multi-slice and single-slice paths. - -(cherry picked from commit 7631e6d1a66fca9048605c214f3464c90d37932c) ---- - libavcodec/v4l2_req_hevc_vx.c | 39 ++++++++++++++-------------------- - libavcodec/v4l2_request_hevc.h | 5 +---- - 2 files changed, 17 insertions(+), 27 deletions(-) - -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -index 761c5b2dc74b..9d08d13d9e68 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -840,18 +840,21 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t * - int bcount = get_bits_count(&h->HEVClc->gb); - uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; - -+ const unsigned int n = rd->num_slices; -+ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; + ret = decode_nal_units(s, avpkt->data, avpkt->size); +- if (ret < 0) ++ if (ret < 0) { ++ // Ensure that hwaccel knows this frame is over ++ if (FF_HW_HAS_CB(avctx, abort_frame)) ++ FF_HW_SIMPLE_CALL(avctx, 
abort_frame); + - int rv; - struct slice_info * si; + return ret; ++ } - if ((rv = slice_add(rd)) != 0) - return rv; - -- si = rd->slices + rd->num_slices - 1; -+ si = rd->slices + n; - si->ptr = buffer; - si->len = size; - -- if (ctx->multi_slice && rd->num_slices > 1) { -- struct slice_info *const si0 = rd->slices; -+ if (n != block_start) { -+ struct slice_info *const si0 = rd->slices + block_start; - const size_t offset = (buffer - si0->ptr); - boff += offset * 8; - size += offset; -@@ -859,11 +862,11 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t * + do_output: + if (ff_container_fifo_read(s->output_fifo, frame) >= 0) { +@@ -3780,8 +3810,10 @@ static int hevc_ref_frame(HEVCFrame *dst, const HEVCFrame *src) } - #if HEVC_CTRLS_VERSION >= 2 -- if (rd->num_slices == 1) -+ if (n == 0) - fill_decode_params(h, &rd->dec); -- fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff); -+ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); - #else -- fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff); -+ fill_slice_params(h, rd->slice_params + n, size * 8, boff); + dst->pps = ff_refstruct_ref_c(src->pps); +- dst->tab_mvf = ff_refstruct_ref(src->tab_mvf); +- dst->rpl_tab = ff_refstruct_ref(src->rpl_tab); ++ if (src->tab_mvf) ++ dst->tab_mvf = ff_refstruct_ref(src->tab_mvf); ++ if (src->rpl_tab) ++ dst->rpl_tab = ff_refstruct_ref(src->rpl_tab); + dst->rpl = ff_refstruct_ref(src->rpl); + dst->nb_rpl_elems = src->nb_rpl_elems; + +@@ -4096,6 +4128,9 @@ const FFCodec ff_hevc_decoder = { + #if CONFIG_HEVC_NVDEC_HWACCEL + HWACCEL_NVDEC(hevc), #endif ++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL ++ HWACCEL_V4L2REQUEST(hevc), ++#endif + #if CONFIG_HEVC_VAAPI_HWACCEL + HWACCEL_VAAPI(hevc), + #endif +diff --git a/libavcodec/hevc/parser.c b/libavcodec/hevc/parser.c +index 16b40e2b10db..cf9830648a78 100644 +--- a/libavcodec/hevc/parser.c ++++ b/libavcodec/hevc/parser.c +@@ -93,6 +93,19 @@ 
static int hevc_parse_slice_header(AVCodecParserContext *s, H2645NAL *nal, + avctx->profile = sps->ptl.general_ptl.profile_idc; + avctx->level = sps->ptl.general_ptl.level_idc; - return 0; -@@ -997,18 +1000,11 @@ static int v4l2_request_hevc_end_frame(AVCodecContext *avctx) - } - - // Send as slices -- if (ctx->multi_slice) -- { -- if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0) -+ for (i = 0; i < rd->num_slices; i += ctx->max_slices) { -+ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices); -+ if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0) ++ if (sps->chroma_format_idc == 1) { ++ avctx->chroma_sample_location = sps->vui.common.chroma_loc_info_present_flag ? ++ sps->vui.common.chroma_sample_loc_type_top_field + 1 : ++ AVCHROMA_LOC_LEFT; ++ } ++ else if (sps->chroma_format_idc == 2 || ++ sps->chroma_format_idc == 3) { ++ avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;; ++ } ++ else { ++ avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED; ++ } ++ + if (sps->vps->vps_timing_info_present_flag) { + num = sps->vps->vps_num_units_in_tick; + den = sps->vps->vps_time_scale; +diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c +index 6ba667e9f520..fc7197ff2bcb 100644 +--- a/libavcodec/hevc/refs.c ++++ b/libavcodec/hevc/refs.c +@@ -140,16 +140,19 @@ static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l) goto fail; - } -- else -- { -- for (i = 0; i != rd->num_slices; ++i) { -- if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0) -- goto fail; -- } -- } + frame->nb_rpl_elems = s->pkt.nb_nals; - // Set the drm_prime desriptor - drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); -@@ -1081,8 +1077,6 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - return AVERROR(EINVAL); - } +- frame->tab_mvf = ff_refstruct_pool_get(l->tab_mvf_pool); +- if (!frame->tab_mvf) +- goto fail; +- +- frame->rpl_tab = ff_refstruct_pool_get(l->rpl_tab_pool); +- if (!frame->rpl_tab) +- goto fail; +- 
frame->ctb_count = l->sps->ctb_width * l->sps->ctb_height; +- for (j = 0; j < frame->ctb_count; j++) +- frame->rpl_tab[j] = frame->rpl; ++ if (l->tab_mvf_pool) { ++ frame->tab_mvf = ff_refstruct_pool_get(l->tab_mvf_pool); ++ if (!frame->tab_mvf) ++ goto fail; ++ } ++ if (l->rpl_tab_pool) { ++ frame->rpl_tab = ff_refstruct_pool_get(l->rpl_tab_pool); ++ if (!frame->rpl_tab) ++ goto fail; ++ frame->ctb_count = l->sps->ctb_width * l->sps->ctb_height; ++ for (j = 0; j < frame->ctb_count; j++) ++ frame->rpl_tab[j] = frame->rpl; ++ } + + if (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) + frame->f->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; +@@ -287,14 +290,17 @@ static int init_slice_rpl(HEVCContext *s) + int ctb_count = frame->ctb_count; + int ctb_addr_ts = s->pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; + int i; ++ RefPicListTab * const rpl = frame->rpl + s->slice_idx; + + if (s->slice_idx >= frame->nb_rpl_elems) + return AVERROR_INVALIDDATA; + +- for (i = ctb_addr_ts; i < ctb_count; i++) +- frame->rpl_tab[i] = frame->rpl + s->slice_idx; ++ if (frame->rpl_tab) { ++ for (i = ctb_addr_ts; i < ctb_count; i++) ++ frame->rpl_tab[i] = rpl; ++ } + +- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; ++ frame->refPicList = (RefPicList *)rpl; -- ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0; -- av_log(avctx, AV_LOG_INFO, "%s SPS muti-slice\n", ctx->multi_slice ? "Has" : "No"); return 0; } - -@@ -1120,11 +1114,10 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - return AVERROR(EINVAL); - } - -- ctx->max_slices = querys[2].elems; -- if (ctx->max_slices > MAX_SLICES) { -- av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices); -- return AVERROR(EINVAL); -- } -+ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || -+ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? 
-+ 1 : querys[2].dims[0]; -+ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); - - ctrls[0].value = ctx->decode_mode; - ctrls[1].value = ctx->start_code; -diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h -index d4adb3f812a6..0029e2330977 100644 ---- a/libavcodec/v4l2_request_hevc.h -+++ b/libavcodec/v4l2_request_hevc.h -@@ -46,8 +46,6 @@ - #define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 - #endif - --#define MAX_SLICES 128 -- - #define VCAT(name, version) name##_v##version - #define V2(n,v) VCAT(n, v) - #define V(n) V2(n, HEVC_CTRLS_VERSION) -@@ -64,10 +62,9 @@ typedef struct V4L2RequestContextHEVC { - - unsigned int timestamp; // ?? maybe uint64_t - -- int multi_slice; - int decode_mode; - int start_code; -- int max_slices; -+ unsigned int max_slices; - - req_decode_q decode_q; - - -From d4794c95cd25fb87a3dcc7585eb7b057211a87dd Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 4 Jul 2022 14:43:20 +0100 -Subject: [PATCH 058/186] v4l2_req: Add entry point offsets array control - ---- - libavcodec/v4l2_req_hevc_vx.c | 88 +++++++++++++++++++++++++++------- - libavcodec/v4l2_request_hevc.h | 3 +- - 2 files changed, 72 insertions(+), 19 deletions(-) - -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -index 9d08d13d9e68..43ef6631edc1 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -82,11 +82,16 @@ typedef struct V4L2MediaReqDescriptor { - struct v4l2_ctrl_hevc_slice_params * slice_params; - struct slice_info * slices; - -+ size_t num_offsets; -+ size_t alloced_offsets; -+ uint32_t *offsets; +diff --git a/libavcodec/hwaccel_internal.h b/libavcodec/hwaccel_internal.h +index b0cc22bb6899..d319f2abe1f5 100644 +--- a/libavcodec/hwaccel_internal.h ++++ b/libavcodec/hwaccel_internal.h +@@ -161,6 +161,17 @@ typedef struct FFHWAccel { + * Callback to flush the hwaccel state. 
+ */ + void (*flush)(AVCodecContext *avctx); + - } V4L2MediaReqDescriptor; ++ /** ++ * Called if parsing fails ++ * ++ * An error has occured, end_frame will not be called ++ * start_frame & decode_slice may or may not have been called ++ * Optional ++ * ++ * @param avctx the codec context ++ */ ++ void (*abort_frame)(AVCodecContext *avctx); + } FFHWAccel; - struct slice_info { - const uint8_t * ptr; - size_t len; // bytes -+ size_t n_offsets; + static inline const FFHWAccel *ffhwaccel(const AVHWAccel *codec) +diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h +index 5171e4c7d735..0b586f760d11 100644 +--- a/libavcodec/hwaccels.h ++++ b/libavcodec/hwaccels.h +@@ -43,6 +43,7 @@ extern const struct FFHWAccel ff_hevc_d3d11va2_hwaccel; + extern const struct FFHWAccel ff_hevc_d3d12va_hwaccel; + extern const struct FFHWAccel ff_hevc_dxva2_hwaccel; + extern const struct FFHWAccel ff_hevc_nvdec_hwaccel; ++extern const struct FFHWAccel ff_hevc_v4l2request_hwaccel; + extern const struct FFHWAccel ff_hevc_vaapi_hwaccel; + extern const struct FFHWAccel ff_hevc_vdpau_hwaccel; + extern const struct FFHWAccel ff_hevc_videotoolbox_hwaccel; +diff --git a/libavcodec/hwconfig.h b/libavcodec/hwconfig.h +index ee29ca631df0..a7cb81aa044f 100644 +--- a/libavcodec/hwconfig.h ++++ b/libavcodec/hwconfig.h +@@ -67,6 +67,8 @@ void ff_hwaccel_uninit(AVCodecContext *avctx); + HW_CONFIG_HWACCEL(1, 1, 0, D3D11, D3D11VA, ff_ ## codec ## _d3d11va2_hwaccel) + #define HWACCEL_NVDEC(codec) \ + HW_CONFIG_HWACCEL(1, 1, 0, CUDA, CUDA, ff_ ## codec ## _nvdec_hwaccel) ++#define HWACCEL_V4L2REQUEST(codec) \ ++ HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME, DRM, ff_ ## codec ## _v4l2request_hwaccel) + #define HWACCEL_VAAPI(codec) \ + HW_CONFIG_HWACCEL(1, 1, 1, VAAPI, VAAPI, ff_ ## codec ## _vaapi_hwaccel) + #define HWACCEL_VDPAU(codec) \ +diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c +index e42591110fec..70cc0ecabcb9 100644 +--- a/libavcodec/mmaldec.c ++++ b/libavcodec/mmaldec.c +@@ -24,6 +24,9 @@ + 
* MMAL Video Decoder + */ + ++#pragma GCC diagnostic push ++// Many many redundant decls in the header files ++#pragma GCC diagnostic ignored "-Wredundant-decls" + #include + #include + #include +@@ -31,6 +34,7 @@ + #include + #include + #include ++#pragma GCC diagnostic pop + #include + + #include "avcodec.h" +diff --git a/libavcodec/raw.c b/libavcodec/raw.c +index b73b80e5fdb1..7ca36ac84975 100644 +--- a/libavcodec/raw.c ++++ b/libavcodec/raw.c +@@ -297,6 +297,12 @@ static const PixelFormatTag raw_pix_fmt_tags[] = { + { AV_PIX_FMT_RGB565LE,MKTAG( 3 , 0 , 0 , 0 ) }, /* flipped RGB565LE */ + { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */ + ++ /* RPI (Might as well define for everything) */ ++ { AV_PIX_FMT_SAND128, MKTAG('S', 'A', 'N', 'D') }, ++ { AV_PIX_FMT_RPI4_8, MKTAG('S', 'A', 'N', 'D') }, ++ { AV_PIX_FMT_SAND64_10, MKTAG('S', 'N', 'D', 'A') }, ++ { AV_PIX_FMT_RPI4_10, MKTAG('S', 'N', 'D', 'B') }, ++ + { AV_PIX_FMT_NONE, 0 }, }; - // Handy container for accumulating controls before setting -@@ -245,7 +250,7 @@ static int slice_add(V4L2MediaReqDescriptor * const rd) - if (rd->num_slices >= rd->alloced_slices) { - struct v4l2_ctrl_hevc_slice_params * p2; - struct slice_info * s2; -- size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2; -+ size_t n2 = rd->alloced_slices == 0 ? 
8 : rd->alloced_slices * 2; +diff --git a/libavcodec/rawenc.c b/libavcodec/rawenc.c +index 8c577006d922..8ca0379e1219 100644 +--- a/libavcodec/rawenc.c ++++ b/libavcodec/rawenc.c +@@ -24,6 +24,7 @@ + * Raw Video Encoder + */ - p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2)); - if (p2 == NULL) -@@ -263,6 +268,23 @@ static int slice_add(V4L2MediaReqDescriptor * const rd) ++#include "config.h" + #include "avcodec.h" + #include "codec_internal.h" + #include "encode.h" +@@ -33,6 +34,10 @@ + #include "libavutil/intreadwrite.h" + #include "libavutil/imgutils.h" + #include "libavutil/internal.h" ++#include "libavutil/avassert.h" ++#if CONFIG_SAND ++#include "libavutil/rpi_sand_fns.h" ++#endif + + static av_cold int raw_encode_init(AVCodecContext *avctx) + { +@@ -46,22 +51,114 @@ static av_cold int raw_encode_init(AVCodecContext *avctx) return 0; } -+static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets) ++#if CONFIG_SAND ++static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, ++ const AVFrame *frame) +{ -+ if (rd->num_offsets + n > rd->alloced_offsets) { -+ size_t n2 = rd->alloced_slices == 0 ? 
128 : rd->alloced_slices * 2; -+ void * p2; -+ while (rd->num_offsets + n > n2) -+ n2 *= 2; -+ if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL) -+ return AVERROR(ENOMEM); -+ rd->offsets = p2; -+ rd->alloced_offsets = n2; -+ } -+ for (size_t i = 0; i != n; ++i) -+ rd->offsets[rd->num_offsets++] = offsets[i] - 1; ++ const int width = av_frame_cropped_width(frame); ++ const int height = av_frame_cropped_height(frame); ++ const int x0 = frame->crop_left; ++ const int y0 = frame->crop_top; ++ const int size = width * height * 3 / 2; ++ uint8_t * dst; ++ int ret; ++ ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) ++ return ret; ++ ++ dst = pkt->data; ++ ++ av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); ++ dst += width * height; ++ av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2, ++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2); + return 0; +} + - static unsigned int - fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) - { -@@ -403,12 +425,12 @@ static void fill_slice_params(const HEVCContext * const h, - fill_pred_table(h, &slice_params->pred_weight_table); - - slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; -+#if HEVC_CTRLS_VERSION <= 3 - if (slice_params->num_entry_point_offsets > 256) { - slice_params->num_entry_point_offsets = 256; - av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); - } - --#if HEVC_CTRLS_VERSION <= 3 - for (i = 0; i < slice_params->num_entry_point_offsets; i++) - slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; - #endif -@@ -787,13 +809,17 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, - #if HEVC_CTRLS_VERSION >= 2 - struct 
v4l2_ctrl_hevc_decode_params * const dec, - #endif -- struct v4l2_ctrl_hevc_slice_params * const slices, -- const unsigned int slice_no, -- const unsigned int slice_count) -+ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, -+ void * const offsets, const size_t offset_count) - { - int rv; -+#if HEVC_CTRLS_VERSION >= 2 -+ unsigned int n = 4; -+#else -+ unsigned int n = 3; -+#endif - -- struct v4l2_ext_control control[] = { -+ struct v4l2_ext_control control[6] = { - { - .id = V4L2_CID_STATELESS_HEVC_SPS, - .ptr = &controls->sps, -@@ -813,21 +839,28 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, - #endif - { - .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, -- .ptr = slices + slice_no, -+ .ptr = slices, - .size = sizeof(*slices) * slice_count, - }, -- // Optional -- { -+ }; -+ -+ if (controls->has_scaling) -+ control[n++] = (struct v4l2_ext_control) { - .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, - .ptr = &controls->scaling_matrix, - .size = sizeof(controls->scaling_matrix), -- }, -- }; -+ }; -+ -+#if HEVC_CTRLS_VERSION >= 4 -+ if (offsets) -+ control[n++] = (struct v4l2_ext_control) { -+ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, -+ .ptr = offsets, -+ .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count, -+ }; -+#endif - -- rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, -- controls->has_scaling ? 
-- FF_ARRAY_ELEMS(control) : -- FF_ARRAY_ELEMS(control) - 1); -+ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); - - return rv; - } -@@ -852,6 +885,7 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t * - si = rd->slices + n; - si->ptr = buffer; - si->len = size; -+ si->n_offsets = rd->num_offsets; - - if (n != block_start) { - struct slice_info *const si0 = rd->slices + block_start; -@@ -868,6 +902,9 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t * - #else - fill_slice_params(h, rd->slice_params + n, size * 8, boff); - #endif -+ if (ctx->max_offsets != 0 && -+ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) -+ return rv; - - return 0; - } -@@ -893,10 +930,13 @@ static int send_slice(AVCodecContext * const avctx, - { - V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; - -+ const int is_last = (j == rd->num_slices); - struct slice_info *const si = rd->slices + i; - struct media_request * req = NULL; - struct qent_src * src = NULL; - MediaBufsStatus stat; -+ void * offsets = rd->offsets + rd->slices[i].n_offsets; -+ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; - - if ((req = media_request_get(ctx->mpool)) == NULL) { - av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); -@@ -908,8 +948,8 @@ static int send_slice(AVCodecContext * const avctx, - #if HEVC_CTRLS_VERSION >= 2 - &rd->dec, - #endif -- rd->slice_params, -- i, j - i)) { -+ rd->slice_params + i, j - i, -+ offsets, n_offsets)) { - av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); - goto fail1; - } -@@ -935,7 +975,7 @@ static int send_slice(AVCodecContext * const avctx, - - stat = mediabufs_start_request(ctx->mbufs, &req, &src, - i == 0 ? 
rd->qe_dst : NULL, -- j == rd->num_slices); -+ is_last); - - if (stat != MEDIABUFS_STATUS_SUCCESS) { - av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); -@@ -1090,6 +1130,9 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, - { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, - { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, -+#if HEVC_CTRLS_VERSION >= 4 -+ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, -+#endif - }; - - struct v4l2_ext_control ctrls[] = { -@@ -1119,6 +1162,14 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - 1 : querys[2].dims[0]; - av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); - -+#if HEVC_CTRLS_VERSION >= 4 -+ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? -+ 0 : querys[3].dims[0]; -+ av_log(avctx, AV_LOG_INFO, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); -+#else -+ ctx->max_offsets = 0; -+#endif -+ - ctrls[0].value = ctx->decode_mode; - ctrls[1].value = ctx->start_code; - -@@ -1141,6 +1192,7 @@ static void v4l2_req_frame_free(void *opaque, uint8_t *data) - - av_freep(&rd->slices); - av_freep(&rd->slice_params); -+ av_freep(&rd->offsets); - - av_free(rd); - } -diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h -index 0029e2330977..99c90064eab8 100644 ---- a/libavcodec/v4l2_request_hevc.h -+++ b/libavcodec/v4l2_request_hevc.h -@@ -64,7 +64,8 @@ typedef struct V4L2RequestContextHEVC { - - int decode_mode; - int start_code; -- unsigned int max_slices; -+ unsigned int max_slices; // 0 => not wanted (frame mode) -+ unsigned int max_offsets; // 0 => not wanted - - req_decode_q decode_q; - - -From b6e9c32d7a2cab41f9fbce46989173f32e2135d8 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 4 Jul 2022 16:22:54 +0100 -Subject: [PATCH 059/186] v4l2_req: Support Annex B - ---- - libavcodec/v4l2_req_hevc_vx.c | 61 
+++++++++++++++++++++++------------ - 1 file changed, 41 insertions(+), 20 deletions(-) - -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -index 43ef6631edc1..5e0db9850aad 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -879,6 +879,18 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t * - int rv; - struct slice_info * si; - -+ // This looks dodgy but we know that FFmpeg has parsed this from a buffer -+ // that contains the entire frame including the start code -+ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { -+ buffer -= 3; -+ size += 3; -+ boff += 24; -+ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { -+ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", -+ buffer[0], buffer[1], buffer[2]); -+ } -+ } -+ - if ((rv = slice_add(rd)) != 0) - return rv; - -@@ -969,10 +981,6 @@ static int send_slice(AVCodecContext * const avctx, - goto fail2; - } - --#warning ANNEX_B start code --// if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { --// } -- - stat = mediabufs_start_request(ctx->mbufs, &req, &src, - i == 0 ? 
rd->qe_dst : NULL, - is_last); -@@ -1120,6 +1128,12 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - return 0; - } - -+static inline int -+ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) ++static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, ++ const AVFrame *frame) +{ -+ return v >= c->minimum && v <= c->maximum; -+} ++ const int width = av_frame_cropped_width(frame); ++ const int height = av_frame_cropped_height(frame); ++ const int x0 = frame->crop_left; ++ const int y0 = frame->crop_top; ++ const int size = width * height * 3; ++ uint8_t * dst; ++ int ret; + - // Final init - static int - set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) -@@ -1142,21 +1156,6 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - - mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); - -- ctx->decode_mode = querys[0].default_value; -- -- if (ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && -- ctx->decode_mode != V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { -- av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode); -- return AVERROR(EINVAL); -- } -- -- ctx->start_code = querys[1].default_value; -- if (ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_NONE && -- ctx->start_code != V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { -- av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code); -- return AVERROR(EINVAL); -- } -- - ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || - querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? - 1 : querys[2].dims[0]; -@@ -1165,11 +1164,33 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - #if HEVC_CTRLS_VERSION >= 4 - ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? 
- 0 : querys[3].dims[0]; -- av_log(avctx, AV_LOG_INFO, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); -+ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); - #else - ctx->max_offsets = 0; - #endif - -+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; -+ -+ if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) -+ { -+ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; -+ -+ // Prefer NONE as it doesn't require the slightly dodgy look -+ // backwards in our raw buffer -+ if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) -+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; -+ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) -+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; -+ else { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); -+ return AVERROR(EINVAL); -+ } -+ } -+ else -+ { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); -+ } -+ - ctrls[0].value = ctx->decode_mode; - ctrls[1].value = ctx->start_code; - - -From ed654f6e2703c556582de06129a8052c95e1d934 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 4 Jul 2022 18:24:03 +0100 -Subject: [PATCH 060/186] v4l2_req: Add frame mode decode - ---- - libavcodec/v4l2_req_hevc_vx.c | 69 +++++++++++++++++++++++------------ - 1 file changed, 46 insertions(+), 23 deletions(-) - -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -index 5e0db9850aad..ada53d0d44df 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -814,9 +814,9 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, - { - int rv; - #if HEVC_CTRLS_VERSION >= 2 -- unsigned int n = 4; --#else - unsigned int n = 3; -+#else -+ unsigned int n = 2; - #endif - - struct v4l2_ext_control control[6] = { -@@ -837,12 +837,14 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * 
const mreq, - .size = sizeof(*dec), - }, - #endif -- { -+ }; -+ -+ if (slices) -+ control[n++] = (struct v4l2_ext_control) { - .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, - .ptr = slices, - .size = sizeof(*slices) * slice_count, -- }, -- }; -+ }; - - if (controls->has_scaling) - control[n++] = (struct v4l2_ext_control) { -@@ -865,6 +867,8 @@ set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, - return rv; - } - -+// This only works because we started out from a single coded frame buffer -+// that will remain intact until after end_frame - static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) - { - const HEVCContext * const h = avctx->priv_data; -@@ -891,6 +895,17 @@ static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t * - } - } - -+ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { -+ if (rd->slices == NULL) { -+ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) -+ return AVERROR(ENOMEM); -+ rd->slices->ptr = buffer; -+ rd->num_slices = 1; -+ } -+ rd->slices->len = buffer - rd->slices->ptr + size; -+ return 0; -+ } -+ - if ((rv = slice_add(rd)) != 0) - return rv; - -@@ -1169,28 +1184,36 @@ set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - ctx->max_offsets = 0; - #endif - -- ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; -- -- if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) -- { -+ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || -+ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) -+ ctx->decode_mode = querys[0].default_value; -+ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) -+ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; -+ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) - ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; -- 
-- // Prefer NONE as it doesn't require the slightly dodgy look -- // backwards in our raw buffer -- if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) -- ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; -- else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) -- ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; -- else { -- av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); -- return AVERROR(EINVAL); -- } -- } -- else -- { -+ else { - av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); -+ return AVERROR(EINVAL); - } - -+ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || -+ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) -+ ctx->start_code = querys[1].default_value; -+ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) -+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; -+ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) -+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; -+ else { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); -+ return AVERROR(EINVAL); -+ } -+ -+ // If we are in slice mode & START_CODE_NONE supported then pick that -+ // as it doesn't require the slightly dodgy look backwards in our raw buffer -+ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && -+ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) -+ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; -+ - ctrls[0].value = ctx->decode_mode; - ctrls[1].value = ctx->start_code; - - -From 765b4048cbe852cb857b64cab54afc2c4aed92cc Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 5 Jul 2022 12:54:22 +0000 -Subject: [PATCH 061/186] v4l2_req: Fix probe for frame based decode - ---- - libavcodec/v4l2_req_hevc_vx.c | 33 +++++++++++++++++++++++---------- - 1 file changed, 23 insertions(+), 10 deletions(-) - -diff --git a/libavcodec/v4l2_req_hevc_vx.c 
b/libavcodec/v4l2_req_hevc_vx.c -index ada53d0d44df..5d083016f89a 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -1082,6 +1082,12 @@ fail: - return rv; - } - -+static inline int -+ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) -+{ -+ return v >= c->minimum && v <= c->maximum; -+} -+ - // Initial check & init - static int - probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) -@@ -1094,6 +1100,7 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - // Check for var slice array - struct v4l2_query_ext_ctrl qc[] = { - { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, -+ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, - { .id = V4L2_CID_STATELESS_HEVC_SPS }, - { .id = V4L2_CID_STATELESS_HEVC_PPS }, - { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, -@@ -1104,6 +1111,7 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - // Order & size must match! - static const size_t ctrl_sizes[] = { - sizeof(struct v4l2_ctrl_hevc_slice_params), -+ sizeof(int32_t), - sizeof(struct v4l2_ctrl_hevc_sps), - sizeof(struct v4l2_ctrl_hevc_pps), - sizeof(struct v4l2_ctrl_hevc_scaling_matrix), -@@ -1121,11 +1129,22 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - return AVERROR(EINVAL); - #endif - -- if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) { -- av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION); -+ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); -+ i = 0; -+#if HEVC_CTRLS_VERSION >= 4 -+ // Skip slice check if no slice mode -+ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) -+ i = 1; -+#else -+ // Fail frame mode silently for anything prior to V4 -+ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) - return AVERROR(EINVAL); -- } -- for (i = 0; i != noof_ctrls; ++i) { -+#endif -+ for (; i != 
noof_ctrls; ++i) { -+ if (qc[i].type == 0) { -+ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id); -+ return AVERROR(EINVAL); -+ } - if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { - av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", - HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); -@@ -1143,12 +1162,6 @@ probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - return 0; - } - --static inline int --ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) --{ -- return v >= c->minimum && v <= c->maximum; --} -- - // Final init - static int - set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) - -From 64b8c3987f022ed871ba9ed835de13e18c7e38ee Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 26 Jul 2022 15:46:14 +0000 -Subject: [PATCH 062/186] vf_deinterlace_v4l2m2m: Support NV12 through - deinterlace - -Supports NV12 (though not yet NV12M) through deinterlace. -Also improves error handling such that attempting to deinterlace an -unsupported drm format causes an error. -No longer leaks frame structures. 
---- - libavfilter/vf_deinterlace_v4l2m2m.c | 160 ++++++++++++++++++--------- - 1 file changed, 107 insertions(+), 53 deletions(-) - -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index 1a933b7e0a5f..1a3bef5bcba6 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -373,14 +373,16 @@ static int deint_v4l2m2m_try_format(V4L2Queue *queue) - fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline); - - if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -- if (fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 || -+ if ((fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 && -+ fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_NV12) || - fmt->fmt.pix_mp.field != field) { - av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); - - return AVERROR(EINVAL); - } - } else { -- if (fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 || -+ if ((fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 && -+ fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_NV12) || - fmt->fmt.pix.field != field) { - av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); - -@@ -391,7 +393,7 @@ static int deint_v4l2m2m_try_format(V4L2Queue *queue) - return 0; - } - --static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, int height, int pitch, int ysize) -+static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height, int pitch, int ysize) - { - struct v4l2_format *fmt = &queue->format; - DeintV4L2M2MContextShared *ctx = queue->ctx; -@@ -402,13 +404,16 @@ static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, - .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? 
V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS, - }; - -+ // This works for most single object 4:2:0 types - if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ fmt->fmt.pix_mp.pixelformat = pixelformat; - fmt->fmt.pix_mp.field = field; - fmt->fmt.pix_mp.width = width; - fmt->fmt.pix_mp.height = ysize / pitch; - fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch; - fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1); - } else { -+ fmt->fmt.pix.pixelformat = pixelformat; - fmt->fmt.pix.field = field; - fmt->fmt.pix.width = width; - fmt->fmt.pix.height = height; -@@ -417,12 +422,22 @@ static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, - } - - ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt); -- if (ret) -+ if (ret) { -+ ret = AVERROR(errno); - av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret); ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) + return ret; ++ ++ dst = pkt->data; ++ ++ av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height); ++ dst += width * height * 2; ++ av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width, ++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2); ++ return 0; ++} ++ ++static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt, ++ const AVFrame *frame) ++{ ++ const int width = av_frame_cropped_width(frame); ++ const int height = av_frame_cropped_height(frame); ++ const int x0 = frame->crop_left; ++ const int y0 = frame->crop_top; ++ const int size = width * height * 3; ++ uint8_t * dst; ++ int ret; ++ ++ if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0) ++ return ret; ++ ++ dst = pkt->data; ++ ++ av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height); ++ dst += width * height * 2; ++ av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, 
width, ++ frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2); ++ return 0; ++} ++#endif ++ ++ + static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, +- const AVFrame *frame, int *got_packet) ++ const AVFrame *src_frame, int *got_packet) + { +- int ret = av_image_get_buffer_size(frame->format, +- frame->width, frame->height, 1); ++ int ret; ++ AVFrame * frame = NULL; + +- if (ret < 0) ++#if CONFIG_SAND ++ if (av_rpi_is_sand_frame(src_frame)) { ++ ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) : ++ av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) : ++ av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1; ++ *got_packet = (ret == 0); + return ret; ++ } ++#endif ++ ++ if ((frame = av_frame_clone(src_frame)) == NULL) { ++ ret = AVERROR(ENOMEM); ++ goto fail; + } + -+ if (pixelformat != fmt->fmt.pix.pixelformat) { -+ av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt->fmt.pix.pixelformat)); -+ return AVERROR(EINVAL); -+ } ++ if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0) ++ goto fail; ++ ++ ret = av_image_get_buffer_size(frame->format, ++ frame->width, frame->height, 1); ++ if (ret < 0) ++ goto fail; - ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel); + if ((ret = ff_get_encode_buffer(avctx, pkt, ret, 0)) < 0) +- return ret; ++ goto fail; + if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size, + (const uint8_t **)frame->data, frame->linesize, + frame->format, + frame->width, frame->height, 1)) < 0) +- return ret; ++ goto fail; + + if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 && + frame->format == AV_PIX_FMT_YUYV422) { +@@ -77,8 +174,15 @@ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt, + AV_WB64(&pkt->data[8 * x], v << 48 | v >> 16); + } + } ++ pkt->flags |= AV_PKT_FLAG_KEY; ++ 
av_frame_free(&frame); + *got_packet = 1; + return 0; ++ ++fail: ++ av_frame_free(&frame); ++ *got_packet = 0; ++ return ret; + } + + const FFCodec ff_rawvideo_encoder = { +diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c +index 23474ee1439e..5920463d0a65 100644 +--- a/libavcodec/v4l2_buffers.c ++++ b/libavcodec/v4l2_buffers.c +@@ -21,6 +21,7 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "config.h" + #include + #include + #include +@@ -28,58 +29,94 @@ + #include + #include + #include "libavcodec/avcodec.h" ++#include "libavcodec/internal.h" ++#include "libavutil/avassert.h" ++#include "libavutil/mem.h" + #include "libavutil/pixdesc.h" +-#include "refstruct.h" ++#include "libavutil/hwcontext.h" + #include "v4l2_context.h" + #include "v4l2_buffers.h" + #include "v4l2_m2m.h" ++#include "v4l2_req_dmabufs.h" ++#include "weak_link.h" ++ ++#if CONFIG_LIBDRM ++#include ++#endif + + #define USEC_PER_SEC 1000000 +-static AVRational v4l2_timebase = { 1, USEC_PER_SEC }; ++static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; + +-static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf) ++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) + { +- return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? +- container_of(buf->context, V4L2m2mContext, output) : +- container_of(buf->context, V4L2m2mContext, capture); ++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? 
++ container_of(ctx, V4L2m2mContext, output) : ++ container_of(ctx, V4L2m2mContext, capture); + } + +-static inline AVCodecContext *logger(V4L2Buffer *buf) ++static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) + { +- return buf_to_m2mctx(buf)->avctx; ++ return ctx_to_m2mctx(buf->context); + } + +-static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf) ++static inline AVCodecContext *logger(const V4L2Buffer * const buf) + { +- V4L2m2mContext *s = buf_to_m2mctx(avbuf); ++ return buf_to_m2mctx(buf)->avctx; ++} + +- if (s->avctx->pkt_timebase.num) +- return s->avctx->pkt_timebase; +- return s->avctx->time_base; ++static inline AVRational v4l2_get_timebase(const V4L2Buffer * const avbuf) ++{ ++ const V4L2m2mContext *s = buf_to_m2mctx(avbuf); ++ const AVRational tb = s->avctx->pkt_timebase.num ? ++ s->avctx->pkt_timebase : ++ s->avctx->time_base; ++ return tb.num && tb.den ? tb : v4l2_timebase; + } + +-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) ++static inline struct timeval tv_from_int(const int64_t t) + { +- int64_t v4l2_pts; ++ return (struct timeval){ ++ .tv_usec = t % USEC_PER_SEC, ++ .tv_sec = t / USEC_PER_SEC ++ }; ++} + +- if (pts == AV_NOPTS_VALUE) +- pts = 0; ++static inline int64_t int_from_tv(const struct timeval t) ++{ ++ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; ++} + ++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) ++{ + /* convert pts to v4l2 timebase */ +- v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); +- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; +- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; ++ const int64_t v4l2_pts = ++ pts == AV_NOPTS_VALUE ? 
0 : ++ av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); ++ out->buf.timestamp = tv_from_int(v4l2_pts); + } + +-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) ++static inline int64_t v4l2_get_pts(const V4L2Buffer * const avbuf) + { +- int64_t v4l2_pts; +- ++ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); ++ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; ++#if 0 + /* convert pts back to encoder timebase */ +- v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + +- avbuf->buf.timestamp.tv_usec; ++ return ++ avbuf->context->no_pts_rescale ? v4l2_pts : ++ v4l2_pts == 0 ? AV_NOPTS_VALUE : ++ av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++#endif ++} + +- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) ++{ ++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++ out->planes[plane].bytesused = bytesused; ++ out->planes[plane].length = length; ++ } else { ++ out->buf.bytesused = bytesused; ++ out->buf.length = length; ++ } + } + + static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) +@@ -116,6 +153,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) + return AVCOL_PRI_UNSPECIFIED; + } + ++static void v4l2_set_color(V4L2Buffer *buf, ++ const enum AVColorPrimaries avcp, ++ const enum AVColorSpace avcs, ++ const enum AVColorTransferCharacteristic avxc) ++{ ++ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; ++ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; ++ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; ++ ++ switch (avcp) { ++ case AVCOL_PRI_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ ycbcr = V4L2_YCBCR_ENC_709; ++ break; ++ case AVCOL_PRI_BT470M: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ ycbcr = V4L2_YCBCR_ENC_601; ++ break; ++ case AVCOL_PRI_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_PRI_SMPTE170M: ++ cs 
= V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_PRI_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_PRI_BT2020: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ case AVCOL_PRI_SMPTE428: ++ case AVCOL_PRI_SMPTE431: ++ case AVCOL_PRI_SMPTE432: ++ case AVCOL_PRI_EBU3213: ++ case AVCOL_PRI_RESERVED: ++ case AVCOL_PRI_FILM: ++ case AVCOL_PRI_UNSPECIFIED: ++ default: ++ break; ++ } ++ ++ switch (avcs) { ++ case AVCOL_SPC_RGB: ++ cs = V4L2_COLORSPACE_SRGB; ++ break; ++ case AVCOL_SPC_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ break; ++ case AVCOL_SPC_FCC: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ break; ++ case AVCOL_SPC_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_SPC_SMPTE170M: ++ cs = V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_SPC_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_SPC_BT2020_CL: ++ cs = V4L2_COLORSPACE_BT2020; ++ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; ++ break; ++ case AVCOL_SPC_BT2020_NCL: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ default: ++ break; ++ } ++ ++ switch (xfer) { ++ case AVCOL_TRC_BT709: ++ xfer = V4L2_XFER_FUNC_709; ++ break; ++ case AVCOL_TRC_IEC61966_2_1: ++ xfer = V4L2_XFER_FUNC_SRGB; ++ break; ++ case AVCOL_TRC_SMPTE240M: ++ xfer = V4L2_XFER_FUNC_SMPTE240M; ++ break; ++ case AVCOL_TRC_SMPTE2084: ++ xfer = V4L2_XFER_FUNC_SMPTE2084; ++ break; ++ default: ++ break; ++ } ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { ++ buf->context->format.fmt.pix_mp.colorspace = cs; ++ buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; ++ buf->context->format.fmt.pix_mp.xfer_func = xfer; ++ } else { ++ buf->context->format.fmt.pix.colorspace = cs; ++ buf->context->format.fmt.pix.ycbcr_enc = ycbcr; ++ buf->context->format.fmt.pix.xfer_func = xfer; ++ } ++} ++ + static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) + { + enum v4l2_quantization qt; +@@ -134,6 +270,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) + return 
AVCOL_RANGE_UNSPECIFIED; + } + ++static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) ++{ ++ const enum v4l2_quantization q = ++ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : ++ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : ++ V4L2_QUANTIZATION_DEFAULT; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { ++ buf->context->format.fmt.pix_mp.quantization = q; ++ } else { ++ buf->context->format.fmt.pix.quantization = q; ++ } ++} ++ + static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) + { + enum v4l2_ycbcr_encoding ycbcr; +@@ -210,71 +360,294 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) + return AVCOL_TRC_UNSPECIFIED; + } + +-static void v4l2_free_buffer(void *opaque, uint8_t *unused) ++static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) + { +- V4L2Buffer* avbuf = opaque; +- V4L2m2mContext *s = buf_to_m2mctx(avbuf); ++ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); ++} + +- if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { +- atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); ++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) ++{ ++ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; ++} + +- if (s->reinit) { +- if (!atomic_load(&s->refcount)) +- sem_post(&s->refsync); +- } else { +- if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) { +- /* no need to queue more buffers to the driver */ +- avbuf->status = V4L2BUF_AVAILABLE; +- } +- else if (avbuf->context->streamon) +- ff_v4l2_buffer_enqueue(avbuf); +- } ++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) ++{ ++ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : ++ is_tff ? 
V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; ++} + +- ff_refstruct_unref(&avbuf->context_ref); ++static inline void frame_set_interlace(AVFrame* frame, const int is_interlaced, const int is_tff) ++{ ++ if (!is_interlaced) { ++#if FF_API_INTERLACED_FRAME ++FF_DISABLE_DEPRECATION_WARNINGS ++ frame->interlaced_frame = 0; ++ frame->top_field_first = 0; ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ frame->flags &= ~(AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED); ++ } ++ else { ++#if FF_API_INTERLACED_FRAME ++FF_DISABLE_DEPRECATION_WARNINGS ++ frame->interlaced_frame = 1; ++ frame->top_field_first = !!is_tff; ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ if (is_tff) ++ frame->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED; ++ else ++ frame->flags = (frame->flags & ~AV_FRAME_FLAG_TOP_FIELD_FIRST) | AV_FRAME_FLAG_INTERLACED; + } + } + +-static int v4l2_buf_increase_ref(V4L2Buffer *in) ++static inline int frame_is_interlaced(const AVFrame* const frame) + { +- V4L2m2mContext *s = buf_to_m2mctx(in); ++#if FF_API_INTERLACED_FRAME ++FF_DISABLE_DEPRECATION_WARNINGS ++ return frame->interlaced_frame || (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0; ++FF_ENABLE_DEPRECATION_WARNINGS ++#else ++ return (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0; ++#endif ++} + +- if (in->context_ref) +- atomic_fetch_add(&in->context_refcount, 1); +- else { +- in->context_ref = ff_refstruct_ref(s->self_ref); ++static inline int frame_is_tff(const AVFrame* const frame) ++{ ++#if FF_API_INTERLACED_FRAME ++FF_DISABLE_DEPRECATION_WARNINGS ++ return frame->top_field_first || (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0; ++FF_ENABLE_DEPRECATION_WARNINGS ++#else ++ return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0; ++#endif ++} ++ ++static inline int frame_is_key(const AVFrame* const frame) ++{ ++#if FF_API_FRAME_KEY ++FF_DISABLE_DEPRECATION_WARNINGS ++ return frame->key_frame || (frame->flags & AV_FRAME_FLAG_KEY) != 0; 
++FF_ENABLE_DEPRECATION_WARNINGS ++#else ++ return (frame->flags & AV_FRAME_FLAG_KEY) != 0; ++#endif ++} + +- in->context_refcount = 1; ++static inline void frame_set_key(AVFrame* const frame, const int is_key) ++{ ++#if FF_API_FRAME_KEY ++FF_DISABLE_DEPRECATION_WARNINGS ++ frame->key_frame = !!is_key; ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ frame->flags = is_key ? ++ frame->flags | AV_FRAME_FLAG_KEY : ++ frame->flags & ~AV_FRAME_FLAG_KEY; ++} ++ ++#if CONFIG_LIBDRM ++static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) ++{ ++ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; ++ AVDRMLayerDescriptor *layer; ++ ++ /* fill the DRM frame descriptor */ ++ drm_desc->nb_objects = avbuf->num_planes; ++ drm_desc->nb_layers = 1; ++ ++ layer = &drm_desc->layers[0]; ++ layer->nb_planes = avbuf->num_planes; ++ ++ for (int i = 0; i < avbuf->num_planes; i++) { ++ layer->planes[i].object_index = i; ++ layer->planes[i].offset = avbuf->plane_info[i].offset; ++ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; + } + +- in->status = V4L2BUF_RET_USER; +- atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); ++ switch (avbuf->context->av_pix_fmt) { ++ case AV_PIX_FMT_0BGR: ++ layer->format = DRM_FORMAT_RGBX8888; ++ break; ++ case AV_PIX_FMT_RGB0: ++ layer->format = DRM_FORMAT_XBGR8888; ++ break; ++ case AV_PIX_FMT_0RGB: ++ layer->format = DRM_FORMAT_BGRX8888; ++ break; ++ case AV_PIX_FMT_BGR0: ++ layer->format = DRM_FORMAT_XRGB8888; ++ break; + +- return 0; ++ case AV_PIX_FMT_ABGR: ++ layer->format = DRM_FORMAT_RGBA8888; ++ break; ++ case AV_PIX_FMT_RGBA: ++ layer->format = DRM_FORMAT_ABGR8888; ++ break; ++ case AV_PIX_FMT_ARGB: ++ layer->format = DRM_FORMAT_BGRA8888; ++ break; ++ case AV_PIX_FMT_BGRA: ++ layer->format = DRM_FORMAT_ARGB8888; ++ break; ++ ++ case AV_PIX_FMT_BGR24: ++ layer->format = DRM_FORMAT_BGR888; ++ break; ++ case AV_PIX_FMT_RGB24: ++ layer->format = DRM_FORMAT_RGB888; ++ break; ++ ++ case AV_PIX_FMT_YUYV422: ++ ++ layer->format = 
DRM_FORMAT_YUYV; ++ layer->nb_planes = 1; ++ ++ break; ++ ++ case AV_PIX_FMT_NV12: ++ case AV_PIX_FMT_NV21: ++ ++ layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ? ++ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 2; ++ ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * ++ avbuf->context->format.fmt.pix.height; ++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; ++ break; ++ ++ case AV_PIX_FMT_YUV420P: ++ ++ layer->format = DRM_FORMAT_YUV420; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 3; ++ ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * ++ avbuf->context->format.fmt.pix.height; ++ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; ++ ++ layer->planes[2].object_index = 0; ++ layer->planes[2].offset = layer->planes[1].offset + ++ ((avbuf->plane_info[0].bytesperline * ++ avbuf->context->format.fmt.pix.height) >> 2); ++ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; ++ break; ++ ++ default: ++ drm_desc->nb_layers = 0; ++ break; ++ } ++ ++ return (uint8_t *) drm_desc; + } ++#endif + +-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf) ++static void v4l2_free_bufref(void *opaque, uint8_t *data) + { +- int ret; ++ AVBufferRef * bufref = (AVBufferRef *)data; ++ V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data; ++ struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl); + +- if (plane >= in->num_planes) +- return AVERROR(EINVAL); ++ if (ctx != NULL) { ++ // Buffer still attached to context ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + +- /* even though most encoders return 0 in data_offset encoding vp8 does require this value */ +- *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset, +- in->plane_info[plane].length, v4l2_free_buffer, in, 0); +- if (!*buf) 
+- return AVERROR(ENOMEM); ++ if (!s->output_drm && avbuf->dmabuf[0] != NULL) { ++ for (unsigned int i = 0; i != avbuf->num_planes; ++i) ++ dmabuf_read_end(avbuf->dmabuf[i]); ++ } + +- ret = v4l2_buf_increase_ref(in); - if (ret) -- av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_SELECTION failed: %d\n", ret); -+ if (ret) { -+ ret = AVERROR(errno); -+ av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION failed: %d\n", ret); -+ } - - sel.r.width = width; - sel.r.height = height; -@@ -432,10 +447,12 @@ static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, - sel.flags = V4L2_SEL_FLAG_LE; - - ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel); -- if (ret) -- av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_SELECTION failed: %d\n", ret); -+ if (ret) { -+ ret = AVERROR(errno); -+ av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %d\n", ret); -+ } +- av_buffer_unref(buf); ++ ff_mutex_lock(&ctx->lock); - return ret; -+ return 0; - } - - static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) -@@ -517,10 +534,25 @@ static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) - return 0; - } - --static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) -+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf, const uint32_t pixelformat) - { - struct v4l2_exportbuffer expbuf; - int i, ret; -+ uint64_t mod = DRM_FORMAT_MOD_LINEAR; -+ uint32_t fmt = 0; ++ ff_v4l2_buffer_set_avail(avbuf); ++ avbuf->buf.timestamp.tv_sec = 0; ++ avbuf->buf.timestamp.tv_usec = 0; + -+ switch (pixelformat) { -+ case V4L2_PIX_FMT_NV12: -+ fmt = DRM_FORMAT_NV12; -+ break; -+ case V4L2_PIX_FMT_YUV420: -+ fmt = DRM_FORMAT_YUV420; -+ break; -+ default: -+ return AVERROR(EINVAL); ++ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); ++ } ++ else if (ctx->streamon) { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); ++ ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER ++ } 
++ else { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name); ++ } ++ ++ ff_mutex_unlock(&ctx->lock); + } + -+ avbuf->drm_frame.layers[0].format = fmt; - - for (i = 0; i < avbuf->num_planes; i++) { - memset(&expbuf, 0, sizeof(expbuf)); -@@ -539,12 +571,12 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) - /* drm frame */ - avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length; - avbuf->drm_frame.objects[i].fd = expbuf.fd; -- avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ avbuf->drm_frame.objects[i].format_modifier = mod; - } else { - /* drm frame */ - avbuf->drm_frame.objects[0].size = avbuf->buffer.length; - avbuf->drm_frame.objects[0].fd = expbuf.fd; -- avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ avbuf->drm_frame.objects[0].format_modifier = mod; - } - } - -@@ -629,7 +661,7 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) - if (ret) - goto fail; - -- ret = v4l2_buffer_export_drm(buf); -+ ret = v4l2_buffer_export_drm(buf, multiplanar ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat); - if (ret) - goto fail; - } -@@ -878,7 +910,6 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused) - - static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height) - { -- int av_pix_fmt = AV_PIX_FMT_YUV420P; - AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; - AVDRMLayerDescriptor *layer; - -@@ -895,20 +926,13 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height) - layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; - } - -- switch (av_pix_fmt) { -- case AV_PIX_FMT_YUYV422: -- -- layer->format = DRM_FORMAT_YUYV; -+ switch (layer->format) { -+ case DRM_FORMAT_YUYV: - layer->nb_planes = 1; -- - break; - -- case AV_PIX_FMT_NV12: -- case AV_PIX_FMT_NV21: -- -- layer->format = av_pix_fmt == AV_PIX_FMT_NV12 ? 
-- DRM_FORMAT_NV12 : DRM_FORMAT_NV21; -- -+ case DRM_FORMAT_NV12: -+ case DRM_FORMAT_NV21: - if (avbuf->num_planes > 1) - break; - -@@ -920,10 +944,7 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height) - layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; - break; - -- case AV_PIX_FMT_YUV420P: -- -- layer->format = DRM_FORMAT_YUV420; -- -+ case DRM_FORMAT_YUV420: - if (avbuf->num_planes > 1) - break; - -@@ -1032,6 +1053,26 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink) - return 0; - } - -+static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) -+{ -+ const int is_linear = (drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR || -+ drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID); -+ -+ switch (drm_desc->layers[0].format) { -+ case DRM_FORMAT_YUV420: -+ if (is_linear) -+ return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_YUV420 : 0; -+ break; -+ case DRM_FORMAT_NV12: -+ if (is_linear) -+ return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_NV12 : 0; -+ break; -+ default: -+ break; -+ } -+ return 0; ++ ff_weak_link_unlock(avbuf->context_wl); ++ av_buffer_unref(&bufref); +} + - static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) ++static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? 
b->m.planes[i].length : b->length; ++} ++ ++static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) ++{ ++ int i, ret; ++ const V4L2m2mContext * const s = buf_to_m2mctx(avbuf); ++ ++ for (i = 0; i < avbuf->num_planes; i++) { ++ int dma_fd = -1; ++ const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i); ++ ++ if (s->db_ctl != NULL) { ++ if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL) ++ return AVERROR(ENOMEM); ++ dma_fd = dmabuf_fd(avbuf->dmabuf[i]); ++ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) ++ avbuf->buf.m.planes[i].m.fd = dma_fd; ++ else ++ avbuf->buf.m.fd = dma_fd; ++ ++ if (!s->output_drm) ++ avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]); ++ } ++ else { ++ struct v4l2_exportbuffer expbuf; ++ memset(&expbuf, 0, sizeof(expbuf)); ++ ++ expbuf.index = avbuf->buf.index; ++ expbuf.type = avbuf->buf.type; ++ expbuf.plane = i; ++ ++ ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf); ++ if (ret < 0) ++ return AVERROR(errno); ++ dma_fd = expbuf.fd; ++ } ++ ++ avbuf->drm_frame.objects[i].size = blen; ++ avbuf->drm_frame.objects[i].fd = dma_fd; ++#if !CONFIG_LIBDRM ++ avbuf->drm_frame.objects[i].format_modifier = 0; ++#else ++ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; ++#endif ++ } ++ ++ return 0; + } + + static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset) { - AVFilterContext *avctx = link->dst; -@@ -1047,23 +1088,27 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) - avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); + unsigned int bytesused, length; ++ int rv = 0; - if (ctx->field_order == V4L2_FIELD_ANY) { -- AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)in->data[0]; -+ const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; -+ const uint32_t pixelformat = desc_pixelformat(drm_desc); -+ -+ if (pixelformat == 0) { -+ av_log(avctx, 
AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", -+ av_fourcc2str(drm_desc->layers[0].format), -+ drm_desc->nb_objects, drm_desc->objects[0].format_modifier); -+ return AVERROR(EINVAL); -+ } -+ - ctx->orig_width = drm_desc->layers[0].planes[0].pitch; - ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; + if (plane >= out->num_planes) + return AVERROR(EINVAL); +@@ -282,32 +655,65 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i + length = out->plane_info[plane].length; + bytesused = FFMIN(size+offset, length); - av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, - drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); - -- if (in->top_field_first) -- ctx->field_order = V4L2_FIELD_INTERLACED_TB; -- else -- ctx->field_order = V4L2_FIELD_INTERLACED_BT; +- memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset)); - -- ret = deint_v4l2m2m_set_format(output, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -+ ret = deint_v4l2m2m_set_format(output, pixelformat, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); - if (ret) - return ret; - -- ret = deint_v4l2m2m_set_format(capture, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -+ ret = deint_v4l2m2m_set_format(capture, pixelformat, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); - if (ret) - return ret; - -@@ -1082,6 +1127,12 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) - ret = deint_v4l2m2m_streamon(output); - if (ret) - return ret; -+ -+ if (in->top_field_first) -+ ctx->field_order = V4L2_FIELD_INTERLACED_TB; -+ else -+ ctx->field_order = V4L2_FIELD_INTERLACED_BT; -+ +- if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { +- 
out->planes[plane].bytesused = bytesused; +- out->planes[plane].length = length; +- } else { +- out->buf.bytesused = bytesused; +- out->buf.length = length; ++ if (size > length - offset) { ++ size = length - offset; ++ rv = AVERROR(ENOMEM); } - ret = deint_v4l2m2m_enqueue_frame(output, in); -@@ -1157,28 +1208,31 @@ again: - return 0; - } - -- { -+ recycle_q(&s->output); -+ n = count_enqueued(&s->output); +- return 0; ++ memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size); + -+ while (n < 6) { - AVFrame * frame; - int rv; ++ set_buf_length(out, plane, bytesused, length); ++ ++ return rv; ++} ++ ++static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf) ++{ ++ AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]); ++ AVBufferRef * newbuf; ++ ++ if (!bufref) ++ return NULL; ++ ++ newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0); ++ if (newbuf == NULL) ++ av_buffer_unref(&bufref); ++ ++ avbuf->status = V4L2BUF_RET_USER; ++ return newbuf; + } -- recycle_q(&s->output); -- n = count_enqueued(&s->output); -+ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { -+ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); -+ return rv; -+ } + static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) + { +- int i, ret; ++ int i; -- while (n < 6) { -- if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { -- av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); -- return rv; -- } -+ if (frame == NULL) { -+ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); -+ break; -+ } + frame->format = avbuf->context->av_pix_fmt; -- if (frame == NULL) { -- av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); -- break; -- } -+ rv = deint_v4l2m2m_filter_frame(inlink, frame); -+ av_frame_free(&frame); - -- deint_v4l2m2m_filter_frame(inlink, frame); -- av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); -- ++n; -- } +- 
for (i = 0; i < avbuf->num_planes; i++) { +- ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]); +- if (ret) +- return ret; ++ frame->buf[0] = wrap_avbuf(avbuf); ++ if (frame->buf[0] == NULL) ++ return AVERROR(ENOMEM); ++ ++ if (buf_to_m2mctx(avbuf)->output_drm) { ++#if !CONFIG_LIBDRM ++ return AVERROR_OPTION_NOT_FOUND; ++#else ++ /* 1. get references to the actual data */ ++ const int rv = ff_v4l2_context_frames_set(avbuf->context); + if (rv != 0) + return rv; + -+ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); -+ ++n; ++ frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); ++ return 0; ++#endif ++ } ++ + ++ /* 1. get references to the actual data */ ++ for (i = 0; i < avbuf->num_planes; i++) { ++ frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset; + frame->linesize[i] = avbuf->plane_info[i].bytesperline; +- frame->data[i] = frame->buf[i]->data; } - if (n < 6) { - -From b24e23b1a87bc51508e8fb2cf4ea31385059e150 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 19 Aug 2022 15:29:11 +0000 -Subject: [PATCH 063/186] v4l2_req: Enable use of MMAP for buffer alloc - -Use MMAP rather than DMABUF if either the dmabuf device can't be opened -or create_buf doesn't set the capability. 
---- - libavcodec/v4l2_req_dmabufs.c | 22 +++ - libavcodec/v4l2_req_dmabufs.h | 3 + - libavcodec/v4l2_req_media.c | 263 ++++++++++++++++++++++++++++----- - libavcodec/v4l2_req_media.h | 21 ++- - libavcodec/v4l2_request_hevc.c | 42 +++++- - 5 files changed, 307 insertions(+), 44 deletions(-) - -diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c -index ae6c64836972..c4bbed18c680 100644 ---- a/libavcodec/v4l2_req_dmabufs.c -+++ b/libavcodec/v4l2_req_dmabufs.c -@@ -36,6 +36,26 @@ static unsigned int total_bufs = 0; - static size_t total_size = 0; + /* fixup special cases */ +@@ -316,88 +722,152 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) + case AV_PIX_FMT_NV21: + if (avbuf->num_planes > 1) + break; +- frame->linesize[1] = avbuf->plane_info[0].bytesperline; +- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; ++ frame->linesize[1] = frame->linesize[0]; ++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); + break; + + case AV_PIX_FMT_YUV420P: + if (avbuf->num_planes > 1) + break; +- frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1; +- frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1; +- frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height; +- frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2); ++ frame->linesize[1] = frame->linesize[0] / 2; ++ frame->linesize[2] = frame->linesize[1]; ++ frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format); ++ frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2; + break; + + default: + break; + } + ++ if (avbuf->dmabuf[0] != NULL) { ++ for (unsigned int i = 0; i != avbuf->num_planes; ++i) ++ 
dmabuf_read_start(avbuf->dmabuf[i]); ++ } ++ ++ return 0; ++} ++ ++static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h) ++{ ++ if (dst_stride == src_stride && w + 32 >= dst_stride) { ++ memcpy(dst, src, dst_stride * h); ++ } ++ else { ++ while (--h >= 0) { ++ memcpy(dst, src, w); ++ dst += dst_stride; ++ src += src_stride; ++ } ++ } ++} ++ ++static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes) ++{ ++ return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); ++} ++ ++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++{ ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) ++ return AVERROR(EINVAL); ++ ++ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++ // Only currently cope with single buffer types ++ if (out->buf.length != 1) ++ return AVERROR_PATCHWELCOME; ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->planes[0].m.fd = src->objects[0].fd; ++ } ++ else { ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->buf.m.fd = src->objects[0].fd; ++ } ++ ++ // No need to copy src AVDescriptor and if we did then we may confuse ++ // fd close on free ++ out->ref_buf = av_buffer_ref(frame->buf[0]); ++ + return 0; + } + + static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + { +- int i, ret; +- struct v4l2_format fmt = out->context->format; +- int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? +- fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat; +- int height = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ? 
+- fmt.fmt.pix_mp.height : fmt.fmt.pix.height; +- int is_planar_format = 0; +- +- switch (pixel_format) { +- case V4L2_PIX_FMT_YUV420M: +- case V4L2_PIX_FMT_YVU420M: +-#ifdef V4L2_PIX_FMT_YUV422M +- case V4L2_PIX_FMT_YUV422M: +-#endif +-#ifdef V4L2_PIX_FMT_YVU422M +- case V4L2_PIX_FMT_YVU422M: +-#endif +-#ifdef V4L2_PIX_FMT_YUV444M +- case V4L2_PIX_FMT_YUV444M: +-#endif +-#ifdef V4L2_PIX_FMT_YVU444M +- case V4L2_PIX_FMT_YVU444M: +-#endif +- case V4L2_PIX_FMT_NV12M: +- case V4L2_PIX_FMT_NV21M: +- case V4L2_PIX_FMT_NV12MT_16X16: +- case V4L2_PIX_FMT_NV12MT: +- case V4L2_PIX_FMT_NV16M: +- case V4L2_PIX_FMT_NV61M: +- is_planar_format = 1; +- } +- +- if (!is_planar_format) { +- const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); +- int planes_nb = 0; +- int offset = 0; +- +- for (i = 0; i < desc->nb_components; i++) +- planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1); +- +- for (i = 0; i < planes_nb; i++) { +- int size, h = height; +- if (i == 1 || i == 2) { ++ int i; ++ int num_planes = 0; ++ int pel_strides[4] = {0}; ++ ++ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); ++ ++ if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) { ++ av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__); ++ return -1; ++ } ++ ++ for (i = 0; i != desc->nb_components; ++i) { ++ if (desc->comp[i].plane >= num_planes) ++ num_planes = desc->comp[i].plane + 1; ++ pel_strides[desc->comp[i].plane] = desc->comp[i].step; ++ } ++ ++ if (out->num_planes > 1) { ++ if (num_planes != out->num_planes) { ++ av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes); ++ return -1; ++ } ++ for (i = 0; i != num_planes; ++i) { ++ int w = frame->width; ++ int h = frame->height; ++ if (is_chroma(desc, i, num_planes)) { ++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); + h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); + } +- size = frame->linesize[i] * h; +- ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, 
offset); +- if (ret) +- return ret; +- offset += size; ++ ++ cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline, ++ frame->data[i], frame->linesize[i], ++ w * pel_strides[i], h); ++ set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length); + } +- return 0; + } ++ else ++ { ++ unsigned int offset = 0; ++ ++ for (i = 0; i != num_planes; ++i) { ++ int w = frame->width; ++ int h = frame->height; ++ int dst_stride = out->plane_info[0].bytesperline; ++ uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset; ++ ++ if (is_chroma(desc, i, num_planes)) { ++ // Is chroma ++ dst_stride >>= desc->log2_chroma_w; ++ offset += dst_stride * (out->context->height >> desc->log2_chroma_h); ++ w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w); ++ h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h); ++ } ++ else { ++ // Is luma or alpha ++ offset += dst_stride * out->context->height; ++ } ++ if (offset > out->plane_info[0].length) { ++ av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length); ++ return -1; ++ } + +- for (i = 0; i < out->num_planes; i++) { +- ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0); +- if (ret) +- return ret; ++ cpy_2d(dst, dst_stride, ++ frame->data[i], frame->linesize[i], ++ w * pel_strides[i], h); ++ } ++ set_buf_length(out, 0, offset, out->plane_info[0].length); + } +- + return 0; + } + +@@ -407,16 +877,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + * + ******************************************************************************/ + +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) + { +- v4l2_set_pts(out, frame->pts); +- +- return v4l2_buffer_swframe_to_buf(frame, out); ++ out->buf.flags = frame_is_key(frame) ? 
++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : ++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); ++ // Beware that colour info is held in format rather than the actual ++ // v4l2 buffer struct so this may not be as useful as you might hope ++ v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); ++ v4l2_set_color_range(out, frame->color_range); ++ // PTS & interlace are buffer vars ++ if (track_ts) ++ out->buf.timestamp = tv_from_int(track_ts); ++ else ++ v4l2_set_pts(out, frame->pts); ++ v4l2_set_interlace(out, frame_is_interlaced(frame), frame_is_tff(frame)); ++ ++ return frame->format == AV_PIX_FMT_DRM_PRIME ? ++ v4l2_buffer_primeframe_to_buf(frame, out) : ++ v4l2_buffer_swframe_to_buf(frame, out); + } + + int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) + { + int ret; ++ V4L2Context * const ctx = avbuf->context; + + av_frame_unref(frame); + +@@ -426,19 +911,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) + return ret; + + /* 2. get frame information */ +- if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) +- frame->flags |= AV_FRAME_FLAG_KEY; ++ frame_set_key(frame, avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); ++ frame->pict_type = (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) != 0 ? AV_PICTURE_TYPE_I : ++ (avbuf->buf.flags & V4L2_BUF_FLAG_PFRAME) != 0 ? AV_PICTURE_TYPE_P : ++ (avbuf->buf.flags & V4L2_BUF_FLAG_BFRAME) != 0 ? 
AV_PICTURE_TYPE_B : ++ AV_PICTURE_TYPE_NONE; + frame->color_primaries = v4l2_get_color_primaries(avbuf); + frame->colorspace = v4l2_get_color_space(avbuf); + frame->color_range = v4l2_get_color_range(avbuf); + frame->color_trc = v4l2_get_color_trc(avbuf); + frame->pts = v4l2_get_pts(avbuf); + frame->pkt_dts = AV_NOPTS_VALUE; ++ frame_set_interlace(frame, v4l2_buf_is_interlaced(avbuf), v4l2_buf_is_top_first(avbuf)); + + /* these values are updated also during re-init in v4l2_process_driver_event */ +- frame->height = avbuf->context->height; +- frame->width = avbuf->context->width; +- frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio; ++ frame->height = ctx->height; ++ frame->width = ctx->width; ++ frame->sample_aspect_ratio = ctx->sample_aspect_ratio; ++ ++ if (ctx->selection.height && ctx->selection.width) { ++ frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0; ++ frame->crop_top = ctx->selection.top < frame->height ? ctx->selection.top : 0; ++ frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ? ++ frame->width - (ctx->selection.left + ctx->selection.width) : 0; ++ frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ? ++ frame->height - (ctx->selection.top + ctx->selection.height) : 0; ++ } + + /* 3. report errors upstream */ + if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { +@@ -451,15 +949,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) + + int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) + { +- int ret; +- + av_packet_unref(pkt); +- ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf); +- if (ret) +- return ret; ++ ++ pkt->buf = wrap_avbuf(avbuf); ++ if (pkt->buf == NULL) ++ return AVERROR(ENOMEM); + + pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? 
avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; +- pkt->data = pkt->buf->data; ++ pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; ++ pkt->flags = 0; + + if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) + pkt->flags |= AV_PKT_FLAG_KEY; +@@ -474,39 +972,108 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) + return 0; + } + +-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp) + { + int ret; + +- ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0); +- if (ret) ++ if (extlen) { ++ ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0); ++ if (ret) ++ return ret; ++ } ++ ++ ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen); ++ if (ret && ret != AVERROR(ENOMEM)) + return ret; + +- v4l2_set_pts(out, pkt->pts); ++ if (timestamp) ++ out->buf.timestamp = tv_from_int(timestamp); ++ else ++ v4l2_set_pts(out, pkt->pts); + +- if (pkt->flags & AV_PKT_FLAG_KEY) +- out->flags = V4L2_BUF_FLAG_KEYFRAME; ++ out->buf.flags = (pkt->flags & AV_PKT_FLAG_KEY) != 0 ? 
++ (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : ++ (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); + +- return 0; ++ return ret; ++} ++ ++int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) ++{ ++ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); ++} ++ ++ ++static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) ++{ ++ V4L2Buffer * const avbuf = (V4L2Buffer *)data; ++ int i; ++ ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) { ++ struct V4L2Plane_info *p = avbuf->plane_info + i; ++ if (p->mm_addr != NULL) ++ munmap(p->mm_addr, p->length); ++ } ++ ++ if (avbuf->dmabuf[0] == NULL) { ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { ++ if (avbuf->drm_frame.objects[i].fd != -1) ++ close(avbuf->drm_frame.objects[i].fd); ++ } ++ } ++ else { ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) { ++ dmabuf_free(avbuf->dmabuf[i]); ++ } ++ } ++ ++ av_buffer_unref(&avbuf->ref_buf); ++ ++ ff_weak_link_unref(&avbuf->context_wl); ++ ++ av_free(avbuf); + } + +-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ++ ++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) + { +- V4L2Context *ctx = avbuf->context; + int ret, i; ++ V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); ++ AVBufferRef * bufref; ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ int want_mmap; ++ ++ *pbufref = NULL; ++ if (avbuf == NULL) ++ return AVERROR(ENOMEM); + +- avbuf->buf.memory = V4L2_MEMORY_MMAP; ++ bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0); ++ if (bufref == NULL) { ++ av_free(avbuf); ++ return AVERROR(ENOMEM); ++ } ++ ++ avbuf->context = ctx; ++ avbuf->buf.memory = mem; + avbuf->buf.type = ctx->type; + avbuf->buf.index = index; + ++ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { ++ avbuf->drm_frame.objects[i].fd = -1; ++ } ++ ++ avbuf->context_wl = ff_weak_link_ref(ctx->wl_master); ++ + if 
(V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->buf.length = VIDEO_MAX_PLANES; + avbuf->buf.m.planes = avbuf->planes; + } + +- ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); ++ ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf); + if (ret < 0) +- return AVERROR(errno); ++ goto fail; + + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->num_planes = 0; +@@ -518,33 +1085,41 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) + } else + avbuf->num_planes = 1; + +- for (i = 0; i < avbuf->num_planes; i++) { ++ want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && ++ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); + ++ for (i = 0; i < avbuf->num_planes; i++) { + avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? + ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : + ctx->format.fmt.pix.bytesperline; + + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; +- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, +- PROT_READ | PROT_WRITE, MAP_SHARED, +- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); ++ avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset; ++ ++ if (want_mmap) ++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, ++ PROT_READ | PROT_WRITE, MAP_SHARED, ++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); + } else { + avbuf->plane_info[i].length = avbuf->buf.length; +- avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, +- PROT_READ | PROT_WRITE, MAP_SHARED, +- buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); ++ avbuf->plane_info[i].offset = 0; ++ ++ if (want_mmap) ++ avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, ++ PROT_READ | PROT_WRITE, MAP_SHARED, ++ buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); + } + +- if (avbuf->plane_info[i].mm_addr == MAP_FAILED) +- return AVERROR(ENOMEM); ++ if (avbuf->plane_info[i].mm_addr == 
MAP_FAILED) { ++ avbuf->plane_info[i].mm_addr = NULL; ++ ret = AVERROR(ENOMEM); ++ goto fail; ++ } + } + + avbuf->status = V4L2BUF_AVAILABLE; + +- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) +- return 0; +- + if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { + avbuf->buf.m.planes = avbuf->planes; + avbuf->buf.length = avbuf->num_planes; +@@ -554,20 +1129,52 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) + avbuf->buf.length = avbuf->planes[0].length; + } + +- return ff_v4l2_buffer_enqueue(avbuf); ++ if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) { ++ // export_drm does dmabuf alloc if we aren't using v4l2 alloc ++ ret = v4l2_buffer_export_drm(avbuf); ++ if (ret) { ++ av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); ++ goto fail; ++ } ++ } ++ ++ *pbufref = bufref; ++ return 0; ++ ++fail: ++ av_buffer_unref(&bufref); ++ return ret; + } + + int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf) + { + int ret; ++ int qc; + +- avbuf->buf.flags = avbuf->flags; ++ if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) { ++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", ++ avbuf->context->name, avbuf->buf.index, ++ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, ++ avbuf->context->q_count); ++ } + + ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf); +- if (ret < 0) +- return AVERROR(errno); ++ if (ret < 0) { ++ int err = errno; ++ av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n", ++ avbuf->context->name, avbuf->buf.index, ++ err, strerror(err)); ++ return AVERROR(err); ++ } + ++ // Lock not wanted - if called from buffer free then lock already obtained ++ qc = atomic_fetch_add(&avbuf->context->q_count, 1) + 1; + avbuf->status = V4L2BUF_IN_DRIVER; ++ pthread_cond_broadcast(&avbuf->context->cond); ++ ++ av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n", ++ avbuf->context->name, 
avbuf->buf.index, ++ avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec, qc); + + return 0; + } +diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h +index e35b16130944..444ad94b145e 100644 +--- a/libavcodec/v4l2_buffers.h ++++ b/libavcodec/v4l2_buffers.h +@@ -28,31 +28,47 @@ + #include + #include + ++#include "avcodec.h" ++#include "libavutil/buffer.h" + #include "libavutil/frame.h" ++#include "libavutil/hwcontext_drm.h" + #include "packet.h" + + enum V4L2Buffer_status { + V4L2BUF_AVAILABLE, + V4L2BUF_IN_DRIVER, ++ V4L2BUF_IN_USE, + V4L2BUF_RET_USER, + }; + + /** + * V4L2Buffer (wrapper for v4l2_buffer management) + */ ++struct V4L2Context; ++struct ff_weak_link_client; ++struct dmabuf_h; ++ + typedef struct V4L2Buffer { +- /* each buffer needs to have a reference to its context */ ++ /* each buffer needs to have a reference to its context ++ * The pointer is good enough for most operation but once the buffer has ++ * been passed to the user the buffer may become orphaned so for free ops ++ * the weak link must be used to ensure that the context is actually ++ * there ++ */ + struct V4L2Context *context; ++ struct ff_weak_link_client *context_wl; + +- /* This object is refcounted per-plane, so we need to keep track +- * of how many context-refs we are holding. +- * This pointer is a RefStruct reference. 
*/ +- const struct V4L2m2mContext *context_ref; +- atomic_uint context_refcount; ++ /* DRM descriptor */ ++ AVDRMFrameDescriptor drm_frame; ++ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we ++ * are done ++ */ ++ AVBufferRef * ref_buf; + + /* keep track of the mmap address and mmap length */ + struct V4L2Plane_info { +- int bytesperline; ++ size_t bytesperline; ++ size_t offset; + void * mm_addr; + size_t length; + } plane_info[VIDEO_MAX_PLANES]; +@@ -63,9 +79,9 @@ typedef struct V4L2Buffer { + struct v4l2_buffer buf; + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + +- int flags; + enum V4L2Buffer_status status; + ++ struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here + } V4L2Buffer; + + /** +@@ -101,6 +117,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); + */ + int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); + ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp); ++ + /** + * Extracts the data from an AVFrame to a V4L2Buffer + * +@@ -109,7 +129,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); + * + * @returns 0 in case of success, a negative AVERROR code otherwise + */ +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); + + /** + * Initializes a V4L2Buffer +@@ -119,7 +139,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); + * + * @returns 0 in case of success, a negative AVERROR code otherwise + */ +-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); ++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); + + /** + * Enqueues a V4L2Buffer +@@ -130,5 +150,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* 
avbuf, int index); + */ + int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); + ++static inline void ++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) ++{ ++ avbuf->status = V4L2BUF_AVAILABLE; ++ av_buffer_unref(&avbuf->ref_buf); ++} ++ + + #endif // AVCODEC_V4L2_BUFFERS_H +diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c +index be1df3785b28..e20e3e485c64 100644 +--- a/libavcodec/v4l2_context.c ++++ b/libavcodec/v4l2_context.c +@@ -28,11 +28,14 @@ + #include + #include + #include "libavutil/mem.h" ++#include "libavutil/avassert.h" ++#include "libavutil/pixdesc.h" + #include "libavcodec/avcodec.h" + #include "decode.h" + #include "v4l2_buffers.h" + #include "v4l2_fmt.h" + #include "v4l2_m2m.h" ++#include "weak_link.h" + + struct v4l2_format_update { + uint32_t v4l2_fmt; +@@ -42,26 +45,173 @@ struct v4l2_format_update { + int update_avfmt; + }; + +-static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) ++ ++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) + { +- return V4L2_TYPE_IS_OUTPUT(ctx->type) ? +- container_of(ctx, V4L2m2mContext, output) : +- container_of(ctx, V4L2m2mContext, capture); ++ return (int64_t)n; + } + +-static inline AVCodecContext *logger(V4L2Context *ctx) ++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) + { +- return ctx_to_m2mctx(ctx)->avctx; ++ return (unsigned int)pts; ++} ++ ++// FFmpeg requires us to propagate a number of vars from the coded pkt into ++// the decoded frame. The only thing that tracks like that in V4L2 stateful ++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no ++// guarantees about PTS being unique or specified for every frame so replace ++// the supplied PTS with a simple incrementing number and keep a circular ++// buffer of all the things we want preserved (including the original PTS) ++// indexed by the tracking no. 
++static int64_t ++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pkt_size = avpkt->size, ++ .pts = avpkt->pts, ++ .dts = avpkt->dts, ++ .pkt_pos = avpkt->pos, ++ .duration = avpkt->duration, ++ .track_pts = track_pts ++ }; ++ return track_pts; ++} ++ ++static int64_t ++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pts = frame->pts, ++ .dts = AV_NOPTS_VALUE, ++ .duration = frame->duration, ++ .track_pts = track_pts ++ }; ++#if FF_API_FRAME_PKT ++FF_DISABLE_DEPRECATION_WARNINGS ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE].pkt_pos = frame->pkt_pos; ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ return track_pts; ++} ++ ++ ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_frame_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVFrame *const frame) ++{ ++ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) ++ { ++ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? 
AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ frame->pts = AV_NOPTS_VALUE; ++ frame->pkt_dts = AV_NOPTS_VALUE; ++ frame->duration = 0; ++#if FF_API_FRAME_PKT ++FF_DISABLE_DEPRECATION_WARNINGS ++ frame->pkt_size = -1; ++ frame->pkt_pos = -1; ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ } ++ else if (!t->discard) ++ { ++ frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; ++ frame->pkt_dts = t->dts; ++ frame->duration = t->duration; ++#if FF_API_FRAME_PKT ++FF_DISABLE_DEPRECATION_WARNINGS ++ frame->pkt_pos = t->pkt_pos; ++ frame->pkt_size = t->pkt_size; ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ ++ if (frame->pts != AV_NOPTS_VALUE) ++ x->last_pts = frame->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); ++ return 0; + } + +-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_pkt_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVPacket *const pkt) + { +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) ++ { ++ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ pkt->pts = AV_NOPTS_VALUE; ++ } ++ else if (!t->discard) ++ { ++ pkt->pts = t->pending ? 
t->pts : AV_NOPTS_VALUE; ++ ++ if (pkt->pts != AV_NOPTS_VALUE) ++ x->last_pts = pkt->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ // * Would like something much better than this...xlat(offset + out_count)? ++ pkt->dts = pkt->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ pkt->pts, t->track_pts, n); ++ return 0; ++} ++ ++ ++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) ++{ ++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? ++ container_of(ctx, V4L2m2mContext, output) : ++ container_of(ctx, V4L2m2mContext, capture); + } + +-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) ++static inline AVCodecContext *logger(const V4L2Context *ctx) + { +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++ return ctx_to_m2mctx(ctx)->avctx; + } + + static AVRational v4l2_get_sar(V4L2Context *ctx) +@@ -82,21 +232,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) + return sar; + } + +-static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) ++static inline int ctx_buffers_alloced(const V4L2Context * const ctx) + { +- struct v4l2_format *fmt1 = &ctx->format; +- int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? +- fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || +- fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height +- : +- fmt1->fmt.pix.width != fmt2->fmt.pix.width || +- fmt1->fmt.pix.height != fmt2->fmt.pix.height; ++ return ctx->bufrefs != NULL; ++} ++ ++// Width/Height changed or we don't have an alloc in the first place? ++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) ++{ ++ const struct v4l2_format *fmt1 = &ctx->format; ++ int ret = !ctx_buffers_alloced(ctx) || ++ (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
++ fmt1->fmt.pix_mp.width != fmt2->fmt.pix_mp.width || ++ fmt1->fmt.pix_mp.height != fmt2->fmt.pix_mp.height ++ : ++ fmt1->fmt.pix.width != fmt2->fmt.pix.width || ++ fmt1->fmt.pix.height != fmt2->fmt.pix.height); + + if (ret) +- av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n", ++ av_log(logger(ctx), AV_LOG_DEBUG, "V4L2 %s changed: alloc=%d (%dx%d) -> (%dx%d)\n", + ctx->name, +- v4l2_get_width(fmt1), v4l2_get_height(fmt1), +- v4l2_get_width(fmt2), v4l2_get_height(fmt2)); ++ ctx_buffers_alloced(ctx), ++ ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1), ++ ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2)); + + return ret; + } +@@ -154,76 +312,100 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd + } + } + +-static int v4l2_start_decode(V4L2Context *ctx) ++static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r) + { +- struct v4l2_decoder_cmd cmd = { +- .cmd = V4L2_DEC_CMD_START, +- .flags = 0, ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ struct v4l2_selection selection = { ++ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, ++ .target = V4L2_SEL_TGT_COMPOSE + }; +- int ret; + +- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DECODER_CMD, &cmd); +- if (ret) ++ memset(r, 0, sizeof(*r)); ++ if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection)) + return AVERROR(errno); + ++ *r = selection.r; + return 0; + } + +-/** +- * handle resolution change event and end of stream event +- * returns 1 if reinit was successful, negative if it failed +- * returns 0 if reinit was not executed +- */ +-static int v4l2_handle_event(V4L2Context *ctx) ++static int do_source_change(V4L2m2mContext * const s) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); +- struct v4l2_format cap_fmt = s->capture.format; +- struct v4l2_event evt = { 0 }; ++ AVCodecContext *const avctx = s->avctx; ++ + int ret; ++ int reinit; ++ struct v4l2_format cap_fmt = s->capture.format; + +- ret = ioctl(s->fd, VIDIOC_DQEVENT, 
&evt); +- if (ret < 0) { +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name); +- return 0; +- } ++ s->capture.done = 0; + +- if (evt.type == V4L2_EVENT_EOS) { +- ctx->done = 1; ++ ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); ++ if (ret) { ++ av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name); + return 0; + } + +- if (evt.type != V4L2_EVENT_SOURCE_CHANGE) +- return 0; ++ get_default_selection(&s->capture, &s->capture.selection); + +- ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt); +- if (ret) { +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name); +- return 0; ++ reinit = ctx_resolution_changed(&s->capture, &cap_fmt); ++ if ((s->quirks & FF_V4L2_QUIRK_REINIT_ALWAYS) != 0) ++ reinit = 1; ++ ++ s->capture.format = cap_fmt; ++ if (reinit) { ++ s->capture.height = ff_v4l2_get_format_height(&cap_fmt); ++ s->capture.width = ff_v4l2_get_format_width(&cap_fmt); + } + +- if (v4l2_resolution_changed(&s->capture, &cap_fmt)) { +- s->capture.height = v4l2_get_height(&cap_fmt); +- s->capture.width = v4l2_get_width(&cap_fmt); +- s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); +- } else { +- v4l2_start_decode(ctx); +- return 0; ++ // If we don't support selection (or it is bust) and we obviously have HD then kludge ++ if ((s->capture.selection.width == 0 || s->capture.selection.height == 0) && ++ (s->capture.height == 1088 && s->capture.width == 1920)) { ++ s->capture.selection = (struct v4l2_rect){.width = 1920, .height = 1080}; + } + +- s->reinit = 1; ++ s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); + +- if (s->avctx) +- ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height); +- if (ret < 0) +- av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n"); ++ av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", ++ av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)), ++ s->capture.sample_aspect_ratio.num, 
s->capture.sample_aspect_ratio.den, ++ s->capture.width, s->capture.height, ++ s->capture.selection.width, s->capture.selection.height, ++ s->capture.selection.left, s->capture.selection.top, reinit); + +- ret = ff_v4l2_m2m_codec_reinit(s); +- if (ret) { +- av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); +- return AVERROR(EINVAL); ++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); ++ if (ret) ++ av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); ++ s->draining = 0; ++ ++ if (!reinit) { ++ /* Buffers are OK so just stream off to ack */ ++ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); ++ } ++ else { ++ if (avctx) ++ ret = ff_set_dimensions(s->avctx, ++ s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, ++ s->capture.selection.height != 0 ? s->capture.selection.height : s->capture.height); ++ if (ret < 0) ++ av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); ++ ++ ff_v4l2_context_release(&s->capture); ++ ++ if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || ++ s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { ++ av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", ++ s->capture.width, s->capture.height, ++ ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); ++ return AVERROR(EINVAL); ++ } ++ ++ // Update pixel format - should only actually do something on initial change ++ s->capture.av_pix_fmt = ++ ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); ++ avctx->pix_fmt = s->output_drm ? 
AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt; ++ avctx->sw_pix_fmt = s->capture.av_pix_fmt; + } + +- /* reinit executed */ ++ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); + return 1; + } + +@@ -267,175 +449,293 @@ static int v4l2_stop_encode(V4L2Context *ctx) + return 0; + } + +-static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout) ++// DQ a buffer ++// Amalgamates all the various ways there are of signalling EOS/Event to ++// generate a consistant EPIPE. ++// ++// Sets ctx->flag_last if next dq would produce EPIPE (i.e. stream has stopped) ++// ++// Returns: ++// 0 Success ++// AVERROR(EPIPE) Nothing more to read ++// AVERROR(ENOSPC) No buffers in Q to put result in ++// * AVERROR(..) ++ ++ static int ++dq_buf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf) + { +- struct v4l2_plane planes[VIDEO_MAX_PLANES]; +- struct v4l2_buffer buf = { 0 }; +- V4L2Buffer *avbuf; +- struct pollfd pfd = { +- .events = POLLIN | POLLRDNORM | POLLPRI | POLLOUT | POLLWRNORM, /* default blocking capture */ +- .fd = ctx_to_m2mctx(ctx)->fd, ++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); ++ AVCodecContext * const avctx = m->avctx; ++ V4L2Buffer * avbuf; ++ const int is_mp = V4L2_TYPE_IS_MULTIPLANAR(ctx->type); ++ ++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; ++ ++ struct v4l2_buffer buf = { ++ .type = ctx->type, ++ .memory = V4L2_MEMORY_MMAP, + }; +- int i, ret; + +- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) { +- for (i = 0; i < ctx->num_buffers; i++) { +- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) +- break; +- } +- if (i == ctx->num_buffers) +- av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to " +- "userspace. 
Increase num_capture_buffers " +- "to prevent device deadlock or dropped " +- "packets/frames.\n"); +- } +- +- /* if we are draining and there are no more capture buffers queued in the driver we are done */ +- if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) { +- for (i = 0; i < ctx->num_buffers; i++) { +- /* capture buffer initialization happens during decode hence +- * detection happens at runtime +- */ +- if (!ctx->buffers) +- break; ++ *ppavbuf = NULL; ++ ++ if (ctx->flag_last) ++ return AVERROR(EPIPE); ++ ++ if (is_mp) { ++ buf.length = VIDEO_MAX_PLANES; ++ buf.m.planes = planes; ++ } ++ ++ while (ioctl(m->fd, VIDIOC_DQBUF, &buf) != 0) { ++ const int err = errno; ++ av_assert0(AVERROR(err) < 0); ++ if (err != EINTR) { ++ av_log(avctx, AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", ++ ctx->name, av_err2str(AVERROR(err))); + +- if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER) +- goto start; ++ if (err == EPIPE) ++ ctx->flag_last = 1; ++ ++ return AVERROR(err); + } +- ctx->done = 1; +- return NULL; + } ++ atomic_fetch_sub(&ctx->q_count, 1); ++ ++ avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; ++ ff_v4l2_buffer_set_avail(avbuf); ++ avbuf->buf = buf; ++ if (is_mp) { ++ memcpy(avbuf->planes, planes, sizeof(planes)); ++ avbuf->buf.m.planes = avbuf->planes; ++ } ++ // Done with any attached buffer ++ av_buffer_unref(&avbuf->ref_buf); + +-start: +- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) +- pfd.events = POLLOUT | POLLWRNORM; +- else { +- /* no need to listen to requests for more input while draining */ +- if (ctx_to_m2mctx(ctx)->draining) +- pfd.events = POLLIN | POLLRDNORM | POLLPRI; ++ if (V4L2_TYPE_IS_CAPTURE(ctx->type)) { ++ // Zero length cap buffer return == EOS ++ if ((is_mp ? buf.m.planes[0].bytesused : buf.bytesused) == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "Buffer empty - reQ\n"); ++ ++ // Must reQ so we don't leak ++ // May not matter if the next thing we do is release all the ++ // buffers but better to be tidy. 
++ ff_v4l2_buffer_enqueue(avbuf); ++ ++ ctx->flag_last = 1; ++ return AVERROR(EPIPE); ++ } ++ ++#ifdef V4L2_BUF_FLAG_LAST ++ // If flag_last set then this contains data but is the last frame ++ // so remember that but return OK ++ if ((buf.flags & V4L2_BUF_FLAG_LAST) != 0) ++ ctx->flag_last = 1; ++#endif + } + +- for (;;) { +- ret = poll(&pfd, 1, timeout); +- if (ret > 0) +- break; +- if (errno == EINTR) ++ *ppavbuf = avbuf; ++ return 0; ++} ++ ++/** ++ * handle resolution change event and end of stream event ++ * Expects to be called after the stream has stopped ++ * ++ * returns 1 if reinit was successful, negative if it failed ++ * returns 0 if reinit was not executed ++ */ ++static int ++get_event(V4L2m2mContext * const m) ++{ ++ AVCodecContext * const avctx = m->avctx; ++ struct v4l2_event evt = { 0 }; ++ ++ while (ioctl(m->fd, VIDIOC_DQEVENT, &evt) != 0) { ++ const int rv = AVERROR(errno); ++ if (rv == AVERROR(EINTR)) + continue; +- return NULL; ++ if (rv == AVERROR(EAGAIN)) { ++ av_log(avctx, AV_LOG_WARNING, "V4L2 failed to get expected event - assume EOS\n"); ++ return AVERROR_EOF; ++ } ++ av_log(avctx, AV_LOG_ERROR, "V4L2 VIDIOC_DQEVENT: %s\n", av_err2str(rv)); ++ return rv; + } + +- /* 0. 
handle errors */ +- if (pfd.revents & POLLERR) { +- /* if we are trying to get free buffers but none have been queued yet, +- * or if no buffers have been allocated yet, no need to raise a warning +- */ +- if (timeout == 0) { +- if (!ctx->buffers) +- return NULL; ++ av_log(avctx, AV_LOG_DEBUG, "Dq event %d\n", evt.type); + +- for (i = 0; i < ctx->num_buffers; i++) { +- if (ctx->buffers[i].status != V4L2BUF_AVAILABLE) +- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); +- } ++ if (evt.type == V4L2_EVENT_EOS) { ++ av_log(avctx, AV_LOG_TRACE, "V4L2 VIDIOC_EVENT_EOS\n"); ++ return AVERROR_EOF; ++ } ++ ++ if (evt.type == V4L2_EVENT_SOURCE_CHANGE) ++ return do_source_change(m); ++ ++ return 0; ++} ++ ++static inline int ++dq_ok(const V4L2Context * const c) ++{ ++ return c->streamon && atomic_load(&c->q_count) != 0; ++} ++ ++// Get a buffer ++// If output then just gets the buffer in the expected way ++// If capture then runs the capture state m/c to deal with res change etc. ++// If return value == 0 then *ppavbuf != NULL ++ ++static int ++get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout) ++{ ++ V4L2m2mContext * const m = ctx_to_m2mctx(ctx); ++ AVCodecContext * const avctx = m->avctx; ++ const int is_cap = V4L2_TYPE_IS_CAPTURE(ctx->type); ++ ++ const unsigned int poll_cap = (POLLIN | POLLRDNORM); ++ const unsigned int poll_out = (POLLOUT | POLLWRNORM); ++ const unsigned int poll_event = POLLPRI; ++ ++ *ppavbuf = NULL; ++ ++ for (;;) { ++ struct pollfd pfd = { ++ .fd = m->fd, ++ // If capture && stream not started then assume we are waiting for the initial event ++ .events = !is_cap ? poll_out : ++ !ff_v4l2_ctx_eos(ctx) && ctx->streamon ? 
poll_cap : ++ poll_event, ++ }; ++ int ret; ++ ++ if (ctx->done) { ++ av_log(avctx, AV_LOG_TRACE, "V4L2 %s already done\n", ctx->name); ++ return AVERROR_EOF; + } +- else +- av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name); + +- return NULL; +- } ++ // If capture && timeout == -1 then also wait for rx buffer free ++ if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining) ++ pfd.events |= poll_out; + +- /* 1. handle resolution changes */ +- if (pfd.revents & POLLPRI) { +- ret = v4l2_handle_event(ctx); +- if (ret < 0) { +- /* if re-init failed, abort */ +- ctx->done = 1; +- return NULL; ++ // If nothing Qed all we will get is POLLERR - avoid that ++ if ((pfd.events == poll_out && !dq_ok(&m->output)) || ++ (pfd.events == poll_cap && !dq_ok(&m->capture)) || ++ (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) { ++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); ++ return AVERROR(ENOSPC); + } +- if (ret) { +- /* if re-init was successful drop the buffer (if there was one) +- * since we had to reconfigure capture (unmap all buffers) +- */ +- return NULL; ++ ++ // Timeout kludged s.t. "forever" eventually gives up & produces logging ++ // If waiting for an event when we have seen a last_frame then we expect ++ // it to be ready already so force a short timeout ++ ret = poll(&pfd, 1, ++ ff_v4l2_ctx_eos(ctx) ? 10 : ++ timeout == -1 ? 3000 : timeout); ++ if (ret < 0) { ++ ret = AVERROR(errno); // Remember errno before logging etc. ++ av_assert0(ret < 0); + } +- } + +- /* 2. 
dequeue the buffer */ +- if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) { ++ av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s ret=%d, timeout=%d, events=%#x, revents=%#x\n", ++ ctx->name, ret, timeout, pfd.events, pfd.revents); + +- if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { +- /* there is a capture buffer ready */ +- if (pfd.revents & (POLLIN | POLLRDNORM)) +- goto dequeue; ++ if (ret < 0) { ++ if (ret == AVERROR(EINTR)) ++ continue; ++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll error %d (%s)\n", ctx->name, AVUNERROR(ret), av_err2str(ret)); ++ return ret; ++ } + +- /* the driver is ready to accept more input; instead of waiting for the capture +- * buffer to complete we return NULL so input can proceed (we are single threaded) +- */ +- if (pfd.revents & (POLLOUT | POLLWRNORM)) +- return NULL; ++ if (ret == 0) { ++ if (timeout == -1) ++ av_log(avctx, AV_LOG_ERROR, "V4L2 %s poll unexpected timeout: events=%#x\n", ctx->name, pfd.events); ++ if (ff_v4l2_ctx_eos(ctx)) { ++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s poll event timeout\n", ctx->name); ++ ret = get_event(m); ++ if (ret < 0) { ++ ctx->done = 1; ++ return ret; ++ } ++ } ++ return AVERROR(EAGAIN); + } + +-dequeue: +- memset(&buf, 0, sizeof(buf)); +- buf.memory = V4L2_MEMORY_MMAP; +- buf.type = ctx->type; +- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { +- memset(planes, 0, sizeof(planes)); +- buf.length = VIDEO_MAX_PLANES; +- buf.m.planes = planes; ++ if ((pfd.revents & POLLERR) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "V4L2 %s POLLERR\n", ctx->name); ++ return AVERROR_UNKNOWN; + } + +- ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf); +- if (ret) { +- if (errno != EAGAIN) { ++ if ((pfd.revents & poll_event) != 0) { ++ ret = get_event(m); ++ if (ret < 0) { + ctx->done = 1; +- if (errno != EPIPE) +- av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n", +- ctx->name, av_err2str(AVERROR(errno))); ++ return ret; + } +- return NULL; ++ continue; + } + +- if (ctx_to_m2mctx(ctx)->draining && 
!V4L2_TYPE_IS_OUTPUT(ctx->type)) { +- int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ? +- buf.m.planes[0].bytesused : buf.bytesused; +- if (bytesused == 0) { +- ctx->done = 1; +- return NULL; +- } +-#ifdef V4L2_BUF_FLAG_LAST +- if (buf.flags & V4L2_BUF_FLAG_LAST) +- ctx->done = 1; +-#endif ++ if ((pfd.revents & poll_cap) != 0) { ++ ret = dq_buf(ctx, ppavbuf); ++ if (ret == AVERROR(EPIPE)) ++ continue; ++ return ret; + } + +- avbuf = &ctx->buffers[buf.index]; +- avbuf->status = V4L2BUF_AVAILABLE; +- avbuf->buf = buf; +- if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { +- memcpy(avbuf->planes, planes, sizeof(planes)); +- avbuf->buf.m.planes = avbuf->planes; ++ if ((pfd.revents & poll_out) != 0) { ++ if (is_cap) ++ return AVERROR(EAGAIN); ++ return dq_buf(ctx, ppavbuf); + } +- return avbuf; ++ ++ av_log(avctx, AV_LOG_ERROR, "V4L2 poll unexpected events=%#x, revents=%#x\n", pfd.events, pfd.revents); ++ return AVERROR_UNKNOWN; + } ++} + +- return NULL; ++// Clear out flags and timestamps that should should be set by the user ++// Returns the passed avbuf ++static V4L2Buffer * ++clean_v4l2_buffer(V4L2Buffer * const avbuf) ++{ ++ struct v4l2_buffer *const buf = &avbuf->buf; ++ ++ buf->flags = 0; ++ buf->field = V4L2_FIELD_ANY; ++ buf->timestamp = (struct timeval){0}; ++ buf->timecode = (struct v4l2_timecode){0}; ++ buf->sequence = 0; ++ ++ return avbuf; ++} ++ ++int ++ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1) ++{ ++ V4L2Buffer * avbuf; ++ if (timeout1 != 0) { ++ int rv = get_qbuf(ctx, &avbuf, timeout1); ++ if (rv != 0) ++ return rv; ++ } ++ do { ++ get_qbuf(ctx, &avbuf, 0); ++ } while (avbuf); ++ return 0; + } + + static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) + { +- int timeout = 0; /* return when no more buffers to dequeue */ + int i; + + /* get back as many output buffers as possible */ +- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { +- do { +- } while (v4l2_dequeue_v4l2buf(ctx, timeout)); +- } ++ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) ++ ff_v4l2_dq_all(ctx, 
0); + + for (i = 0; i < ctx->num_buffers; i++) { +- if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) +- return &ctx->buffers[i]; ++ V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; ++ if (avbuf->status == V4L2BUF_AVAILABLE) ++ return clean_v4l2_buffer(avbuf); + } + + return NULL; +@@ -443,25 +743,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) + + static int v4l2_release_buffers(V4L2Context* ctx) + { +- struct v4l2_requestbuffers req = { +- .memory = V4L2_MEMORY_MMAP, +- .type = ctx->type, +- .count = 0, /* 0 -> unmaps buffers from the driver */ +- }; +- int i, j; ++ int i; ++ int ret = 0; ++ const int fd = ctx_to_m2mctx(ctx)->fd; + +- for (i = 0; i < ctx->num_buffers; i++) { +- V4L2Buffer *buffer = &ctx->buffers[i]; ++ // Orphan any buffers in the wild ++ ff_weak_link_break(&ctx->wl_master); ++ ++ if (ctx->bufrefs) { ++ for (i = 0; i < ctx->num_buffers; i++) ++ av_buffer_unref(ctx->bufrefs + i); ++ } + +- for (j = 0; j < buffer->num_planes; j++) { +- struct V4L2Plane_info *p = &buffer->plane_info[j]; +- if (p->mm_addr && p->length) +- if (munmap(p->mm_addr, p->length) < 0) +- av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); ++ if (fd != -1) { ++ struct v4l2_requestbuffers req = { ++ .memory = V4L2_MEMORY_MMAP, ++ .type = ctx->type, ++ .count = 0, /* 0 -> unmap all buffers from the driver */ ++ }; ++ ++ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { ++ if (errno == EINTR) ++ continue; ++ ++ ret = AVERROR(errno); ++ ++ av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", ++ ctx->name, av_err2str(AVERROR(errno))); ++ ++ if (ctx_to_m2mctx(ctx)->output_drm) ++ av_log(logger(ctx), AV_LOG_ERROR, ++ "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n" ++ "for all buffers: \n" ++ " 1. drmModeRmFB(..)\n" ++ " 2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... 
)\n"); + } + } ++ atomic_store(&ctx->q_count, 0); + +- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req); ++ return ret; + } + + static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) +@@ -490,6 +810,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm + + static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) + { ++ V4L2m2mContext* s = ctx_to_m2mctx(ctx); ++ V4L2m2mPriv *priv = s->avctx->priv_data; + enum AVPixelFormat pixfmt = ctx->av_pix_fmt; + struct v4l2_fmtdesc fdesc; + int ret; +@@ -503,21 +825,22 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) + return 0; + } + +- for (;;) { ++ for (;; ++fdesc.index) { + ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc); + if (ret) + return AVERROR(EINVAL); + ++ if (priv->pix_fmt != AV_PIX_FMT_NONE) { ++ if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) ++ continue; ++ } ++ + pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); + ret = v4l2_try_raw_format(ctx, pixfmt); +- if (ret){ +- fdesc.index++; +- continue; ++ if (ret == 0) { ++ *p = pixfmt; ++ return 0; + } +- +- *p = pixfmt; +- +- return 0; + } + + return AVERROR(EINVAL); +@@ -560,30 +883,131 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) + * + *****************************************************************************/ + +-int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) ++ ++static void flush_all_buffers_status(V4L2Context* const ctx) ++{ ++ int i; ++ ++ if (!ctx->bufrefs) ++ return; ++ ++ for (i = 0; i < ctx->num_buffers; ++i) { ++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; ++ if (buf->status == V4L2BUF_IN_DRIVER) ++ ff_v4l2_buffer_set_avail(buf); ++ } ++ atomic_store(&ctx->q_count, 0); ++} ++ ++static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) ++{ ++ int i; ++ int rv; ++ ++ if (!ctx->bufrefs) { ++ rv = 
ff_v4l2_context_init(ctx); ++ if (rv) { ++ av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); ++ return rv; ++ } ++ } ++ ++ ff_mutex_lock(&ctx->lock); ++ for (i = 0; i < ctx->num_buffers; ++i) { ++ struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; ++ if (buf->status == V4L2BUF_AVAILABLE) { ++ rv = ff_v4l2_buffer_enqueue(buf); ++ if (rv < 0) ++ break; ++ } ++ } ++ ff_mutex_unlock(&ctx->lock); ++ return rv; ++} ++ ++static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx) + { + int type = ctx->type; +- int ret; ++ int ret = 0; + +- ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type); +- if (ret < 0) +- return AVERROR(errno); ++ if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) ++ stuff_all_buffers(avctx, ctx); + +- ctx->streamon = (cmd == VIDIOC_STREAMON); ++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) { ++ ret = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, ++ av_err2str(ret)); ++ return ret; ++ } + +- return 0; ++ ctx->first_buf = 1; ++ ctx->streamon = 1; ++ ctx->flag_last = 0; ++ av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name); ++ return ret; ++} ++ ++static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx) ++{ ++ int type = ctx->type; ++ int ret = 0; ++ const int has_bufs = ctx_buffers_alloced(ctx); ++ ++ // Avoid doing anything if there is nothing we can do ++ if (!has_bufs && !ctx->streamon) ++ return 0; ++ ++ if (has_bufs) ++ ff_mutex_lock(&ctx->lock); ++ ++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) { ++ ret = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, ++ av_err2str(ret)); ++ } ++ else { ++ flush_all_buffers_status(ctx); ++ ++ ctx->streamon = 0; ++ ctx->flag_last = 0; ++ ++ av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name); ++ } ++ ++ if (has_bufs) ++ ff_mutex_unlock(&ctx->lock); ++ return ret; ++} ++ ++ ++int 
ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) ++{ ++ AVCodecContext * const avctx = logger(ctx); ++ ++ switch (cmd) { ++ case VIDIOC_STREAMOFF: ++ return set_streamoff(avctx, ctx); ++ case VIDIOC_STREAMON: ++ return set_streamon(avctx, ctx); ++ default: ++ av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd); ++ break; ++ } ++ return AVERROR_BUG; + } + + int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ V4L2m2mContext *const s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; ++ int64_t track_ts; + V4L2Buffer* avbuf; + int ret; + + if (!frame) { + ret = v4l2_stop_encode(ctx); + if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); + s->draining= 1; + return 0; + } +@@ -592,23 +1016,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) + if (!avbuf) + return AVERROR(EAGAIN); + +- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); ++ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); ++ ++ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); + if (ret) + return ret; + + return ff_v4l2_buffer_enqueue(avbuf); + } + +-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) ++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, ++ const void * extdata, size_t extlen) + { + V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; + V4L2Buffer* avbuf; + int ret; ++ int64_t track_ts; + + if (!pkt->size) { + ret = v4l2_stop_decode(ctx); ++ // Log but otherwise ignore stop failure + if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); + s->draining = 1; + return 0; + } +@@ -617,8 +1047,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) + 
if (!avbuf) + return AVERROR(EAGAIN); + +- ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); +- if (ret) ++ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); ++ ++ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); ++ if (ret == AVERROR(ENOMEM)) ++ av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", ++ __func__, pkt->size, avbuf->planes[0].length); ++ else if (ret) + return ret; + + return ff_v4l2_buffer_enqueue(avbuf); +@@ -626,42 +1061,77 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) + + int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) + { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; + V4L2Buffer *avbuf; ++ int rv; + +- /* +- * timeout=-1 blocks until: +- * 1. decoded frame available +- * 2. an input buffer is ready to be dequeued +- */ +- avbuf = v4l2_dequeue_v4l2buf(ctx, timeout); +- if (!avbuf) { +- if (ctx->done) +- return AVERROR_EOF; ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) ++ return rv; ++ if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); + +- return AVERROR(EAGAIN); +- } +- +- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); ++ return 0; + } + +-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) ++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) + { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; + V4L2Buffer *avbuf; ++ int rv; + +- /* +- * blocks until: +- * 1. encoded packet available +- * 2. an input buffer ready to be dequeued +- */ +- avbuf = v4l2_dequeue_v4l2buf(ctx, -1); +- if (!avbuf) { +- if (ctx->done) +- return AVERROR_EOF; ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) ++ return rv == AVERROR(ENOSPC) ? 
AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC ++ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); + +- return AVERROR(EAGAIN); ++ return 0; ++} ++ ++// Return 0 terminated list of drm fourcc video formats for this context ++// NULL if none found or error ++// Returned list is malloced so must be freed ++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN) ++{ ++ unsigned int i; ++ unsigned int n = 0; ++ unsigned int size = 0; ++ uint32_t * e = NULL; ++ *pN = 0; ++ ++ for (i = 0; i < 1024; ++i) { ++ struct v4l2_fmtdesc fdesc = { ++ .index = i, ++ .type = ctx->type ++ }; ++ ++ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc)) ++ return e; ++ ++ if (n + 1 >= size) { ++ unsigned int newsize = (size == 0) ? 16 : size * 2; ++ uint32_t * t = av_realloc(e, newsize * sizeof(*t)); ++ if (!t) ++ return e; ++ e = t; ++ size = newsize; ++ } ++ ++ e[n] = fdesc.pixelformat; ++ e[++n] = 0; ++ if (pN) ++ *pN = n; + } + +- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); ++ // If we've looped 1024 times we are clearly confused ++ *pN = 0; ++ av_free(e); ++ return NULL; + } + + int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) +@@ -693,78 +1163,194 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) + + int ff_v4l2_context_set_format(V4L2Context* ctx) + { +- return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); ++ int ret; ++ ++ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); ++ if (ret != 0) ++ return ret; ++ ++ // Check returned size against min size and if smaller have another go ++ // Only worry about plane[0] as this is meant to enforce limits for ++ // encoded streams where we might know a bit more about the shape ++ // than the driver ++ if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) { ++ if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage) ++ return 0; ++ 
ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size; ++ } ++ else { ++ if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage) ++ return 0; ++ ctx->format.fmt.pix.sizeimage = ctx->min_buf_size; ++ } ++ ++ ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format); ++ return ret; + } + + void ff_v4l2_context_release(V4L2Context* ctx) + { + int ret; + +- if (!ctx->buffers) ++ if (!ctx->bufrefs) + return; + + ret = v4l2_release_buffers(ctx); + if (ret) + av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name); + +- av_freep(&ctx->buffers); ++ av_freep(&ctx->bufrefs); ++ av_buffer_unref(&ctx->frames_ref); ++ ++ ff_mutex_destroy(&ctx->lock); ++ pthread_cond_destroy(&ctx->cond); + } + +-int ff_v4l2_context_init(V4L2Context* ctx) ++ ++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + struct v4l2_requestbuffers req; +- int ret, i; ++ int ret; ++ int i; + +- if (!v4l2_type_supported(ctx)) { +- av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); +- return AVERROR_PATCHWELCOME; +- } +- +- ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); +- if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); ++ av_assert0(ctx->bufrefs == NULL); + + memset(&req, 0, sizeof(req)); +- req.count = ctx->num_buffers; +- req.memory = V4L2_MEMORY_MMAP; ++ req.count = req_buffers; ++ req.memory = mem; + req.type = ctx->type; +- ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); +- if (ret < 0) { +- av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno)); +- return AVERROR(errno); ++ while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) { ++ if (errno != EINTR) { ++ ret = AVERROR(errno); ++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret)); ++ return ret; ++ } + } + 
+ ctx->num_buffers = req.count; +- ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer)); +- if (!ctx->buffers) { ++ ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs)); ++ if (!ctx->bufrefs) { + av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name); +- return AVERROR(ENOMEM); ++ ret = AVERROR(ENOMEM); goto fail_release; // ret still held 0 from the successful REQBUFS + } + +- for (i = 0; i < req.count; i++) { +- ctx->buffers[i].context = ctx; +- ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i); +- if (ret < 0) { ++ ctx->wl_master = ff_weak_link_new(ctx); ++ if (!ctx->wl_master) { ++ ret = AVERROR(ENOMEM); ++ goto fail_release; ++ } ++ ++ for (i = 0; i < ctx->num_buffers; i++) { ++ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); ++ if (ret) { + av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); +- goto error; ++ goto fail_release; + } + } + + av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name, + V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat), + req.count, +- v4l2_get_width(&ctx->format), +- v4l2_get_height(&ctx->format), ++ ff_v4l2_get_format_width(&ctx->format), ++ ff_v4l2_get_format_height(&ctx->format), + V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage, + V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline); + + return 0; + +-error: ++fail_release: + v4l2_release_buffers(ctx); ++ av_freep(&ctx->bufrefs); ++ return ret; ++} ++ ++int ff_v4l2_context_frames_set(V4L2Context *const ctx) ++{ ++ AVHWFramesContext *hwframes; ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ const int w = ctx->width != 0 ? ctx->width : s->avctx->width; ++ const int h = ctx->height != 0 ?
ctx->height : s->avctx->height; ++ int ret; ++ ++ if (ctx->frames_ref != NULL) { ++ const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data; ++ if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h) ++ return 0; ++ av_buffer_unref(&ctx->frames_ref); ++ } ++ ++ ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); ++ if (!ctx->frames_ref) ++ return AVERROR(ENOMEM); + +- av_freep(&ctx->buffers); ++ hwframes = (AVHWFramesContext*)ctx->frames_ref->data; ++ hwframes->format = AV_PIX_FMT_DRM_PRIME; ++ hwframes->sw_format = ctx->av_pix_fmt; ++ hwframes->width = w; ++ hwframes->height = h; ++ ret = av_hwframe_ctx_init(ctx->frames_ref); ++ if (ret < 0) { ++ av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret)); ++ av_buffer_unref(&ctx->frames_ref); ++ return ret; ++ } ++ ++ av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__, ++ av_get_pix_fmt_name(ctx->av_pix_fmt), w, h); ++ return 0; ++} ++ ++int ff_v4l2_context_init(V4L2Context* ctx) ++{ ++ struct v4l2_queryctrl qctrl; ++ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); ++ int ret; ++ ++ // It is not valid to reinit a context without a previous release ++ av_assert0(ctx->bufrefs == NULL); ++ ++ if (!v4l2_type_supported(ctx)) { ++ av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); ++ return AVERROR_PATCHWELCOME; ++ } ++ ++ ff_mutex_init(&ctx->lock, NULL); ++ pthread_cond_init(&ctx->cond, NULL); ++ atomic_init(&ctx->q_count, 0); ++ ++ ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); ++ if (ret) { ++ ret = AVERROR(errno); ++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); ++ goto fail_unlock; ++ } ++ ++ memset(&qctrl, 0, sizeof(qctrl)); ++ qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; ++ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { ++ ret = AVERROR(errno); ++ if (ret != AVERROR(EINVAL)) { ++ av_log(logger(ctx), AV_LOG_ERROR, "%s 
VIDIOC_QUERYCTRL failed: %s\n", ctx->name, av_err2str(ret)); ++ goto fail_unlock; ++ } ++ // Control unsupported - set default if wanted ++ if (ctx->num_buffers < 2) ++ ctx->num_buffers = 4; ++ } ++ else { ++ if (ctx->num_buffers < 2) ++ ctx->num_buffers = qctrl.minimum + 2; ++ ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum); ++ } ++ ++ ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); ++ if (ret < 0) ++ goto fail_unlock; ++ ++ return 0; + ++fail_unlock: ++ ff_mutex_destroy(&ctx->lock); pthread_cond_destroy(&ctx->cond); // undo both inits done above + return ret; + } +diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h +index 6f7460c89a9d..9f1c05a918ff 100644 +--- a/libavcodec/v4l2_context.h ++++ b/libavcodec/v4l2_context.h +@@ -32,6 +32,8 @@ + #include "libavutil/rational.h" + #include "codec_id.h" + #include "packet.h" ++#include "libavutil/buffer.h" ++#include "libavutil/thread.h" + #include "v4l2_buffers.h" + + typedef struct V4L2Context { +@@ -71,28 +73,57 @@ typedef struct V4L2Context { + */ + int width, height; + AVRational sample_aspect_ratio; ++ struct v4l2_rect selection; + + /** +- * Indexed array of V4L2Buffers ++ * If the default size of buffer is less than this then try to ++ * set to this. + */ +- V4L2Buffer *buffers; ++ uint32_t min_buf_size; ++ ++ /** ++ * Indexed array of pointers to V4L2Buffers ++ */ ++ AVBufferRef **bufrefs; + + /** + * Readonly after init. + */ + int num_buffers; + ++ /** ++ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF ++ */ ++ enum v4l2_memory buf_mem; ++ + /** + * Whether the stream has been started (VIDIOC_STREAMON has been sent). + */ + int streamon; + ++ /* 1st buffer after stream on */ ++ int first_buf; ++ + /** + * Either no more buffers available or an unrecoverable error was notified + * by the V4L2 kernel driver: once set the context has to be exited.
+ */ + int done; + ++ int flag_last; ++ ++ /** ++ * If NZ then when Qing frame/pkt use this rather than the ++ * "real" PTS ++ */ ++ uint64_t track_ts; ++ ++ AVBufferRef *frames_ref; ++ atomic_int q_count; ++ struct ff_weak_link_master *wl_master; ++ ++ AVMutex lock; ++ pthread_cond_t cond; + } V4L2Context; + + /** +@@ -103,6 +134,14 @@ typedef struct V4L2Context { + */ + int ff_v4l2_context_init(V4L2Context* ctx); + ++/** ++ * (re)set the hwframecontext from the current v4l2 context ++ * ++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables. ++ * @return 0 in case of success, a negative value representing the error otherwise. ++ */ ++int ff_v4l2_context_frames_set(V4L2Context *const ctx); ++ + /** + * Sets the V4L2Context format in the v4l2 driver. + * +@@ -120,6 +159,19 @@ int ff_v4l2_context_set_format(V4L2Context* ctx); + */ + int ff_v4l2_context_get_format(V4L2Context* ctx, int probe); + ++/** ++ * Get the list of drm fourcc pixel formats for this context ++ * ++ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context ++ * description for required variables. ++ * @param[in] pN A pointer to receive the number of formats ++ * found. May be NULL if not wanted. ++ * @return Pointer to malloced list of zero terminated formats, ++ * NULL if none or error. As list is malloced it must be ++ * freed. ++ */ ++uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN); ++ + /** + * Releases a V4L2Context. + * +@@ -148,7 +200,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd); + * @param[inout] pkt The AVPacket to dequeue to. + * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. + */ +-int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); ++int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout); + + /** + * Dequeues a buffer from a V4L2Context to an AVFrame. 
+@@ -157,7 +209,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); + * @param[in] ctx The V4L2Context to dequeue from. + * @param[inout] f The AVFrame to dequeue to. + * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) ++ * + * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. ++ * AVERROR(ENOSPC) if no buffer available to put ++ * the frame in + */ + int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); + +@@ -171,7 +226,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); + * @param[in] pkt A pointer to an AVPacket. + * @return 0 in case of success, a negative error otherwise. + */ +-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); ++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size); + + /** + * Enqueues a buffer to a V4L2Context from an AVFrame +@@ -184,4 +239,28 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); + */ + int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); + ++/** ++ * Dequeue all buffers on this queue ++ * ++ * Used to recycle output buffers ++ * ++ * @param[in] ctx The V4L2Context to dequeue from. ++ * @param[in] timeout1 A timeout on dequeuing the 1st buffer, ++ * all others have a timeout of zero ++ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return ++ * of the first dequeue operation, 0 otherwise.
++ */ ++int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1); ++ ++/** ++ * Returns the number of buffers currently queued ++ * ++ * @param[in] ctx The V4L2Context to evaluate ++ */ ++static inline int ++ff_v4l2_context_q_count(const V4L2Context* const ctx) ++{ ++ return atomic_load(&ctx->q_count); ++} ++ + #endif // AVCODEC_V4L2_CONTEXT_H +diff --git a/libavcodec/v4l2_fmt.c b/libavcodec/v4l2_fmt.c +index 6df47e3f5a3c..c820a1d5227b 100644 +--- a/libavcodec/v4l2_fmt.c ++++ b/libavcodec/v4l2_fmt.c +@@ -42,6 +42,14 @@ static const struct fmt_conversion { + { AV_FMT(RGB24), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB24) }, + { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGR32) }, + { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB32) }, ++ { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRX32) }, ++ { AV_FMT(RGB0), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBX32) }, ++ { AV_FMT(0BGR), AV_CODEC(RAWVIDEO), V4L2_FMT(XBGR32) }, ++ { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(XRGB32) }, ++ { AV_FMT(BGRA), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRA32) }, ++ { AV_FMT(RGBA), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBA32) }, ++ { AV_FMT(ABGR), AV_CODEC(RAWVIDEO), V4L2_FMT(ABGR32) }, ++ { AV_FMT(ARGB), AV_CODEC(RAWVIDEO), V4L2_FMT(ARGB32) }, + { AV_FMT(GRAY8), AV_CODEC(RAWVIDEO), V4L2_FMT(GREY) }, + { AV_FMT(YUV420P), AV_CODEC(RAWVIDEO), V4L2_FMT(YUV420) }, + { AV_FMT(YUYV422), AV_CODEC(RAWVIDEO), V4L2_FMT(YUYV) }, +diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c +index 15415cfc4eea..286191f6620c 100644 +--- a/libavcodec/v4l2_m2m.c ++++ b/libavcodec/v4l2_m2m.c +@@ -36,6 +36,15 @@ + #include "v4l2_context.h" + #include "v4l2_fmt.h" + #include "v4l2_m2m.h" ++#include "v4l2_req_dmabufs.h" ++ ++static void ++xlat_init(xlat_track_t * const x) ++{ ++ memset(x, 0, sizeof(*x)); ++ x->last_pts = AV_NOPTS_VALUE; ++} ++ + + static inline int v4l2_splane_video(struct v4l2_capability *cap) + { +@@ -69,7 +78,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) + + s->capture.done = s->output.done = 0; + 
s->capture.name = "capture"; ++ s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + s->output.name = "output"; ++ s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + atomic_init(&s->refcount, 0); + sem_init(&s->refsync, 0, 0); + +@@ -86,18 +97,58 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) + if (v4l2_mplane_video(&cap)) { + s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; + s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; ++ s->output.format.type = s->output.type; + return 0; + } + + if (v4l2_splane_video(&cap)) { + s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; ++ s->output.format.type = s->output.type; + return 0; + } + + return AVERROR(EINVAL); + } + ++static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ struct v4l2_format fmt = {.type = s->output.type}; ++ int rv; ++ uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt); ++ unsigned int w; ++ unsigned int h; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { ++ fmt.fmt.pix_mp.pixelformat = pixfmt; ++ fmt.fmt.pix_mp.width = avctx->width; ++ fmt.fmt.pix_mp.height = avctx->height; ++ } ++ else { ++ fmt.fmt.pix.pixelformat = pixfmt; ++ fmt.fmt.pix.width = avctx->width; ++ fmt.fmt.pix.height = avctx->height; ++ } ++ ++ rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt); ++ ++ if (rv != 0) { ++ rv = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv)); ++ return rv; ++ } ++ ++ w = ff_v4l2_get_format_width(&fmt); ++ h = ff_v4l2_get_format_height(&fmt); ++ ++ if (w < avctx->width || h < avctx->height) { ++ av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ + static int v4l2_probe_driver(V4L2m2mContext *s) + { + void *log_ctx = s->avctx; +@@ -117,6 +168,11 @@ static int 
v4l2_probe_driver(V4L2m2mContext *s) + goto done; + } + ++ // If being given frames (encode) check that V4L2 can cope with the size ++ if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO && ++ (ret = check_size(s->avctx, s)) != 0) ++ goto done; ++ + ret = ff_v4l2_context_get_format(&s->capture, 1); + if (ret) { + av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n"); +@@ -218,13 +274,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) + av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); + + /* 2. unmap the capture buffers (v4l2 and ffmpeg): +- * we must wait for all references to be released before being allowed +- * to queue new buffers. + */ +- av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n"); +- if (atomic_load(&s->refcount)) +- while(sem_wait(&s->refsync) == -1 && errno == EINTR); +- + ff_v4l2_context_release(&s->capture); + + /* 3. get the new capture format */ +@@ -243,7 +293,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) + + /* 5. 
complete reinit */ + s->draining = 0; +- s->reinit = 0; + + return 0; + } +@@ -259,6 +308,9 @@ static void v4l2_m2m_destroy_context(FFRefStructOpaque unused, void *context) + close(s->fd); + av_frame_free(&s->frame); + av_packet_unref(&s->buf_pkt); ++ av_freep(&s->extdata_data); ++ ++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n"); + } + + int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) +@@ -269,6 +321,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) + if (!s) + return 0; + ++ av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); ++ ++ if (s->avctx && av_codec_is_decoder(s->avctx->codec)) ++ av_packet_unref(&s->buf_pkt); ++ + if (s->fd >= 0) { + ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); + if (ret) +@@ -280,6 +337,14 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) + } + + ff_v4l2_context_release(&s->output); ++ av_buffer_unref(&s->device_ref); ++ ++ dmabufs_ctl_unref(&s->db_ctl); ++ ++ if (s->fd != -1) { ++ close(s->fd); ++ s->fd = -1; ++ } + + s->self_ref = NULL; + ff_refstruct_unref(&priv->context); +@@ -341,6 +406,7 @@ int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) + priv->context->output.num_buffers = priv->num_output_buffers; + priv->context->self_ref = priv->context; + priv->context->fd = -1; ++ xlat_init(&priv->context->xlat); + + priv->context->frame = av_frame_alloc(); + if (!priv->context->frame) { +diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h +index 4ba33dc33593..cb49e0c2fe66 100644 +--- a/libavcodec/v4l2_m2m.h ++++ b/libavcodec/v4l2_m2m.h +@@ -30,6 +30,7 @@ + #include + + #include "libavcodec/avcodec.h" ++#include "libavutil/pixfmt.h" + #include "v4l2_context.h" + + #define container_of(ptr, type, member) ({ \ +@@ -40,6 +41,38 @@ + { "num_output_buffers", "Number of buffers in the output context",\ + OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS } + ++#define FF_V4L2_M2M_TRACK_SIZE 128 ++typedef struct V4L2m2mTrackEl { ++ int discard; // If we see this 
buffer its been flushed, so discard ++ int pending; ++ int64_t pts; ++ int64_t dts; ++#if FF_API_FRAME_PKT ++ int64_t pkt_pos; ++ int pkt_size; ++#endif ++ int64_t duration; ++ int64_t track_pts; ++} V4L2m2mTrackEl; ++ ++typedef struct pts_stats_s ++{ ++ void * logctx; ++ const char * name; // For debug ++ unsigned int last_count; ++ unsigned int last_interval; ++ int64_t last_pts; ++ int64_t guess; ++} pts_stats_t; ++ ++typedef struct xlat_track_s { ++ unsigned int track_no; ++ int64_t last_pts; // Last valid PTS decoded ++ V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; ++} xlat_track_t; ++ ++struct dmabufs_ctl; ++ + typedef struct V4L2m2mContext { + char devname[PATH_MAX]; + int fd; +@@ -52,10 +85,10 @@ typedef struct V4L2m2mContext { + AVCodecContext *avctx; + sem_t refsync; + atomic_uint refcount; +- int reinit; + + /* null frame/packet received */ + int draining; ++ int running; + AVPacket buf_pkt; + + /* Reference to a frame. Only used during encoding */ +@@ -66,6 +99,36 @@ typedef struct V4L2m2mContext { + + /* reference back to V4L2m2mPriv */ + void *priv; ++ ++ AVBufferRef *device_ref; ++ ++ /* generate DRM frames */ ++ int output_drm; ++ ++ /* input frames are drmprime */ ++ int input_drm; ++ ++ /* Frame tracking */ ++ xlat_track_t xlat; ++ ++ pts_stats_t pts_stat; ++ ++ /* req pkt */ ++ int req_pkt; ++ int reorder_size; ++ ++ /* Ext data sent */ ++ int extdata_sent; ++ /* Ext data sent in packet - overrides ctx */ ++ void * extdata_data; ++ size_t extdata_size; ++ ++#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 ++#define FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN 2 ++ /* Quirks */ ++ unsigned int quirks; ++ ++ struct dmabufs_ctl * db_ctl; + } V4L2m2mContext; + + typedef struct V4L2m2mPriv { +@@ -75,6 +138,8 @@ typedef struct V4L2m2mPriv { + + int num_output_buffers; + int num_capture_buffers; ++ const char * dmabuf_alloc; ++ enum AVPixelFormat pix_fmt; + } V4L2m2mPriv; + + /** +@@ -128,4 +193,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); + */ + int 
ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); + ++ ++static inline unsigned int ff_v4l2_get_format_width(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++} ++ ++static inline unsigned int ff_v4l2_get_format_height(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++} ++ ++static inline uint32_t ff_v4l2_get_format_pixelformat(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; ++} ++ ++static inline int ff_v4l2_ctx_eos(const V4L2Context * const ctx) ++{ ++ return ctx->flag_last; ++} ++ ++ + #endif /* AVCODEC_V4L2_M2M_H */ +diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c +index aa2d759e1ea5..d3c3820379dd 100644 +--- a/libavcodec/v4l2_m2m_dec.c ++++ b/libavcodec/v4l2_m2m_dec.c +@@ -21,8 +21,15 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "config_components.h" ++ + #include + #include ++ ++#include "libavutil/avassert.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavutil/mem.h" + #include "libavutil/pixfmt.h" + #include "libavutil/pixdesc.h" + #include "libavutil/opt.h" +@@ -30,75 +37,279 @@ + #include "codec_internal.h" + #include "libavcodec/decode.h" + ++#include "libavcodec/hwaccels.h" ++#include "libavcodec/internal.h" ++#include "libavcodec/hwconfig.h" ++ + #include "v4l2_context.h" + #include "v4l2_m2m.h" + #include "v4l2_fmt.h" ++#include "v4l2_req_dmabufs.h" + +-static int v4l2_try_start(AVCodecContext *avctx) ++#if CONFIG_H264_DECODER ++#include "h264_parse.h" ++#endif ++#if CONFIG_HEVC_DECODER ++#include "hevc/parse.h" ++#endif ++ ++// Pick 64 for max last count - that is >1sec at 60fps ++#define STATS_LAST_COUNT_MAX 64 ++#define STATS_INTERVAL_MAX (1 << 30) ++ ++#ifndef 
FF_API_BUFFER_SIZE_T ++#define FF_API_BUFFER_SIZE_T 1 ++#endif ++ ++#define DUMP_FAILED_EXTRADATA 0 ++ ++#if DUMP_FAILED_EXTRADATA ++static inline char hex1(unsigned int x) + { +- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; +- V4L2Context *const capture = &s->capture; +- V4L2Context *const output = &s->output; +- struct v4l2_selection selection = { 0 }; +- int ret; ++ x &= 0xf; ++ return x <= 9 ? '0' + x : 'a' + x - 10; ++} + +- /* 1. start the output process */ +- if (!output->streamon) { +- ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON); +- if (ret < 0) { +- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); +- return ret; +- } ++static inline char * hex2(char * s, unsigned int x) ++{ ++ *s++ = hex1(x >> 4); ++ *s++ = hex1(x); ++ return s; ++} ++ ++static inline char * hex4(char * s, unsigned int x) ++{ ++ s = hex2(s, x >> 8); ++ s = hex2(s, x); ++ return s; ++} ++ ++static inline char * dash2(char * s) ++{ ++ *s++ = '-'; ++ *s++ = '-'; ++ return s; ++} ++ ++static void ++data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) ++{ ++ size_t i; ++ s = hex4(s, offset); ++ m += offset; ++ for (i = 0; i != 8; ++i) { ++ *s++ = ' '; ++ s = len > i + offset ? hex2(s, *m++) : dash2(s); + } ++ *s++ = ' '; ++ *s++ = ':'; ++ for (; i != 16; ++i) { ++ *s++ = ' '; ++ s = len > i + offset ? hex2(s, *m++) : dash2(s); ++ } ++ *s++ = 0; ++} + +- if (capture->streamon) +- return 0; ++static void ++log_dump(void * logctx, int lvl, const void * const data, const size_t len) ++{ ++ size_t i; ++ for (i = 0; i < len; i += 16) { ++ char buf[80]; ++ data16(buf, i, data, len); ++ av_log(logctx, lvl, "%s\n", buf); ++ } ++} ++#endif + +- /* 2. 
get the capture format */ +- capture->format.type = capture->type; +- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); +- if (ret) { +- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); +- return ret; ++static unsigned int pts_stats_interval(const pts_stats_t * const stats) ++{ ++ return stats->last_interval; ++} ++ ++static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess) ++{ ++ if (stats->last_count <= 1) ++ return stats->last_pts; ++ if (stats->last_pts == AV_NOPTS_VALUE || ++ fail_bad_guess && (stats->last_interval == 0 || ++ stats->last_count >= STATS_LAST_COUNT_MAX)) ++ return AV_NOPTS_VALUE; ++ return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; ++} ++ ++static void pts_stats_add(pts_stats_t * const stats, int64_t pts) ++{ ++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { ++ if (stats->last_count < STATS_LAST_COUNT_MAX) ++ ++stats->last_count; ++ return; + } + +- /* 2.1 update the AVCodecContext */ +- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); +- capture->av_pix_fmt = avctx->pix_fmt; ++ if (stats->last_pts != AV_NOPTS_VALUE) { ++ const int64_t interval = pts - stats->last_pts; + +- /* 3. 
set the crop parameters */ +- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; +- selection.r.height = avctx->coded_height; +- selection.r.width = avctx->coded_width; +- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); +- if (!ret) { +- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); +- if (ret) { +- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); +- } else { +- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); +- /* update the size of the resulting frame */ +- capture->height = selection.r.height; +- capture->width = selection.r.width; ++ if (interval < 0 || interval >= STATS_INTERVAL_MAX || ++ stats->last_count >= STATS_LAST_COUNT_MAX) { ++ if (stats->last_interval != 0) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", ++ __func__, stats->name, interval, stats->last_count); ++ stats->last_interval = 0; ++ } ++ else { ++ const int64_t frame_time = interval / (int64_t)stats->last_count; ++ ++ if (frame_time != stats->last_interval) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", ++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); ++ stats->last_interval = frame_time; + } + } + +- /* 4. 
init the capture context now that we have the capture format */ +- if (!capture->buffers) { +- ret = ff_v4l2_context_init(capture); +- if (ret) { +- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); +- return AVERROR(ENOMEM); ++ stats->last_pts = pts; ++ stats->last_count = 1; ++} ++ ++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) ++{ ++ *stats = (pts_stats_t){ ++ .logctx = logctx, ++ .name = name, ++ .last_count = 1, ++ .last_interval = 0, ++ .last_pts = AV_NOPTS_VALUE ++ }; ++} ++ ++// If abdata == NULL then this just counts space required ++// Unpacks avcC if detected ++static int ++h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) ++{ ++ const uint8_t * const xdend = extradata + extrasize; ++ const uint8_t * p = extradata; ++ uint8_t * d = abdata; ++ unsigned int n; ++ unsigned int len; ++ const unsigned int hdrlen = 4; ++ unsigned int need_pps = 1; ++ ++ if (extrasize < 8) ++ return AVERROR(EINVAL); ++ ++ if (p[0] == 0 && p[1] == 0) { ++ // Assume a couple of leading zeros are good enough to indicate NAL ++ if (abdata) ++ memcpy(d, p, extrasize); ++ return extrasize; ++ } ++ ++ // avcC starts with a 1 ++ if (p[0] != 1) ++ return AVERROR(EINVAL); ++ ++ p += 5; ++ n = *p++ & 0x1f; ++ ++doxps: ++ while (n--) { ++ if (xdend - p < 2) ++ return AVERROR(EINVAL); ++ len = (p[0] << 8) | p[1]; ++ p += 2; ++ if (xdend - p < (ptrdiff_t)len) ++ return AVERROR(EINVAL); ++ if (abdata) { ++ d[0] = 0; ++ d[1] = 0; ++ d[2] = 0; ++ d[3] = 1; ++ memcpy(d + 4, p, len); + } ++ d += len + hdrlen; ++ p += len; ++ } ++ if (need_pps) { ++ need_pps = 0; ++ if (p >= xdend) ++ return AVERROR(EINVAL); ++ n = *p++; ++ goto doxps; + } + +- /* 5. 
start the capture process */ +- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); +- if (ret) { +- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); ++ return d - abdata; ++} ++ ++static int ++copy_extradata(AVCodecContext * const avctx, ++ const void * const src_data, const int src_len, ++ void ** const pdst_data, size_t * const pdst_len) ++{ ++ int len; ++ ++ *pdst_len = 0; ++ av_freep(pdst_data); ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264) ++ len = h264_xd_copy(src_data, src_len, NULL); ++ else ++ len = src_len < 0 ? AVERROR(EINVAL) : src_len; ++ ++ // Zero length is OK but we want to stop - -ve is error val ++ if (len <= 0) ++ return len; ++ ++ if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) ++ return AVERROR(ENOMEM); ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264) ++ h264_xd_copy(src_data, src_len, *pdst_data); ++ else ++ memcpy(*pdst_data, src_data, len); ++ *pdst_len = len; ++ ++ return 0; ++} ++ ++ ++ ++static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) ++{ ++ int ret; ++ struct v4l2_decoder_cmd cmd = { ++ .cmd = V4L2_DEC_CMD_START, ++ .flags = 0, ++ }; ++ ++ if (s->output.streamon) ++ return 0; ++ ++ ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); ++ if (ret != 0) { ++ av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); + return ret; + } + ++ // STREAMON should do implicit START so this just for those that don't. ++ // It is optional so don't worry if it fails ++ if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { ++ ret = AVERROR(errno); ++ av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); ++ } ++ else { ++ av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); ++ } ++ return 0; ++} ++ ++static int v4l2_try_start(AVCodecContext *avctx) ++{ ++ V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; ++ int ret; ++ ++ /* 1. 
start the output process */ ++ if ((ret = check_output_streamon(avctx, s)) != 0) ++ return ret; + return 0; + } + +@@ -133,51 +344,822 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) + return 0; + } + +-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++static void ++set_best_effort_pts(AVCodecContext *const avctx, ++ pts_stats_t * const ps, ++ AVFrame *const frame) ++{ ++ pts_stats_add(ps, frame->pts); ++ ++ frame->best_effort_timestamp = pts_stats_guess(ps, 1); ++ // If we can't guess from just PTS - try DTS ++ if (frame->best_effort_timestamp == AV_NOPTS_VALUE) ++ frame->best_effort_timestamp = frame->pkt_dts; ++ ++ // We can't emulate what s/w does in a useful manner and using the ++ // "correct" answer seems to just confuse things. ++ frame->pkt_dts = frame->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts); ++} ++ ++static void ++xlat_flush(xlat_track_t * const x) ++{ ++ unsigned int i; ++ // Do not reset track_no - this ensures that any frames left in the decoder ++ // that turn up later get discarded. 
++ ++ x->last_pts = AV_NOPTS_VALUE; ++ for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { ++ x->track_els[i].pending = 0; ++ x->track_els[i].discard = 1; ++ } ++} ++ ++static void ++xlat_init(xlat_track_t * const x) ++{ ++ memset(x, 0, sizeof(*x)); ++ xlat_flush(x); ++} ++ ++static int ++xlat_pending(const V4L2m2mContext * const s) ++{ ++ const xlat_track_t *const x = &s->xlat; ++ unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; ++ int i; ++ const int64_t now = pts_stats_guess(&s->pts_stat, 0); ++ int64_t first_dts = AV_NOPTS_VALUE; ++ int no_dts_count = 0; ++ unsigned int interval = pts_stats_interval(&s->pts_stat); ++ ++ for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { ++ const V4L2m2mTrackEl * const t = x->track_els + n; ++ ++ if (first_dts == AV_NOPTS_VALUE) ++ if (t->dts == AV_NOPTS_VALUE) ++ ++no_dts_count; ++ else ++ first_dts = t->dts; ++ ++ // Discard only set on never-set or flushed entries ++ // So if we get here we've never successfully decoded a frame so allow ++ // more frames into the buffer before stalling ++ if (t->discard) ++ return i - 16; ++ ++ // If we've got this frame out then everything before this point ++ // must have entered the decoder ++ if (!t->pending) ++ break; ++ ++ // If we've never seen a pts all we can do is count frames ++ if (now == AV_NOPTS_VALUE) ++ continue; ++ ++ if (t->dts != AV_NOPTS_VALUE && now >= t->dts) ++ break; ++ } ++ ++ if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) { ++ const int iframes = (first_dts - now) / (int)interval; ++ const int t = iframes - s->reorder_size + no_dts_count; ++ ++// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n", ++// x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count); ++ ++ if (iframes > 0 && iframes < 64 && t < i) { ++ return t; ++ } ++ } ++ ++ return i; ++} ++ ++static inline int stream_started(const 
V4L2m2mContext * const s) { ++ return s->output.streamon; ++} ++ ++#define NQ_OK 0 ++#define NQ_Q_FULL 1 ++#define NQ_SRC_EMPTY 2 ++#define NQ_NONE 3 ++#define NQ_DRAINING 4 ++#define NQ_DEAD 5 ++ ++#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING) ++#define RETRY_NQ(nq_status) ((nq_status) == NQ_Q_FULL || (nq_status) == NQ_NONE) ++ ++// do_not_get If true then no new packet will be got but status will ++// be set appropriately ++ ++// AVERROR_EOF Flushing an already flushed stream ++// -ve Error (all errors except EOF are unexpected) ++// NQ_OK (0) OK ++// NQ_Q_FULL Dst full (retry if we think V4L2 Q has space now) ++// NQ_SRC_EMPTY Src empty (do not retry) ++// NQ_NONE Enqueue not attempted ++// NQ_DRAINING At EOS, dQ dest until EOS there too ++// NQ_DEAD Not running (do not retry, do not attempt capture dQ) ++ ++static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s, const int do_not_get) + { +- V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; +- V4L2Context *const capture = &s->capture; +- V4L2Context *const output = &s->output; + int ret; + +- if (!s->buf_pkt.size) { +- ret = ff_decode_get_packet(avctx, &s->buf_pkt); ++ // If we don't already have a coded packet - get a new one ++ // We will already have a coded pkt if the output Q was full last time we ++ // tried to Q it ++ if (!s->buf_pkt.size && !do_not_get) { ++ unsigned int i; ++ ++ for (i = 0; i < 256; ++i) { ++ uint8_t * side_data; ++ size_t side_size; ++ ++ ret = ff_decode_get_packet(avctx, &s->buf_pkt); ++ if (ret != 0) ++ break; ++ ++ // New extradata is the only side-data we undertand ++ side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); ++ if (side_data) { ++ av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); ++ if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) ++ av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", 
av_err2str(ret)); ++ s->extdata_sent = 0; ++ } ++ ++ if (s->buf_pkt.size != 0) ++ break; ++ ++ if (s->buf_pkt.side_data_elems == 0) { ++ av_log(avctx, AV_LOG_WARNING, "Empty pkt from ff_decode_get_packet - treating as EOF\n"); ++ ret = AVERROR_EOF; ++ break; ++ } ++ ++ // Retry a side-data only pkt ++ } ++ // If i >= 256 something has gone wrong ++ if (i >= 256) { ++ av_log(avctx, AV_LOG_ERROR, "Too many side-data only packets\n"); ++ return AVERROR(EIO); ++ } ++ ++ if (ret == AVERROR(EAGAIN)) { ++ if (!stream_started(s)) { ++ av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__); ++ return NQ_DEAD; ++ } ++ return NQ_SRC_EMPTY; ++ } ++ ++ if (ret == AVERROR_EOF) { ++ // EOF - enter drain mode ++ av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n", ++ ret, s->buf_pkt.size, stream_started(s), s->draining); ++ if (!stream_started(s)) { ++ av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n"); ++ s->draining = 1; ++ s->capture.done = 1; ++ return AVERROR_EOF; ++ } ++ ++ if (!s->draining) { ++ // Calling enqueue with an empty pkt starts drain ++ av_assert0(s->buf_pkt.size == 0); ++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); ++ if (ret) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret); ++ return ret; ++ } ++ } ++ return NQ_DRAINING; ++ } ++ + if (ret < 0) { +- if (ret == AVERROR(EAGAIN)) +- return ff_v4l2_context_dequeue_frame(capture, frame, 0); +- else if (ret != AVERROR_EOF) +- return ret; ++ av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); ++ return ret; + } + } + +- if (s->draining) +- goto dequeue; ++ if (s->draining) { ++ if (s->buf_pkt.size) { ++ av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); ++ av_packet_unref(&s->buf_pkt); ++ } ++ return NQ_DRAINING; ++ } ++ ++ if (!s->buf_pkt.size) ++ return NQ_NONE; ++ ++ if ((ret = check_output_streamon(avctx, s)) != 0) ++ return ret; + +- ret = 
ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); +- if (ret < 0 && ret != AVERROR(EAGAIN)) +- goto fail; ++ if (s->extdata_sent) ++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); ++ else ++ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); + +- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ +- if (ret != AVERROR(EAGAIN)) ++ if (ret == AVERROR(EAGAIN)) { ++ // Out of input buffers - keep packet ++ ret = NQ_Q_FULL; ++ } ++ else { ++ // In all other cases we are done with this packet + av_packet_unref(&s->buf_pkt); ++ s->extdata_sent = 1; + +- if (!s->draining) { +- ret = v4l2_try_start(avctx); + if (ret) { +- /* cant recover */ +- if (ret != AVERROR(ENOMEM)) +- ret = 0; +- goto fail; ++ av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret); ++ return ret; ++ } ++ } ++ ++ // Start if we haven't ++ { ++ const int ret2 = v4l2_try_start(avctx); ++ if (ret2) { ++ av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2); ++ ret = (ret2 == AVERROR(ENOMEM)) ? 
ret2 : NQ_DEAD; + } + } + +-dequeue: +- return ff_v4l2_context_dequeue_frame(capture, frame, -1); +-fail: +- av_packet_unref(&s->buf_pkt); + return ret; + } + ++static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) ++{ ++ int rv = 0; ++ ++ ff_mutex_lock(&ctx->lock); ++ ++ while (atomic_load(&ctx->q_count) == 0 && ctx->streamon) { ++ if (pthread_cond_wait(&ctx->cond, &ctx->lock) != 0) { ++ rv = AVERROR(errno); ++ av_log(avctx, AV_LOG_ERROR, "Cond wait failure: %s\n", av_err2str(rv)); ++ break; ++ } ++ } ++ ++ ff_mutex_unlock(&ctx->lock); ++ return rv; ++} ++ ++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++{ ++ V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; ++ int src_rv = -1; ++ int dst_rv = 1; // Non-zero (done), non-negative (error) number ++ unsigned int i = 0; ++ ++ do { ++ const int pending = xlat_pending(s); ++ const int prefer_dq = (pending > 4); ++ const int last_src_rv = src_rv; ++ ++ av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); ++ ++ // Enqueue another pkt for decode if ++ // (a) We don't have a lot of stuff in the buffer already OR ++ // (b) ... we (think we) do but we've failed to get a frame already OR ++ // (c) We've dequeued a lot of frames without asking for input ++ src_rv = try_enqueue_src(avctx, s, !(!prefer_dq || i != 0 || s->req_pkt > 2)); ++ ++ // If we got a frame last time or we've already tried to get a frame and ++ // we have nothing to enqueue then return now. rv will be AVERROR(EAGAIN) ++ // indicating that we want more input. 
++ // This should mean that once decode starts we enter a stable state where ++ // we alternately ask for input and produce output ++ if ((i != 0 || s->req_pkt) && src_rv == NQ_SRC_EMPTY) ++ break; ++ ++ if (src_rv == NQ_Q_FULL && last_src_rv == NQ_Q_FULL) { ++ av_log(avctx, AV_LOG_WARNING, "Poll thinks src Q has space; none found\n"); ++ break; ++ } ++ ++ // Try to get a new frame if ++ // (a) we haven't already got one AND ++ // (b) enqueue returned a status indicating that decode should be attempted ++ if (dst_rv != 0 && TRY_DQ(src_rv)) { ++ // Pick a timeout depending on state ++ // The pending count isn't completely reliable so it is good enough ++ // hint that we want a frame but not good enough to require it in ++ // all cases; however if it has got > 31 that exceeds its margin of ++ // error so require a frame to prevent ridiculous levels of latency ++ const int t = ++ src_rv == NQ_Q_FULL ? -1 : ++ src_rv == NQ_DRAINING ? 300 : ++ prefer_dq ? (s->running && pending > 31 ? 100 : 5) : 0; ++ ++ // Dequeue frame will unref any previous contents of frame ++ // if it returns success so we don't need an explicit unref ++ // when discarding ++ // This returns AVERROR(EAGAIN) on timeout or if ++ // there is room in the input Q and timeout == -1 ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); ++ ++ // Failure due to no buffer in Q? 
++ if (dst_rv == AVERROR(ENOSPC)) { ++ // Wait & retry ++ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); ++ } ++ } ++ ++ if (dst_rv == 0) { ++ set_best_effort_pts(avctx, &s->pts_stat, frame); ++ if (!s->running) { ++ s->running = 1; ++ av_log(avctx, AV_LOG_VERBOSE, "Decode running\n"); ++ } ++ } ++ ++ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { ++ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); ++ dst_rv = AVERROR_EOF; ++ s->capture.done = 1; ++ } ++ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) ++ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", ++ s->draining, s->capture.done); ++ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) ++ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", ++ s->draining, s->capture.done, dst_rv); ++ } ++ ++ ++i; ++ if (i >= 256) { ++ av_log(avctx, AV_LOG_ERROR, "Unexpectedly large retry count: %d\n", i); ++ src_rv = AVERROR(EIO); ++ } ++ ++ // Continue trying to enqueue packets if either ++ // (a) we succeeded last time OR ++ // (b) we didn't ret a frame and we can retry the input ++ } while (src_rv == NQ_OK || (dst_rv == AVERROR(EAGAIN) && RETRY_NQ(src_rv))); ++ ++ // Ensure that the frame contains nothing if we aren't returning a frame ++ // (might happen when discarding) ++ if (dst_rv) ++ av_frame_unref(frame); ++ ++ // If we got a frame this time ask for a pkt next time ++ s->req_pkt = (dst_rv == 0) ? s->req_pkt + 1 : 0; ++ ++#if 0 ++ if (dst_rv == 0) ++ { ++ static int z = 0; ++ if (++z > 50) { ++ av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n"); ++ ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); ++ return -1; ++ } ++ } ++#endif ++ ++ return dst_rv == 0 ? 0 : ++ src_rv < 0 ? src_rv : ++ dst_rv < 0 ? 
dst_rv : ++ AVERROR(EAGAIN); ++} ++ ++#if 0 ++#include ++static int64_t us_time(void) ++{ ++ struct timespec ts; ++ clock_gettime(CLOCK_MONOTONIC, &ts); ++ return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000; ++} ++ ++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) ++{ ++ int ret; ++ const int64_t now = us_time(); ++ int64_t done; ++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ret = v4l2_receive_frame2(avctx, frame); ++ done = us_time(); ++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret); ++ return ret; ++} ++#endif ++ ++static uint32_t ++avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile) ++{ ++ switch (codec_id) { ++ case AV_CODEC_ID_H264: ++ switch (avprofile) { ++ case FF_PROFILE_H264_BASELINE: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; ++ case FF_PROFILE_H264_CONSTRAINED_BASELINE: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE; ++ case FF_PROFILE_H264_MAIN: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN; ++ case FF_PROFILE_H264_EXTENDED: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED; ++ case FF_PROFILE_H264_HIGH: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH; ++ case FF_PROFILE_H264_HIGH_10: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10; ++ case FF_PROFILE_H264_HIGH_10_INTRA: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA; ++ case FF_PROFILE_H264_MULTIVIEW_HIGH: ++ case FF_PROFILE_H264_HIGH_422: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422; ++ case FF_PROFILE_H264_HIGH_422_INTRA: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA; ++ case FF_PROFILE_H264_STEREO_HIGH: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH; ++ case FF_PROFILE_H264_HIGH_444_PREDICTIVE: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE; ++ case FF_PROFILE_H264_HIGH_444_INTRA: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA; ++ case FF_PROFILE_H264_CAVLC_444: ++ return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA; ++ case 
FF_PROFILE_H264_HIGH_444: ++ default: ++ break; ++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12, ++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13, ++// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, ++// V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, ++// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, ++ } ++ break; ++ case AV_CODEC_ID_MPEG2VIDEO: ++ case AV_CODEC_ID_MPEG4: ++ case AV_CODEC_ID_VC1: ++ case AV_CODEC_ID_VP8: ++ case AV_CODEC_ID_VP9: ++ case AV_CODEC_ID_AV1: ++ // Most profiles are a simple number that matches the V4L2 enum ++ return avprofile; ++ default: ++ break; ++ } ++ return ~(uint32_t)0; ++} ++ ++// This check mirrors Chrome's profile check by testing to see if the profile ++// exists as a possible value for the V4L2 profile control ++static int ++check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) ++{ ++ struct v4l2_queryctrl query_ctrl; ++ struct v4l2_querymenu query_menu; ++ uint32_t profile_id; ++ ++ // An unset profile is almost certainly zero or -99 - do not reject ++ if (avctx->profile <= 0) { ++ av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile); ++ return 0; ++ } ++ ++ memset(&query_ctrl, 0, sizeof(query_ctrl)); ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_MPEG2VIDEO: ++ profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE; ++ break; ++ case AV_CODEC_ID_MPEG4: ++ profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE; ++ break; ++ case AV_CODEC_ID_H264: ++ profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE; ++ break; ++ case AV_CODEC_ID_VP8: ++ profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE; ++ break; ++ case AV_CODEC_ID_VP9: ++ profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE; ++ break; ++#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE ++ case AV_CODEC_ID_AV1: ++ profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE; ++ break; ++#endif ++ default: ++ av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id); ++ return 0; ++ } ++ ++ query_ctrl = 
(struct v4l2_queryctrl){.id = profile_id}; ++ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &query_ctrl) != 0) { ++ av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id); ++ } ++ else { ++ av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id); ++ ++ query_menu = (struct v4l2_querymenu){ ++ .id = query_ctrl.id, ++ .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile), ++ }; ++ ++ if (query_menu.index > query_ctrl.maximum || ++ query_menu.index < query_ctrl.minimum || ++ ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) { ++ return AVERROR(ENOENT); ++ } ++ } ++ ++ return 0; ++}; ++ ++static int ++check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc) ++{ ++ unsigned int i; ++ const uint32_t w = avctx->coded_width; ++ const uint32_t h = avctx->coded_height; ++ ++ if (w == 0 || h == 0 || fcc == 0) { ++ av_log(avctx, AV_LOG_TRACE, "%s: Size %dx%d or fcc %s empty\n", __func__, w, h, av_fourcc2str(fcc)); ++ return 0; ++ } ++ if ((s->quirks & FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN) != 0) { ++ av_log(avctx, AV_LOG_TRACE, "%s: Skipped (quirk): Size %dx%d, fcc %s\n", __func__, w, h, av_fourcc2str(fcc)); ++ return 0; ++ } ++ ++ for (i = 0;; ++i) { ++ struct v4l2_frmsizeenum fs = { ++ .index = i, ++ .pixel_format = fcc, ++ }; ++ ++ while (ioctl(s->fd, VIDIOC_ENUM_FRAMESIZES, &fs) != 0) { ++ const int err = AVERROR(errno); ++ if (err == AVERROR(EINTR)) ++ continue; ++ if (i == 0 && err == AVERROR(ENOTTY)) { ++ av_log(avctx, AV_LOG_DEBUG, "Framesize enum not supported\n"); ++ return 0; ++ } ++ if (err != AVERROR(EINVAL)) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to enum framesizes: %s", av_err2str(err)); ++ return err; ++ } ++ av_log(avctx, AV_LOG_WARNING, "Failed to find Size=%dx%d, fmt=%s in %u frame size enums\n", ++ w, h, av_fourcc2str(fcc), i); ++ return err; ++ } ++ ++ switch (fs.type) { ++ case V4L2_FRMSIZE_TYPE_DISCRETE: ++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Discrete: 
%dx%d\n", __func__, i, ++ fs.discrete.width,fs.discrete.height); ++ if (w == fs.discrete.width && h == fs.discrete.height) ++ return 0; ++ break; ++ case V4L2_FRMSIZE_TYPE_STEPWISE: ++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Stepwise: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, ++ fs.stepwise.min_width, fs.stepwise.min_height, ++ fs.stepwise.max_width, fs.stepwise.max_height, ++ fs.stepwise.step_width,fs.stepwise.step_height); ++ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && ++ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height && ++ (w - fs.stepwise.min_width) % fs.stepwise.step_width == 0 && ++ (h - fs.stepwise.min_height) % fs.stepwise.step_height == 0) ++ return 0; ++ break; ++ case V4L2_FRMSIZE_TYPE_CONTINUOUS: ++ av_log(avctx, AV_LOG_TRACE, "%s[%d]: Continuous: Min: %dx%d Max: %dx%d, Step: %dx%d\n", __func__, i, ++ fs.stepwise.min_width, fs.stepwise.min_height, ++ fs.stepwise.max_width, fs.stepwise.max_height, ++ fs.stepwise.step_width,fs.stepwise.step_height); ++ if (w >= fs.stepwise.min_width && w <= fs.stepwise.max_width && ++ h >= fs.stepwise.min_height && h <= fs.stepwise.max_height) ++ return 0; ++ break; ++ default: ++ av_log(avctx, AV_LOG_ERROR, "Unexpected framesize enum: %d", fs.type); ++ return AVERROR(EINVAL); ++ } ++ } ++} ++ ++static int ++get_quirks(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ struct v4l2_capability cap; ++ ++ memset(&cap, 0, sizeof(cap)); ++ while (ioctl(s->fd, VIDIOC_QUERYCAP, &cap) != 0) { ++ int err = errno; ++ if (err == EINTR) ++ continue; ++ av_log(avctx, AV_LOG_ERROR, "V4L2: Failed to get capabilities: %s\n", strerror(err)); ++ return AVERROR(err); ++ } ++ ++ // Could be made table driven if we have a few more but right now there ++ // seems no point ++ ++ // Meson (amlogic) always gives a resolution changed event after output ++ // streamon and userspace must (re)allocate capture buffers and streamon ++ // capture to clear the event even if the capture buffers were the 
right ++ // size in the first place. ++ if (strcmp(cap.driver, "meson-vdec") == 0) ++ s->quirks |= FF_V4L2_QUIRK_REINIT_ALWAYS | FF_V4L2_QUIRK_ENUM_FRAMESIZES_BROKEN; ++ ++ av_log(avctx, AV_LOG_DEBUG, "Driver '%s': Quirks=%#x\n", cap.driver, s->quirks); ++ return 0; ++} ++ ++// This heuristic is for H264 but use for everything ++static uint32_t max_coded_size(const AVCodecContext * const avctx) ++{ ++ uint32_t wxh = avctx->coded_width * avctx->coded_height; ++ uint32_t size; ++ ++ size = wxh * 3 / 2; ++ // H.264 Annex A table A-1 gives minCR which is either 2 or 4 ++ // unfortunately that doesn't yield an actually useful limit ++ // and it should be noted that frame 0 is special cased to allow ++ // a bigger number which really isn't helpful for us. So just pick ++ // frame_size / 2 ++ size /= 2; ++ // Add 64k to allow for any overheads and/or encoder hopefulness ++ // with small WxH ++ return size + (1 << 16); ++} ++ ++static void ++parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ s->reorder_size = 0; ++ ++ if (!avctx->extradata || !avctx->extradata_size) ++ return; ++ ++ switch (avctx->codec_id) { ++#if CONFIG_H264_DECODER ++ case AV_CODEC_ID_H264: ++ { ++ H264ParamSets ps; ++ int is_avc = 0; ++ int nal_length_size = 0; ++ int ret; ++ ++ memset(&ps, 0, sizeof(ps)); ++ ++ ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, ++ &ps, &is_avc, &nal_length_size, ++ avctx->err_recognition, avctx); ++ if (ret > 0) { ++ const SPS * sps = NULL; ++ unsigned int i; ++ for (i = 0; i != MAX_SPS_COUNT; ++i) { ++ if (ps.sps_list[i]) { ++ sps = ps.sps_list[i]; ++ break; ++ } ++ } ++ if (sps) { ++ avctx->profile = ff_h264_get_profile(sps); ++ avctx->level = sps->level_idc; ++ s->reorder_size = sps->num_reorder_frames; ++ } ++ } ++ ff_h264_ps_uninit(&ps); ++ break; ++ } ++#endif ++#if CONFIG_HEVC_DECODER ++ case AV_CODEC_ID_HEVC: ++ { ++ HEVCParamSets ps; ++ HEVCSEI sei; ++ int is_nalff = 0; ++ int nal_length_size = 0; ++ int ret; 
++ ++ memset(&ps, 0, sizeof(ps)); ++ memset(&sei, 0, sizeof(sei)); ++ ++ ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, ++ &ps, &sei, &is_nalff, &nal_length_size, ++ avctx->err_recognition, 0, avctx); ++ if (ret > 0) { ++ const HEVCSPS * sps = NULL; ++ unsigned int i; ++ for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) { ++ if (ps.sps_list[i]) { ++ sps = ps.sps_list[i]; ++ break; ++ } ++ } ++ if (sps) { ++ avctx->profile = sps->ptl.general_ptl.profile_idc; ++ avctx->level = sps->ptl.general_ptl.level_idc; ++ s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering; ++ } ++ } ++ ff_hevc_ps_uninit(&ps); ++ ff_hevc_reset_sei(&sei); ++ break; ++ } ++#endif ++ default: ++ break; ++ } ++} ++ ++static int ++choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) ++{ ++ const V4L2m2mPriv * const priv = avctx->priv_data; ++ unsigned int fmts_n; ++ uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n); ++ enum AVPixelFormat *fmts2 = NULL; ++ enum AVPixelFormat gf_pix_fmt; ++ unsigned int i; ++ unsigned int n = 0; ++ unsigned int pref_n = 1; ++ int rv = AVERROR(ENOENT); ++ ++ if (!fmts) ++ return AVERROR(ENOENT); ++ ++ if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) { ++ rv = AVERROR(ENOMEM); ++ goto error; ++ } ++ ++ // Filter for formats that are supported by ffmpeg and ++ // can accomodate the stream size ++ fmts2[n++] = AV_PIX_FMT_DRM_PRIME; ++ for (i = 0; i != fmts_n; ++i) { ++ const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO); ++ av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f)); ++ if (f == AV_PIX_FMT_NONE) ++ continue; ++ ++ if (check_size(avctx, s, fmts[i]) != 0) ++ continue; ++ ++ if (f == priv->pix_fmt) ++ pref_n = n; ++ fmts2[n++] = f; ++ } ++ ++ if (n < 2) { ++ av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__); ++ goto error; ++ } ++ ++ if (n != 2) { ++ // 
ffmpeg.c really only expects one s/w format. It thinks that the ++ // last format in the list is the s/w format of the h/w format but ++ // also chooses the first non-h/w format as the preferred s/w format. ++ // The only way of reconciling this is to dup our preferred format into ++ // both last & first place :-( ++ const enum AVPixelFormat t = fmts2[pref_n]; ++ fmts2[pref_n] = fmts2[1]; ++ fmts2[1] = t; ++ fmts2[n++] = t; ++ } ++ ++ fmts2[n] = AV_PIX_FMT_NONE; ++ ++ gf_pix_fmt = ff_get_format(avctx, fmts2); ++ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", ++ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), ++ avctx->coded_width, avctx->coded_height, ++ gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); ++ ++ if (gf_pix_fmt == AV_PIX_FMT_NONE) ++ goto error; ++ ++ if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { ++ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; ++ s->capture.av_pix_fmt = avctx->sw_pix_fmt; ++ s->output_drm = 1; ++ } ++ else { ++ avctx->pix_fmt = gf_pix_fmt; ++ s->capture.av_pix_fmt = gf_pix_fmt; ++ s->output_drm = 0; ++ } ++ ++ // Get format converts capture.av_pix_fmt back into a V4L2 format in the context ++ if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0) ++ goto error; ++ rv = ff_v4l2_context_set_format(&s->capture); ++ ++error: ++ av_free(fmts2); ++ av_free(fmts); ++ return rv; ++} ++ + static av_cold int v4l2_decode_init(AVCodecContext *avctx) + { + V4L2Context *capture, *output; +@@ -185,10 +1167,31 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) + V4L2m2mPriv *priv = avctx->priv_data; + int ret; + ++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ++#if FF_API_FRAME_PKT ++FF_DISABLE_DEPRECATION_WARNINGS ++ if (avctx->codec_id == AV_CODEC_ID_H264) { ++ if (avctx->ticks_per_frame == 1) { ++ if(avctx->time_base.den < INT_MAX/2) { ++ avctx->time_base.den *= 2; ++ } else ++ avctx->time_base.num /= 2; ++ } ++ avctx->ticks_per_frame = 2; ++ } 
++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ + ret = ff_v4l2_m2m_create_context(priv, &s); + if (ret < 0) + return ret; + ++ parse_extradata(avctx, s); ++ ++ xlat_init(&s->xlat); ++ pts_stats_init(&s->pts_stat, avctx, "decoder"); ++ + capture = &s->capture; + output = &s->output; + +@@ -196,14 +1199,45 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) + * by the v4l2 driver; this event will trigger a full pipeline reconfig and + * the proper values will be retrieved from the kernel driver. + */ +- output->height = capture->height = avctx->coded_height; +- output->width = capture->width = avctx->coded_width; ++// output->height = capture->height = avctx->coded_height; ++// output->width = capture->width = avctx->coded_width; ++ output->height = capture->height = 0; ++ output->width = capture->width = 0; + + output->av_codec_id = avctx->codec_id; + output->av_pix_fmt = AV_PIX_FMT_NONE; ++ output->min_buf_size = max_coded_size(avctx); + + capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; + capture->av_pix_fmt = avctx->pix_fmt; ++ capture->min_buf_size = 0; ++ ++ capture->av_pix_fmt = AV_PIX_FMT_NONE; ++ s->output_drm = 0; ++ ++ s->db_ctl = NULL; ++ if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { ++ if (strcmp(priv->dmabuf_alloc, "cma") == 0) ++ s->db_ctl = dmabufs_ctl_new(); ++ else { ++ av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc); ++ return AVERROR(EINVAL); ++ } ++ if (!s->db_ctl) { ++ av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc); ++ return AVERROR(ENOMEM); ++ } ++ } ++ ++ s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); ++ if (!s->device_ref) { ++ ret = AVERROR(ENOMEM); ++ return ret; ++ } ++ ++ ret = av_hwdevice_ctx_init(s->device_ref); ++ if (ret < 0) ++ return ret; + + s->avctx = avctx; + ret = ff_v4l2_m2m_codec_init(priv); +@@ -212,12 +1246,90 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) + return ret; + } + +- return 
v4l2_prepare_decoder(s); ++ if (avctx->extradata && ++ (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); ++#if DUMP_FAILED_EXTRADATA ++ log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); ++#endif ++ return ret; ++ } ++ ++ if ((ret = get_quirks(avctx, s)) != 0) ++ return ret; ++ ++ if ((ret = check_profile(avctx, s)) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); ++ return ret; ++ } ++ ++ // Size check done as part of format filtering ++ if ((ret = choose_capture_format(avctx, s)) != 0) ++ return ret; ++ ++ if ((ret = v4l2_prepare_decoder(s)) < 0) ++ return ret; ++ ++ return 0; + } + + static av_cold int v4l2_decode_close(AVCodecContext *avctx) + { +- return ff_v4l2_m2m_codec_end(avctx->priv_data); ++ int rv; ++ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ rv = ff_v4l2_m2m_codec_end(avctx->priv_data); ++ av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv); ++ return rv; ++} ++ ++static void v4l2_decode_flush(AVCodecContext *avctx) ++{ ++ // An alternatve and more drastic form of flush is to simply do this: ++ // v4l2_decode_close(avctx); ++ // v4l2_decode_init(avctx); ++ // The downside is that this keeps a decoder open until all the frames ++ // associated with it have been returned. This is a bit wasteful on ++ // possibly limited h/w resources and fails on a Pi for this reason unless ++ // more GPU mem is allocated than is the default. 
++ ++ V4L2m2mPriv * const priv = avctx->priv_data; ++ V4L2m2mContext * const s = priv->context; ++ V4L2Context * const output = &s->output; ++ V4L2Context * const capture = &s->capture; ++ ++ av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon); ++ ++ // Reflushing everything is benign, quick and avoids having to worry about ++ // states like EOS processing so don't try to optimize out (having got it ++ // wrong once) ++ ++ ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF); ++ ++ // Clear any buffered input packet ++ av_packet_unref(&s->buf_pkt); ++ ++ // Clear a pending EOS ++ if (ff_v4l2_ctx_eos(capture)) { ++ // Arguably we could delay this but this is easy and doesn't require ++ // thought or extra vars ++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMOFF); ++ ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); ++ } ++ ++ // V4L2 makes no guarantees about whether decoded frames are flushed or not ++ // so mark all frames we are tracking to be discarded if they appear ++ xlat_flush(&s->xlat); ++ ++ // resend extradata ++ s->extdata_sent = 0; ++ // clear status vars ++ s->running = 0; ++ s->draining = 0; ++ output->done = 0; ++ capture->done = 0; ++ ++ // Stream on will occur when we actually submit a new frame ++ av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__); + } + + #define OFFSET(x) offsetof(V4L2m2mPriv, x) +@@ -227,9 +1339,16 @@ static const AVOption options[] = { + V4L_M2M_DEFAULT_OPTS, + { "num_capture_buffers", "Number of buffers in the capture context", + OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, ++ { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, ++ { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, + { NULL}, + }; + ++static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = { ++ 
HW_CONFIG_INTERNAL(DRM_PRIME), ++ NULL ++}; ++ + #define M2MDEC_CLASS(NAME) \ + static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ + .class_name = #NAME "_v4l2m2m_decoder", \ +@@ -250,11 +1369,17 @@ static const AVOption options[] = { + .init = v4l2_decode_init, \ + FF_CODEC_RECEIVE_FRAME_CB(v4l2_receive_frame), \ + .close = v4l2_decode_close, \ ++ .flush = v4l2_decode_flush, \ + .bsfs = bsf_name, \ + .p.capabilities = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \ + .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \ + FF_CODEC_CAP_INIT_CLEANUP, \ + .p.wrapper_name = "v4l2m2m", \ ++ .p.pix_fmts = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \ ++ AV_PIX_FMT_NV12, \ ++ AV_PIX_FMT_YUV420P, \ ++ AV_PIX_FMT_NONE}, \ ++ .hw_configs = v4l2_m2m_hw_configs, \ + } + + M2MDEC(h264, "H.264", AV_CODEC_ID_H264, "h264_mp4toannexb"); +diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c +index 5770e87ea10b..6b00e113fa57 100644 +--- a/libavcodec/v4l2_m2m_enc.c ++++ b/libavcodec/v4l2_m2m_enc.c +@@ -21,13 +21,17 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "config.h" ++ + #include + #include + #include ++ + #include "encode.h" + #include "libavcodec/avcodec.h" + #include "libavutil/pixdesc.h" + #include "libavutil/pixfmt.h" ++#include "libavutil/mem.h" + #include "libavutil/opt.h" + #include "codec_internal.h" + #include "profiles.h" +@@ -38,6 +42,39 @@ + #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x + #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x + ++#if CONFIG_LIBDRM ++#include ++ ++// P030 should be defined in drm_fourcc.h and hopefully will be sometime ++// in the future but until then... 
++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') ++#endif ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++#endif ++ ++#ifndef V4L2_CID_CODEC_BASE ++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in videodev2.h hopefully will be sometime in the future but until then... ++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ + static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) + { + struct v4l2_streamparm parm = { 0 }; +@@ -148,15 +185,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p) + static int v4l2_check_b_frame_support(V4L2m2mContext *s) + { + if (s->avctx->max_b_frames) +- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); ++ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); + +- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); ++ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); + v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); + if (s->avctx->max_b_frames == 0) + return 0; + + avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); +- + return AVERROR_PATCHWELCOME; + } + +@@ -271,17 +307,212 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s) + return 0; + } + ++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) ++{ ++#if !CONFIG_LIBDRM ++ return 
AVERROR_OPTION_NOT_FOUND; ++#else ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ const uint32_t drm_fmt = src->layers[0].format; ++ // Treat INVALID as LINEAR ++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? ++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; ++ uint32_t pix_fmt = 0; ++ uint32_t w = 0; ++ uint32_t h = 0; ++ uint32_t bpl = src->layers[0].planes[0].pitch; ++ ++ // We really don't expect multiple layers ++ // All formats that we currently cope with are single object ++ ++ if (src->nb_layers != 1 || src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ switch (drm_fmt) { ++ case DRM_FORMAT_YUV420: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 3) ++ break; ++ pix_fmt = V4L2_PIX_FMT_YUV420; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ break; ++ ++ case DRM_FORMAT_NV12: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; ++ w = bpl; ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ case DRM_FORMAT_P030: ++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; ++ w = bpl / 2; // Matching lie to how we construct this ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!pix_fmt) ++ return AVERROR(EINVAL); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; ++ ++ pix->width = w; ++ pix->height = h; ++ 
pix->pixelformat = pix_fmt; ++ pix->plane_fmt[0].bytesperline = bpl; ++ pix->num_planes = 1; ++ } ++ else { ++ struct v4l2_pix_format *const pix = &format->fmt.pix; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->bytesperline = bpl; ++ } ++ ++ return 0; ++#endif ++} ++ ++// Do we have similar enough formats to be usable? ++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) ++{ ++ if (a->type != b->type) ++ return 0; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { ++ const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; ++ const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; ++ unsigned int i; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->num_planes != pb->num_planes) ++ return 0; ++ for (i = 0; i != pa->num_planes; ++i) { ++ if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) ++ return 0; ++ } ++ } ++ else { ++ const struct v4l2_pix_format *const pa = &a->fmt.pix; ++ const struct v4l2_pix_format *const pb = &b->fmt.pix; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->bytesperline != pb->bytesperline) ++ return 0; ++ } ++ return 1; ++} ++ ++static inline int q_full(const V4L2Context *const output) ++{ ++ return ff_v4l2_context_q_count(output) == output->num_buffers; ++} ++ + static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) + { + V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; + V4L2Context *const output = &s->output; ++ int rv; ++ const int needs_slot = q_full(output); ++ ++ av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot); ++ ++ // Signal EOF if needed (doesn't need q slot) ++ if (!frame) { ++ av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__); ++ return ff_v4l2_context_enqueue_frame(output, frame); ++ } ++ ++ if ((rv = ff_v4l2_dq_all(output, needs_slot? 
500 : 0)) != 0) { ++ // We should be able to return AVERROR(EAGAIN) to indicate buffer ++ // exhaustion, but ffmpeg currently treats that as fatal. ++ av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv)); ++ return rv; ++ } ++ ++ if (s->input_drm && !output->streamon) { ++ struct v4l2_format req_format = {.type = output->format.type}; ++ ++ // Set format when we first get a buffer ++ if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); ++ return rv; ++ } ++ ++ ff_v4l2_context_release(output); ++ ++ output->format = req_format; ++ ++ if ((rv = ff_v4l2_context_set_format(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); ++ return rv; ++ } ++ ++ if (!fmt_eq(&req_format, &output->format)) { ++ av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ output->selection.top = frame->crop_top; ++ output->selection.left = frame->crop_left; ++ output->selection.width = av_frame_cropped_width(frame); ++ output->selection.height = av_frame_cropped_height(frame); ++ ++ if ((rv = ff_v4l2_context_init(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); ++ return rv; ++ } ++ ++ { ++ struct v4l2_selection selection = { ++ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, ++ .target = V4L2_SEL_TGT_CROP, ++ .r = output->selection ++ }; ++ if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top, ++ av_err2str(AVERROR(errno))); ++ } ++ av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top); ++ } ++ } + + #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME +- if (frame && frame->pict_type == AV_PICTURE_TYPE_I) ++ if (frame->pict_type == 
AV_PICTURE_TYPE_I) + v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); #endif +- return ff_v4l2_context_enqueue_frame(output, frame); ++ rv = ff_v4l2_context_enqueue_frame(output, frame); ++ if (rv) { ++ av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv)); ++ } ++ ++ return rv; + } + + static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) +@@ -292,6 +523,11 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + AVFrame *frame = s->frame; + int ret; + ++ av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d cap %d\n", __func__, ++ ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture)); ++ ++ ff_v4l2_dq_all(output, 0); ++ + if (s->draining) + goto dequeue; + +@@ -328,7 +564,115 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + } + + dequeue: +- return ff_v4l2_context_dequeue_packet(capture, avpkt); ++ // Dequeue a frame ++ for (;;) { ++ int t = q_full(output) ? -1 : s->draining ? 300 : 0; ++ int rv2; ++ ++ // If output is full wait for either a packet or output to become not full ++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t); ++ ++ // If output was full retry packet dequeue ++ t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300; ++ rv2 = ff_v4l2_dq_all(output, t); ++ if (t == 0 || rv2 != 0) ++ break; ++ } ++ if (ret) ++ return (s->draining && ret == AVERROR(EAGAIN)) ? 
AVERROR_EOF : ret; ++ ++ if (capture->first_buf == 1) { ++ uint8_t * data; ++ const int len = avpkt->size; ++ ++ // 1st buffer after streamon should be SPS/PPS ++ capture->first_buf = 2; ++ ++ // Clear both possible stores so there is no chance of confusion ++ av_freep(&s->extdata_data); ++ s->extdata_size = 0; ++ av_freep(&avctx->extradata); ++ avctx->extradata_size = 0; ++ ++ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) ++ goto fail_no_mem; ++ ++ memcpy(data, avpkt->data, len); ++ av_packet_unref(avpkt); ++ ++ // We need to copy the header, but keep local if not global ++ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { ++ avctx->extradata = data; ++ avctx->extradata_size = len; ++ } ++ else { ++ s->extdata_data = data; ++ s->extdata_size = len; ++ } ++ ++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0); ++ ff_v4l2_dq_all(output, 0); ++ if (ret) ++ return ret; ++ } ++ ++ // First frame must be key so mark as such even if encoder forgot ++ if (capture->first_buf == 2) { ++ avpkt->flags |= AV_PKT_FLAG_KEY; ++ ++ // Add any extradata to the 1st packet we emit as we cannot create it at init ++ if (avctx->extradata_size > 0 && avctx->extradata) { ++ void * const side = av_packet_new_side_data(avpkt, ++ AV_PKT_DATA_NEW_EXTRADATA, ++ avctx->extradata_size); ++ if (!side) ++ goto fail_no_mem; ++ ++ memcpy(side, avctx->extradata, avctx->extradata_size); ++ } ++ } ++ ++ // Add SPS/PPS to the start of every key frame if non-global headers ++ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { ++ const size_t newlen = s->extdata_size + avpkt->size; ++ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); ++ ++ if (buf == NULL) ++ goto fail_no_mem; ++ ++ memcpy(buf->data, s->extdata_data, s->extdata_size); ++ memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); ++ ++ av_buffer_unref(&avpkt->buf); ++ avpkt->buf = buf; ++ avpkt->data = buf->data; ++ avpkt->size = newlen; ++ } ++ else 
if (ff_v4l2_context_q_count(capture) < 2) { ++ // Avoid running out of capture buffers ++ // In most cases the buffers will be returned quickly in which case ++ // we don't copy and can use the v4l2 buffers directly but sometimes ++ // ffmpeg seems to hold onto all of them for a long time (.mkv ++ // creation?) so avoid deadlock in those cases. ++ AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE); ++ if (buf == NULL) ++ goto fail_no_mem; ++ ++ memcpy(buf->data, avpkt->data, avpkt->size); ++ av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer ++ ++ avpkt->buf = buf; ++ avpkt->data = buf->data; ++ } ++ ++ capture->first_buf = 0; ++ return 0; ++ ++fail_no_mem: ++ av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n"); ++ ret = AVERROR(ENOMEM); ++ av_packet_unref(avpkt); ++ return ret; + } + + static av_cold int v4l2_encode_init(AVCodecContext *avctx) +@@ -340,6 +684,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + uint32_t v4l2_fmt_output; + int ret; + ++ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); ++ + ret = ff_v4l2_m2m_create_context(priv, &s); + if (ret < 0) + return ret; +@@ -347,13 +693,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + capture = &s->capture; + output = &s->output; + ++ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); ++ + /* common settings output/capture */ + output->height = capture->height = avctx->height; + output->width = capture->width = avctx->width; + + /* output context */ + output->av_codec_id = AV_CODEC_ID_RAWVIDEO; +- output->av_pix_fmt = avctx->pix_fmt; ++ output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt : ++ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? 
avctx->sw_pix_fmt : ++ AV_PIX_FMT_YUV420P; + + /* capture context */ + capture->av_codec_id = avctx->codec_id; +@@ -372,7 +722,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + v4l2_fmt_output = output->format.fmt.pix.pixelformat; + + pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); +- if (pix_fmt_output != avctx->pix_fmt) { ++ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); + av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); + return AVERROR(EINVAL); +@@ -390,9 +740,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx) + #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM + + #define V4L_M2M_CAPTURE_OPTS \ +- V4L_M2M_DEFAULT_OPTS,\ ++ { "num_output_buffers", "Number of buffers in the output context",\ ++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ + { "num_capture_buffers", "Number of buffers in the capture context", \ +- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } ++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } + + static const AVOption mpeg4_options[] = { + V4L_M2M_CAPTURE_OPTS, +diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c +new file mode 100644 +index 000000000000..93c56ba13f85 +--- /dev/null ++++ b/libavcodec/v4l2_req_decode_q.c +@@ -0,0 +1,108 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the 
following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++ ++#include "v4l2_req_decode_q.h" ++ ++int decode_q_in_q(const req_decode_ent * const d) ++{ ++ return d->in_q; ++} ++ ++void decode_q_add(req_decode_q * const q, req_decode_ent * const d) ++{ ++ pthread_mutex_lock(&q->q_lock); ++ if (!q->head) { ++ q->head = d; ++ q->tail = d; ++ d->prev = NULL; ++ } ++ else { ++ q->tail->next = d; ++ d->prev = q->tail; ++ q->tail = d; ++ } ++ d->next = NULL; ++ d->in_q = 1; ++ pthread_mutex_unlock(&q->q_lock); ++} ++ ++// Remove entry from Q - if head wake-up anything that was waiting ++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d) ++{ ++ int try_signal = 0; ++ ++ if (!d->in_q) ++ return; ++ ++ pthread_mutex_lock(&q->q_lock); ++ if (d->prev) ++ d->prev->next = d->next; ++ else { ++ try_signal = 1; // Only need to signal if we were head ++ q->head = d->next; ++ } ++ ++ if (d->next) ++ d->next->prev = d->prev; ++ else ++ q->tail = d->prev; ++ ++ // Not strictly needed but makes debug easier ++ d->next = NULL; ++ d->prev = NULL; ++ d->in_q = 0; ++ pthread_mutex_unlock(&q->q_lock); ++ ++ if (try_signal) ++ pthread_cond_broadcast(&q->q_cond); ++} ++ ++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d) ++{ ++ pthread_mutex_lock(&q->q_lock); ++ ++ while (q->head != d) ++ pthread_cond_wait(&q->q_cond, &q->q_lock); ++ 
++ pthread_mutex_unlock(&q->q_lock); ++} ++ ++void decode_q_uninit(req_decode_q * const q) ++{ ++ pthread_mutex_destroy(&q->q_lock); ++ pthread_cond_destroy(&q->q_cond); ++} ++ ++void decode_q_init(req_decode_q * const q) ++{ ++ memset(q, 0, sizeof(*q)); ++ pthread_mutex_init(&q->q_lock, NULL); ++ pthread_cond_init(&q->q_cond, NULL); ++} ++ ++ +diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h +new file mode 100644 +index 000000000000..0ff8bbe88207 +--- /dev/null ++++ b/libavcodec/v4l2_req_decode_q.h +@@ -0,0 +1,51 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H ++#define AVCODEC_V4L2_REQ_DECODE_Q_H ++ ++#include ++ ++typedef struct req_decode_ent { ++ struct req_decode_ent * next; ++ struct req_decode_ent * prev; ++ int in_q; ++} req_decode_ent; ++ ++typedef struct req_decode_q { ++ pthread_mutex_t q_lock; ++ pthread_cond_t q_cond; ++ req_decode_ent * head; ++ req_decode_ent * tail; ++} req_decode_q; ++ ++int decode_q_in_q(const req_decode_ent * const d); ++void decode_q_add(req_decode_q * const q, req_decode_ent * const d); ++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d); ++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d); ++void decode_q_uninit(req_decode_q * const q); ++void decode_q_init(req_decode_q * const q); ++ ++#endif ++ +diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c +new file mode 100644 +index 000000000000..99a8c19710bc +--- /dev/null ++++ b/libavcodec/v4l2_req_devscan.c +@@ -0,0 +1,475 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++#include ++ ++#include "v4l2_req_devscan.h" ++#include "v4l2_req_utils.h" ++ ++struct decdev { ++ enum v4l2_buf_type src_type; ++ uint32_t src_fmt_v4l2; ++ const char * vname; ++ const char * mname; ++}; ++ ++struct devscan { ++ struct decdev env; ++ unsigned int dev_size; ++ unsigned int dev_count; ++ struct decdev *devs; ++}; ++ ++static int video_src_pixfmt_supported(uint32_t fmt) ++{ ++ return 1; ++} ++ ++static void v4l2_setup_format(struct v4l2_format *format, unsigned int type, ++ unsigned int width, unsigned int height, ++ unsigned int pixelformat) ++{ ++ unsigned int sizeimage; ++ ++ memset(format, 0, sizeof(*format)); ++ format->type = type; ++ ++ sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(type)) { ++ format->fmt.pix_mp.width = width; ++ format->fmt.pix_mp.height = height; ++ format->fmt.pix_mp.plane_fmt[0].sizeimage = sizeimage; ++ format->fmt.pix_mp.pixelformat = pixelformat; ++ } else { ++ format->fmt.pix.width = width; ++ format->fmt.pix.height = height; ++ format->fmt.pix.sizeimage = sizeimage; ++ format->fmt.pix.pixelformat = pixelformat; ++ } ++} ++ ++static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat, ++ unsigned int width, unsigned int height) ++{ ++ struct v4l2_format format; ++ ++ v4l2_setup_format(&format, type, width, height, pixelformat); ++ ++ return ioctl(video_fd, VIDIOC_S_FMT, &format) ? 
-errno : 0; ++} ++ ++static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities) ++{ ++ struct v4l2_capability capability = { 0 }; ++ int rc; ++ ++ rc = ioctl(video_fd, VIDIOC_QUERYCAP, &capability); ++ if (rc < 0) ++ return -errno; ++ ++ if (capabilities != NULL) { ++ if ((capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0) ++ *capabilities = capability.device_caps; ++ else ++ *capabilities = capability.capabilities; ++ } ++ ++ return 0; ++} ++ ++static int devscan_add(struct devscan *const scan, ++ enum v4l2_buf_type src_type, ++ uint32_t src_fmt_v4l2, ++ const char * vname, ++ const char * mname) ++{ ++ struct decdev *d; ++ ++ if (scan->dev_size <= scan->dev_count) { ++ unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2; ++ d = realloc(scan->devs, n * sizeof(*d)); ++ if (!d) ++ return -ENOMEM; ++ scan->devs = d; ++ scan->dev_size = n; ++ } ++ ++ d = scan->devs + scan->dev_count; ++ d->src_type = src_type; ++ d->src_fmt_v4l2 = src_fmt_v4l2; ++ d->vname = strdup(vname); ++ if (!d->vname) ++ return -ENOMEM; ++ d->mname = strdup(mname); ++ if (!d->mname) { ++ free((char *)d->vname); ++ return -ENOMEM; ++ } ++ ++scan->dev_count; ++ return 0; ++} ++ ++void devscan_delete(struct devscan **const pScan) ++{ ++ unsigned int i; ++ struct devscan * const scan = *pScan; ++ ++ if (!scan) ++ return; ++ *pScan = NULL; ++ ++ for (i = 0; i < scan->dev_count; ++i) { ++ free((char*)scan->devs[i].mname); ++ free((char*)scan->devs[i].vname); ++ } ++ free(scan->devs); ++ free(scan); ++} ++ ++#define REQ_BUF_CAPS (\ ++ V4L2_BUF_CAP_SUPPORTS_DMABUF |\ ++ V4L2_BUF_CAP_SUPPORTS_REQUESTS |\ ++ V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF) ++ ++static void probe_formats(void * const dc, ++ struct devscan *const scan, ++ const int fd, ++ const unsigned int type_v4l2, ++ const char *const mpath, ++ const char *const vpath) ++{ ++ unsigned int i; ++ for (i = 0;; ++i) { ++ struct v4l2_fmtdesc fmtdesc = { ++ .index = i, ++ .type = type_v4l2 ++ }; ++ struct 
v4l2_requestbuffers rbufs = { ++ .count = 0, ++ .type = type_v4l2, ++ .memory = V4L2_MEMORY_MMAP ++ }; ++ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { ++ if (errno == EINTR) ++ continue; ++ if (errno != EINVAL) ++ request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2); ++ return; ++ } ++ if (!video_src_pixfmt_supported(fmtdesc.pixelformat)) ++ continue; ++ ++ if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) { ++ request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat); ++ continue; ++ } ++ ++ while (ioctl(fd, VIDIOC_REQBUFS, &rbufs)) { ++ if (errno != EINTR) { ++ request_debug(dc, "%s: Reqbufs failed\n", vpath); ++ continue; ++ } ++ } ++ ++ if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) { ++ request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities); ++ continue; ++ } ++ ++ request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n", ++ mpath, vpath, fmtdesc.pixelformat, type_v4l2); ++ devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath); ++ } ++} ++ ++ ++static int probe_video_device(void * const dc, ++ struct udev_device *const device, ++ struct devscan *const scan, ++ const char *const mpath) ++{ ++ int ret; ++ unsigned int capabilities = 0; ++ int video_fd = -1; ++ ++ const char *path = udev_device_get_devnode(device); ++ if (!path) { ++ request_err(dc, "%s: get video device devnode failed\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ video_fd = open(path, O_RDWR, 0); ++ if (video_fd == -1) { ++ ret = -errno; ++ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno); ++ goto fail; ++ } ++ ++ ret = v4l2_query_capabilities(video_fd, &capabilities); ++ if (ret < 0) { ++ request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities); ++ ++ if (!(capabilities & V4L2_CAP_STREAMING)) 
{ ++ request_debug(dc, "%s: missing required streaming capability\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) { ++ request_debug(dc, "%s: missing required mem2mem capability\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ /* Should check capture formats too... */ ++ if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0) ++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path); ++ if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) ++ probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path); ++ ++ close(video_fd); ++ return 0; ++ ++fail: ++ if (video_fd >= 0) ++ close(video_fd); ++ return ret; ++} ++ ++static int probe_media_device(void * const dc, ++ struct udev_device *const device, ++ struct devscan *const scan) ++{ ++ int ret; ++ int rv; ++ struct media_device_info device_info = { 0 }; ++ struct media_v2_topology topology = { 0 }; ++ struct media_v2_interface *interfaces = NULL; ++ struct udev *udev = udev_device_get_udev(device); ++ struct udev_device *video_device; ++ dev_t devnum; ++ int media_fd = -1; ++ ++ const char *path = udev_device_get_devnode(device); ++ if (!path) { ++ request_err(dc, "%s: get media device devnode failed\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ media_fd = open(path, O_RDWR, 0); ++ if (media_fd < 0) { ++ ret = -errno; ++ request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info); ++ if (rv < 0) { ++ ret = -errno; ++ request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); ++ if (rv < 0) { ++ ret = -errno; ++ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ if (topology.num_interfaces <= 0) 
{ ++ request_err(dc, "%s: media device has no interfaces\n", __func__); ++ ret = -EINVAL; ++ goto fail; ++ } ++ ++ interfaces = calloc(topology.num_interfaces, sizeof(*interfaces)); ++ if (!interfaces) { ++ request_err(dc, "%s: allocating media interface struct failed\n", __func__); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ topology.ptr_interfaces = (__u64)(uintptr_t)interfaces; ++ rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology); ++ if (rv < 0) { ++ ret = -errno; ++ request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret); ++ goto fail; ++ } ++ ++ for (int i = 0; i < topology.num_interfaces; i++) { ++ if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO) ++ continue; ++ ++ devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor); ++ video_device = udev_device_new_from_devnum(udev, 'c', devnum); ++ if (!video_device) { ++ ret = -errno; ++ request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device); ++ continue; ++ } ++ ++ ret = probe_video_device(dc, video_device, scan, path); ++ udev_device_unref(video_device); ++ ++ if (ret != 0) ++ goto fail; ++ } ++ ++fail: ++ free(interfaces); ++ if (media_fd != -1) ++ close(media_fd); ++ return ret; ++} ++ ++const char *decdev_media_path(const struct decdev *const dev) ++{ ++ return !dev ? NULL : dev->mname; ++} ++ ++const char *decdev_video_path(const struct decdev *const dev) ++{ ++ return !dev ? NULL : dev->vname; ++} ++ ++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev) ++{ ++ return !dev ? 0 : dev->src_type; ++} ++ ++uint32_t decdev_src_pixelformat(const struct decdev *const dev) ++{ ++ return !dev ? 0 : dev->src_fmt_v4l2; ++} ++ ++ ++const struct decdev *devscan_find(struct devscan *const scan, ++ const uint32_t src_fmt_v4l2) ++{ ++ unsigned int i; ++ ++ if (scan->env.mname && scan->env.vname) ++ return &scan->env; ++ ++ if (!src_fmt_v4l2) ++ return scan->dev_count ? 
scan->devs + 0 : NULL; ++ ++ for (i = 0; i != scan->dev_count; ++i) { ++ if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2) ++ return scan->devs + i; ++ } ++ return NULL; ++} ++ ++int devscan_build(void * const dc, struct devscan **pscan) ++{ ++ int ret; ++ struct udev *udev; ++ struct udev_enumerate *enumerate; ++ struct udev_list_entry *devices; ++ struct udev_list_entry *entry; ++ struct udev_device *device; ++ struct devscan * scan; ++ ++ *pscan = NULL; ++ ++ scan = calloc(1, sizeof(*scan)); ++ if (!scan) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH"); ++ scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH"); ++ if (scan->env.mname && scan->env.vname) { ++ request_info(dc, "Media/video device env overrides found: %s,%s\n", ++ scan->env.mname, scan->env.vname); ++ *pscan = scan; ++ return 0; ++ } ++ ++ udev = udev_new(); ++ if (!udev) { ++ request_err(dc, "%s: allocating udev context failed\n", __func__); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ enumerate = udev_enumerate_new(udev); ++ if (!enumerate) { ++ request_err(dc, "%s: allocating udev enumerator failed\n", __func__); ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ udev_enumerate_add_match_subsystem(enumerate, "media"); ++ udev_enumerate_scan_devices(enumerate); ++ ++ devices = udev_enumerate_get_list_entry(enumerate); ++ udev_list_entry_foreach(entry, devices) { ++ const char *path = udev_list_entry_get_name(entry); ++ if (!path) ++ continue; ++ ++ device = udev_device_new_from_syspath(udev, path); ++ if (!device) ++ continue; ++ ++ probe_media_device(dc, device, scan); ++ udev_device_unref(device); ++ } ++ ++ udev_enumerate_unref(enumerate); ++ udev_unref(udev); ++ ++ *pscan = scan; ++ return 0; ++ ++fail: ++ if (udev) ++ udev_unref(udev); ++ devscan_delete(&scan); ++ return ret; ++} ++ +diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h +new file mode 100644 +index 000000000000..49b5bb44b2d7 +--- /dev/null ++++ 
b/libavcodec/v4l2_req_devscan.h +@@ -0,0 +1,47 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef AVCODEC_V4L2_REQ_DEVSCAN_H ++#define AVCODEC_V4L2_REQ_DEVSCAN_H ++ ++#include ++ ++struct devscan; ++struct decdev; ++enum v4l2_buf_type; ++ ++/* These return pointers to data in the devscan structure and so are vaild ++ * for the lifetime of that ++ */ ++const char *decdev_media_path(const struct decdev *const dev); ++const char *decdev_video_path(const struct decdev *const dev); ++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev); ++uint32_t decdev_src_pixelformat(const struct decdev *const dev); ++ ++const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2); ++ ++int devscan_build(void * const dc, struct devscan **pscan); ++void devscan_delete(struct devscan **const pScan); ++ ++#endif +diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c +new file mode 100644 +index 000000000000..e157d4d55749 +--- /dev/null ++++ b/libavcodec/v4l2_req_dmabufs.c +@@ -0,0 +1,433 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_utils.h" ++ ++#define TRACE_ALLOC 0 ++ ++#ifndef __O_CLOEXEC ++#define __O_CLOEXEC 0 ++#endif ++ ++struct dmabufs_ctl; ++struct dmabuf_h; ++ ++struct dmabuf_fns { ++ int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size); ++ void (*buf_free)(struct dmabuf_h * dh); ++ int (*ctl_new)(struct dmabufs_ctl * dbsc); ++ void (*ctl_free)(struct dmabufs_ctl * dbsc); ++}; ++ ++struct dmabufs_ctl { ++ atomic_int ref_count; ++ int fd; ++ size_t page_size; ++ void * v; ++ const struct dmabuf_fns * fns; ++}; ++ ++struct dmabuf_h { ++ int fd; ++ size_t size; ++ size_t len; ++ void * mapptr; ++ void * v; ++ const struct dmabuf_fns * fns; ++}; ++ ++#if TRACE_ALLOC ++static unsigned int total_bufs = 0; ++static size_t total_size = 0; ++#endif ++ +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size) +{ + struct dmabuf_h *dh; @@ -25269,52 +8006,2114 @@ index ae6c64836972..c4bbed18c680 100644 + return dh; +} + - struct dmabuf_h * dmabuf_import(int fd, size_t size) - { - struct dmabuf_h *dh; -@@ -122,6 +142,8 @@ int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) - struct dma_buf_sync sync = { - .flags = flags - }; ++struct dmabuf_h * dmabuf_import(int fd, size_t size) ++{ ++ struct dmabuf_h *dh; ++ ++ fd = dup(fd); ++ if (fd < 0 || size == 0) ++ return NULL; ++ ++ dh = malloc(sizeof(*dh)); ++ if (!dh) { ++ close(fd); ++ return NULL; ++ } ++ ++ *dh = (struct dmabuf_h) { ++ .fd = fd, ++ .size = size, ++ .mapptr = MAP_FAILED ++ }; ++ ++#if TRACE_ALLOC ++ ++total_bufs; ++ 
total_size += dh->size; ++ request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); ++#endif ++ ++ return dh; ++} ++ ++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) ++{ ++ struct dmabuf_h * dh; ++ if (old != NULL) { ++ if (old->size >= size) { ++ return old; ++ } ++ dmabuf_free(old); ++ } ++ ++ if (size == 0 || ++ (dh = malloc(sizeof(*dh))) == NULL) ++ return NULL; ++ ++ *dh = (struct dmabuf_h){ ++ .fd = -1, ++ .mapptr = MAP_FAILED, ++ .fns = dbsc->fns ++ }; ++ ++ if (dh->fns->buf_alloc(dbsc, dh, size) != 0) ++ goto fail; ++ ++ ++#if TRACE_ALLOC ++ ++total_bufs; ++ total_size += dh->size; ++ request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); ++#endif ++ ++ return dh; ++ ++fail: ++ free(dh); ++ return NULL; ++} ++ ++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags) ++{ ++ struct dma_buf_sync sync = { ++ .flags = flags ++ }; + if (dh->fd == -1) + return 0; - while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { - const int err = errno; - if (errno == EINTR) ++ while (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync) == -1) { ++ const int err = errno; ++ if (errno == EINTR) ++ continue; ++ request_log("%s: ioctl failed: flags=%#x\n", __func__, flags); ++ return -err; ++ } ++ return 0; ++} ++ ++int dmabuf_write_start(struct dmabuf_h * const dh) ++{ ++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE); ++} ++ ++int dmabuf_write_end(struct dmabuf_h * const dh) ++{ ++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE); ++} ++ ++int dmabuf_read_start(struct dmabuf_h * const dh) ++{ ++ if (!dmabuf_map(dh)) ++ return -1; ++ return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ); ++} ++ ++int dmabuf_read_end(struct dmabuf_h * const dh) ++{ ++ return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ); ++} ++ ++ ++void * dmabuf_map(struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return NULL; ++ if 
(dh->mapptr != MAP_FAILED) ++ return dh->mapptr; ++ dh->mapptr = mmap(NULL, dh->size, ++ PROT_READ | PROT_WRITE, ++ MAP_SHARED | MAP_POPULATE, ++ dh->fd, 0); ++ if (dh->mapptr == MAP_FAILED) { ++ request_log("%s: Map failed\n", __func__); ++ return NULL; ++ } ++ return dh->mapptr; ++} ++ ++int dmabuf_fd(const struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return -1; ++ return dh->fd; ++} ++ ++size_t dmabuf_size(const struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return 0; ++ return dh->size; ++} ++ ++size_t dmabuf_len(const struct dmabuf_h * const dh) ++{ ++ if (!dh) ++ return 0; ++ return dh->len; ++} ++ ++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) ++{ ++ dh->len = len; ++} ++ ++void dmabuf_free(struct dmabuf_h * dh) ++{ ++ if (!dh) ++ return; ++ ++#if TRACE_ALLOC ++ --total_bufs; ++ total_size -= dh->size; ++ request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); ++#endif ++ ++ if (dh->fns != NULL && dh->fns->buf_free) ++ dh->fns->buf_free(dh); ++ ++ if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) ++ munmap(dh->mapptr, dh->size); ++ if (dh->fd != -1) ++ while (close(dh->fd) == -1 && errno == EINTR) ++ /* loop */; ++ free(dh); ++} ++ ++static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns) ++{ ++ struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc)); ++ ++ if (!dbsc) ++ return NULL; ++ ++ dbsc->fd = -1; ++ dbsc->fns = fns; ++ dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); ++ ++ if (fns->ctl_new(dbsc) != 0) ++ goto fail; ++ ++ return dbsc; ++ ++fail: ++ free(dbsc); ++ return NULL; ++} ++ ++static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) ++{ ++ request_debug(NULL, "Free dmabuf ctl\n"); ++ ++ dbsc->fns->ctl_free(dbsc); ++ ++ free(dbsc); ++} ++ ++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc) ++{ ++ struct dmabufs_ctl * const dbsc = *pDbsc; ++ ++ if (!dbsc) ++ return; ++ *pDbsc = NULL; ++ ++ if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0) ++ 
return; ++ ++ dmabufs_ctl_free(dbsc); ++} ++ ++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) ++{ ++ atomic_fetch_add(&dbsc->ref_count, 1); ++ return dbsc; ++} ++ ++//----------------------------------------------------------------------------- ++// ++// Alloc dmabuf via CMA ++ ++static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names) ++{ ++ for (; *names != NULL; ++names) ++ { ++ while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 && ++ errno == EINTR) ++ /* Loop */; ++ if (dbsc->fd != -1) ++ { ++ request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names); ++ return 0; ++ } ++ request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno)); ++ } ++ request_log("Unable to open any dma_heap device\n"); ++ return -1; ++} ++ ++static int ctl_cma_new(struct dmabufs_ctl * dbsc) ++{ ++ static const char * const names[] = { ++ "/dev/dma_heap/linux,cma", ++ "/dev/dma_heap/reserved", ++ NULL ++ }; ++ ++ return ctl_cma_new2(dbsc, names); ++} ++ ++static void ctl_cma_free(struct dmabufs_ctl * dbsc) ++{ ++ if (dbsc->fd != -1) ++ while (close(dbsc->fd) == -1 && errno == EINTR) ++ /* loop */; ++} ++ ++static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) ++{ ++ struct dma_heap_allocation_data data = { ++ .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), ++ .fd = 0, ++ .fd_flags = O_RDWR, ++ .heap_flags = 0 ++ }; ++ ++ while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { ++ int err = errno; ++ request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", ++ (uint64_t)data.len, ++ dbsc->fd, ++ err, ++ strerror(err)); ++ if (err == EINTR) ++ continue; ++ return -err; ++ } ++ ++ dh->fd = data.fd; ++ dh->size = (size_t)data.len; ++ ++// fprintf(stderr, "%s: size=%#zx, ftell=%#zx\n", __func__, ++// dh->size, (size_t)lseek(dh->fd, 0, SEEK_END)); ++ ++ return 0; ++} ++ ++static void buf_cma_free(struct dmabuf_h * dh) ++{ 
++ // Nothing needed ++} ++ ++static const struct dmabuf_fns dmabuf_cma_fns = { ++ .buf_alloc = buf_cma_alloc, ++ .buf_free = buf_cma_free, ++ .ctl_new = ctl_cma_new, ++ .ctl_free = ctl_cma_free, ++}; ++ ++struct dmabufs_ctl * dmabufs_ctl_new(void) ++{ ++ request_debug(NULL, "Dmabufs using CMA\n"); ++ return dmabufs_ctl_new2(&dmabuf_cma_fns); ++} ++ ++static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc) ++{ ++ static const char * const names[] = { ++ "/dev/dma_heap/vidbuf_cached", ++ "/dev/dma_heap/linux,cma", ++ "/dev/dma_heap/reserved", ++ NULL ++ }; ++ ++ return ctl_cma_new2(dbsc, names); ++} ++ ++static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = { ++ .buf_alloc = buf_cma_alloc, ++ .buf_free = buf_cma_free, ++ .ctl_new = ctl_cma_new_vidbuf_cached, ++ .ctl_free = ctl_cma_free, ++}; ++ ++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void) ++{ ++ request_debug(NULL, "Dmabufs using Vidbuf\n"); ++ return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns); ++} ++ diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h -index cfb17e801d59..c1d3d8c8d751 100644 ---- a/libavcodec/v4l2_req_dmabufs.h +new file mode 100644 +index 000000000000..9226ab2498a5 +--- /dev/null +++ b/libavcodec/v4l2_req_dmabufs.h -@@ -18,6 +18,9 @@ static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t s - } - /* Create from existing fd - dups(fd) */ - struct dmabuf_h * dmabuf_import(int fd, size_t size); +@@ -0,0 +1,69 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ 
The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef AVCODEC_V4L2_REQ_DMABUFS_H ++#define AVCODEC_V4L2_REQ_DMABUFS_H ++ ++#include ++ ++struct dmabufs_ctl; ++struct dmabuf_h; ++ ++struct dmabufs_ctl * dmabufs_ctl_new(void); ++struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void); ++void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); ++struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); ++ ++// Need not preserve old contents ++// On NULL return old buffer is freed ++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size); ++ ++static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) { ++ return dmabuf_realloc(dbsc, NULL, size); ++} ++/* Create from existing fd - dups(fd) */ ++struct dmabuf_h * dmabuf_import(int fd, size_t size); +/* Import an MMAP - return NULL if mapptr = MAP_FAIL */ +struct dmabuf_h * dmabuf_import_mmap(void * mapptr, size_t size); + - void * dmabuf_map(struct dmabuf_h * const dh); - - /* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ ++void * dmabuf_map(struct dmabuf_h * const dh); ++ ++/* flags from linux/dmabuf.h DMA_BUF_SYNC_xxx */ ++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags); ++ ++int dmabuf_write_start(struct dmabuf_h * const dh); ++int dmabuf_write_end(struct dmabuf_h * const dh); ++int dmabuf_read_start(struct dmabuf_h * const dh); ++int 
dmabuf_read_end(struct dmabuf_h * const dh); ++ ++int dmabuf_fd(const struct dmabuf_h * const dh); ++/* Allocated size */ ++size_t dmabuf_size(const struct dmabuf_h * const dh); ++/* Bytes in use */ ++size_t dmabuf_len(const struct dmabuf_h * const dh); ++/* Set bytes in use */ ++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len); ++void dmabuf_free(struct dmabuf_h * dh); ++ ++#endif +diff --git a/libavcodec/v4l2_req_hevc_v1.c b/libavcodec/v4l2_req_hevc_v1.c +new file mode 100644 +index 000000000000..4d908dac126e +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v1.c +@@ -0,0 +1,27 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#define HEVC_CTRLS_VERSION 1 ++#include "v4l2_req_hevc_vx.c" ++ +diff --git a/libavcodec/v4l2_req_hevc_v2.c b/libavcodec/v4l2_req_hevc_v2.c +new file mode 100644 +index 000000000000..1874e20a5d8d +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v2.c +@@ -0,0 +1,27 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#define HEVC_CTRLS_VERSION 2 ++#include "v4l2_req_hevc_vx.c" ++ +diff --git a/libavcodec/v4l2_req_hevc_v3.c b/libavcodec/v4l2_req_hevc_v3.c +new file mode 100644 +index 000000000000..1c5a84bea8d1 +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v3.c +@@ -0,0 +1,27 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#define HEVC_CTRLS_VERSION 3 ++#include "v4l2_req_hevc_vx.c" ++ +diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c +new file mode 100644 +index 000000000000..472df7cb0e39 +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v4.c +@@ -0,0 +1,27 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#define HEVC_CTRLS_VERSION 4 ++#include "v4l2_req_hevc_vx.c" ++ +diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c +new file mode 100644 +index 000000000000..bb7535a49201 +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_vx.c +@@ -0,0 +1,1454 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++// File included by v4l2_req_hevc_v* - not compiled on its own ++ ++#include "decode.h" ++#include "hevc/hevcdec.h" ++#include "hwconfig.h" ++#include "internal.h" ++#include "thread.h" ++ ++#include "libavutil/mem.h" ++ ++#if HEVC_CTRLS_VERSION == 1 ++#include "hevc-ctrls-v1.h" ++ ++// Fixup renamed entries ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT ++ ++#elif HEVC_CTRLS_VERSION == 2 ++#include "hevc-ctrls-v2.h" ++#elif HEVC_CTRLS_VERSION == 3 ++#include "hevc-ctrls-v3.h" ++#elif HEVC_CTRLS_VERSION == 4 ++#include ++#if !defined(V4L2_CID_STATELESS_HEVC_SPS) ++#include "hevc-ctrls-v4.h" ++#endif ++#else ++#error Unknown HEVC_CTRLS_VERSION ++#endif ++ ++#ifndef V4L2_CID_STATELESS_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS ++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX ++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE ++#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE ++ ++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED ++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED ++#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE ++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B ++#endif ++ ++#include "v4l2_request_hevc.h" ++ ++#include "libavutil/hwcontext_drm.h" ++ ++#include ++#include ++ ++#include "v4l2_req_devscan.h" ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_media.h" ++#include "v4l2_req_utils.h" ++ ++// Attached to 
buf[0] in frame ++// Pooled in hwcontext so generally create once - 1/frame ++typedef struct V4L2MediaReqDescriptor { ++ AVDRMFrameDescriptor drm; ++ ++ // Media ++ uint64_t timestamp; ++ struct qent_dst * qe_dst; ++ ++ // Refs to source frames ++ AVBufferRef * refs[18]; // 16 + 1 + 1 ++ ++ // Decode only - should be NULL by the time we emit the frame ++ struct req_decode_ent decode_ent; ++ ++ struct media_request *req; ++ struct qent_src *qe_src; ++ ++#if HEVC_CTRLS_VERSION >= 2 ++ struct v4l2_ctrl_hevc_decode_params dec; ++#endif ++ ++ size_t num_slices; ++ size_t alloced_slices; ++ struct v4l2_ctrl_hevc_slice_params * slice_params; ++ struct slice_info * slices; ++ ++ size_t num_offsets; ++ size_t alloced_offsets; ++ uint32_t *offsets; ++ ++} V4L2MediaReqDescriptor; ++ ++struct slice_info { ++ const uint8_t * ptr; ++ size_t len; // bytes ++ size_t n_offsets; ++}; ++ ++// Handy container for accumulating controls before setting ++struct req_controls { ++ int has_scaling; ++ struct timeval tv; ++ struct v4l2_ctrl_hevc_sps sps; ++ struct v4l2_ctrl_hevc_pps pps; ++ struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix; ++}; ++ ++//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 }; ++ ++ ++// Get an FFmpeg format from the v4l2 format ++static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format) ++{ ++ switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ? 
++ format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) { ++ case V4L2_PIX_FMT_YUV420: ++ return AV_PIX_FMT_YUV420P; ++ case V4L2_PIX_FMT_NV12: ++ return AV_PIX_FMT_NV12; ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ return AV_PIX_FMT_RPI4_8; ++ case V4L2_PIX_FMT_NV12_10_COL128: ++ return AV_PIX_FMT_RPI4_10; ++#endif ++ default: ++ break; ++ } ++ return AV_PIX_FMT_NONE; ++} ++ ++static inline uint64_t frame_capture_dpb(const AVFrame * const frame) ++{ ++ const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; ++ return rd->timestamp; ++} ++ ++static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp) ++{ ++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0]; ++ rd->timestamp = dpb_stamp; ++} ++ ++static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table) ++{ ++ int32_t luma_weight_denom, chroma_weight_denom; ++ const SliceHeader * const sh = &h->sh; ++ const HEVCPPS * const pps = h->pps; ++ const HEVCSPS * const sps = pps->sps; ++ ++ if (sh->slice_type == HEVC_SLICE_I || ++ (sh->slice_type == HEVC_SLICE_P && !pps->weighted_pred_flag) || ++ (sh->slice_type == HEVC_SLICE_B && !pps->weighted_bipred_flag)) ++ return; ++ ++ table->luma_log2_weight_denom = sh->luma_log2_weight_denom; ++ ++ if (sps->chroma_format_idc) ++ table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom; ++ ++ luma_weight_denom = (1 << sh->luma_log2_weight_denom); ++ chroma_weight_denom = (1 << sh->chroma_log2_weight_denom); ++ ++ for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) { ++ table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom; ++ table->luma_offset_l0[i] = sh->luma_offset_l0[i]; ++ table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom; ++ table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom; ++ table->chroma_offset_l0[i][0] = 
sh->chroma_offset_l0[i][0]; ++ table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1]; ++ } ++ ++ if (sh->slice_type != HEVC_SLICE_B) ++ return; ++ ++ for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) { ++ table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom; ++ table->luma_offset_l1[i] = sh->luma_offset_l1[i]; ++ table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom; ++ table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom; ++ table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0]; ++ table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1]; ++ } ++} ++ ++#if HEVC_CTRLS_VERSION <= 2 ++static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp) ++{ ++ const HEVCFrame *frame; ++ int i; ++ ++ for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) { ++ frame = h->rps[ST_CURR_BEF].ref[i]; ++ if (frame && timestamp == frame_capture_dpb(frame->f)) ++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE; ++ } ++ ++ for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) { ++ frame = h->rps[ST_CURR_AFT].ref[i]; ++ if (frame && timestamp == frame_capture_dpb(frame->f)) ++ return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER; ++ } ++ ++ for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) { ++ frame = h->rps[LT_CURR].ref[i]; ++ if (frame && timestamp == frame_capture_dpb(frame->f)) ++ return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR; ++ } ++ ++ return 0; ++} ++#endif ++ ++static unsigned int ++get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame, ++ const struct v4l2_hevc_dpb_entry * const entries, ++ const unsigned int num_entries) ++{ ++ uint64_t timestamp; ++ ++ if (!frame) ++ return 0; ++ ++ timestamp = frame_capture_dpb(frame->f); ++ ++ for (unsigned int i = 0; i < num_entries; i++) { ++ if (entries[i].timestamp == timestamp) ++ return i; ++ } ++ ++ return 0; ++} ++ ++static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx) ++{ ++ unsigned int z = 0; ++ while (idx--) { ++ 
if (*b++ == 0) { ++ ++z; ++ if (z >= 2 && *b == 3) { ++ ++b; ++ z = 0; ++ } ++ } ++ else { ++ z = 0; ++ } ++ } ++ return b; ++} ++ ++static int slice_add(V4L2MediaReqDescriptor * const rd) ++{ ++ if (rd->num_slices >= rd->alloced_slices) { ++ size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2; ++ ++ if (av_reallocp_array(&rd->slice_params, n2, sizeof(*rd->slice_params))) ++ goto fail; ++ if (av_reallocp_array(&rd->slices, n2, sizeof(*rd->slices))) ++ goto fail; ++ rd->alloced_slices = n2; ++ } ++ ++rd->num_slices; ++ return 0; ++ ++fail: ++ av_freep(&rd->slices); ++ rd->alloced_slices = 0; ++ rd->num_slices = 0; ++ return AVERROR(ENOMEM); ++} ++/* Append n values (each stored as offsets[i] - 1) to rd->offsets, growing it as needed; returns 0 or AVERROR(ENOMEM) */ ++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets) ++{ ++ if (rd->num_offsets + n > rd->alloced_offsets) { ++ size_t n2 = rd->alloced_offsets == 0 ? 128 : rd->alloced_offsets * 2; ++ /* Capacity is seeded from the offsets array itself and doubled until the request fits */ ++ while (rd->num_offsets + n > n2) ++ n2 *= 2; ++ if (av_reallocp_array(&rd->offsets, n2, sizeof(*rd->offsets))) { ++ rd->alloced_offsets = 0; ++ rd->num_offsets = 0; ++ return AVERROR(ENOMEM); ++ } ++ /* av_reallocp_array() has already updated rd->offsets (freed + NULLed on failure) - nothing to assign */ ++ rd->alloced_offsets = n2; ++ } ++ for (size_t i = 0; i != n; ++i) ++ rd->offsets[rd->num_offsets++] = offsets[i] - 1; ++ return 0; ++} ++ ++static unsigned int ++fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) ++{ ++ unsigned int i; ++ unsigned int n = 0; ++ const HEVCFrame * const pic = h->cur_frame; ++ const HEVCLayerContext * const layer = &h->layers[h->cur_layer]; ++ ++ for (i = 0; i < FF_ARRAY_ELEMS(layer->DPB); i++) { ++ const HEVCFrame * const frame = &layer->DPB[i]; ++ if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) { ++ struct v4l2_hevc_dpb_entry * const entry = entries + n++; ++ ++ entry->timestamp = frame_capture_dpb(frame->f); ++#if HEVC_CTRLS_VERSION <= 2 ++ entry->rps = find_frame_rps_type(h, entry->timestamp); ++#else ++ entry->flags = 
(frame->flags & HEVC_FRAME_FLAG_LONG_REF) == 0 ? 0 : ++ V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE; ++#endif ++ entry->field_pic = (frame->f->flags & AV_FRAME_FLAG_INTERLACED) != 0; ++ ++#if HEVC_CTRLS_VERSION <= 3 ++ /* TODO: Interleaved: Get the POC for each field. */ ++ entry->pic_order_cnt[0] = frame->poc; ++ entry->pic_order_cnt[1] = frame->poc; ++#else ++ entry->pic_order_cnt_val = frame->poc; ++#endif ++ } ++ } ++ return n; ++} ++ ++static void fill_slice_params(const HEVCContext * const h, ++#if HEVC_CTRLS_VERSION >= 2 ++ const struct v4l2_ctrl_hevc_decode_params * const dec, ++#endif ++ struct v4l2_ctrl_hevc_slice_params *slice_params, ++ uint32_t bit_size, uint32_t bit_offset) ++{ ++ const SliceHeader * const sh = &h->sh; ++#if HEVC_CTRLS_VERSION >= 2 ++ const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb; ++ const unsigned int dpb_n = dec->num_active_dpb_entries; ++#else ++ struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb; ++ unsigned int dpb_n; ++#endif ++ unsigned int i; ++ RefPicList *rpl; ++ ++ *slice_params = (struct v4l2_ctrl_hevc_slice_params) { ++ .bit_size = bit_size, ++#if HEVC_CTRLS_VERSION <= 3 ++ .data_bit_offset = bit_offset, ++#else ++ .data_byte_offset = bit_offset / 8 + 1, ++#endif ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ .slice_segment_addr = sh->slice_segment_addr, ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ .nal_unit_type = h->nal_unit_type, ++ .nuh_temporal_id_plus1 = h->temporal_id + 1, ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ .slice_type = sh->slice_type, ++ .colour_plane_id = sh->colour_plane_id, ++ .slice_pic_order_cnt = h->cur_frame->poc, ++ .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, ++ .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0, ++ .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? 
sh->collocated_ref_idx : 0, ++ .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand, ++ .slice_qp_delta = sh->slice_qp_delta, ++ .slice_cb_qp_offset = sh->slice_cb_qp_offset, ++ .slice_cr_qp_offset = sh->slice_cr_qp_offset, ++ .slice_act_y_qp_offset = 0, ++ .slice_act_cb_qp_offset = 0, ++ .slice_act_cr_qp_offset = 0, ++ .slice_beta_offset_div2 = sh->beta_offset / 2, ++ .slice_tc_offset_div2 = sh->tc_offset / 2, ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ .pic_struct = h->sei.picture_timing.picture_struct, ++ ++#if HEVC_CTRLS_VERSION < 2 ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, ++ .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, ++ .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs, ++#endif ++ }; ++ ++ if (sh->slice_sample_adaptive_offset_flag[0]) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA; ++ ++ if (sh->slice_sample_adaptive_offset_flag[1]) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA; ++ ++ if (sh->slice_temporal_mvp_enabled_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED; ++ ++ if (sh->mvd_l1_zero_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO; ++ ++ if (sh->cabac_init_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT; ++ ++ if (sh->collocated_list == L0) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0; ++ ++ if (sh->disable_deblocking_filter_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED; ++ ++ if (sh->slice_loop_filter_across_slices_enabled_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED; ++ ++ if (sh->dependent_slice_segment_flag) ++ slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT; ++ ++#if 
HEVC_CTRLS_VERSION < 2 ++ dpb_n = fill_dpb_entries(h, dpb); ++ slice_params->num_active_dpb_entries = dpb_n; ++#endif ++ ++ if (sh->slice_type != HEVC_SLICE_I) { ++ rpl = &h->cur_frame->refPicList[0]; ++ for (i = 0; i < rpl->nb_refs; i++) ++ slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); ++ } ++ ++ if (sh->slice_type == HEVC_SLICE_B) { ++ rpl = &h->cur_frame->refPicList[1]; ++ for (i = 0; i < rpl->nb_refs; i++) ++ slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n); ++ } ++ ++ fill_pred_table(h, &slice_params->pred_weight_table); ++ ++ slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; ++#if HEVC_CTRLS_VERSION <= 3 ++ if (slice_params->num_entry_point_offsets > 256) { ++ slice_params->num_entry_point_offsets = 256; ++ av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); ++ } ++ ++ for (i = 0; i < slice_params->num_entry_point_offsets; i++) ++ slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; ++#endif ++} ++ ++#if HEVC_CTRLS_VERSION >= 2 ++static void ++fill_decode_params(const HEVCContext * const h, ++ struct v4l2_ctrl_hevc_decode_params * const dec) ++{ ++ unsigned int i; ++ ++ *dec = (struct v4l2_ctrl_hevc_decode_params){ ++ .pic_order_cnt_val = h->poc, ++ .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs, ++ .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs, ++ .num_poc_lt_curr = h->rps[LT_CURR].nb_refs, ++ }; ++ ++ dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb); ++ ++ // The docn does seem to ask that we fit our 32 bit signed POC into ++ // a U8 so... 
(To be fair 16 bits would be enough) ++ // Luckily we (Pi) don't use these fields ++ for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i) ++ dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc; ++ for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i) ++ dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc; ++ for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i) ++ dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc; ++ ++ if (IS_IRAP(h)) ++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC; ++ if (IS_IDR(h)) ++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC; ++ if (h->sh.no_output_of_prior_pics_flag) ++ dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR; ++ ++} ++#endif ++ ++static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps) ++{ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */ ++ *ctrl = (struct v4l2_ctrl_hevc_sps) { ++ .chroma_format_idc = sps->chroma_format_idc, ++ .pic_width_in_luma_samples = sps->width, ++ .pic_height_in_luma_samples = sps->height, ++ .bit_depth_luma_minus8 = sps->bit_depth - 8, ++ .bit_depth_chroma_minus8 = sps->bit_depth - 8, ++ .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4, ++ .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, ++ .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics, ++ .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1, ++ .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3, ++ .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size, ++ .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2, ++ .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size, ++ .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter, ++ .max_transform_hierarchy_depth_intra = 
sps->max_transform_hierarchy_depth_intra, ++ .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1, ++ .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1, ++ .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3, ++ .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size, ++ .num_short_term_ref_pic_sets = sps->nb_st_rps, ++ .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps, ++ .chroma_format_idc = sps->chroma_format_idc, ++ .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1, ++ }; ++ ++ if (sps->separate_colour_plane) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE; ++ ++ if (sps->scaling_list_enabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED; ++ ++ if (sps->amp_enabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED; ++ ++ if (sps->sao_enabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET; ++ ++ if (sps->pcm_enabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED; ++ ++ if (sps->pcm_loop_filter_disabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED; ++ ++ if (sps->long_term_ref_pics_present) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT; ++ ++ if (sps->temporal_mvp_enabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED; ++ ++ if (sps->strong_intra_smoothing_enabled) ++ ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED; ++} ++ ++static void fill_scaling_matrix(const ScalingList * const sl, ++ struct v4l2_ctrl_hevc_scaling_matrix * const sm) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < 6; i++) { ++ unsigned int j; ++ ++ for (j = 0; j < 16; j++) ++ sm->scaling_list_4x4[i][j] = sl->sl[0][i][j]; ++ for (j = 0; j < 64; j++) { ++ sm->scaling_list_8x8[i][j] = sl->sl[1][i][j]; ++ sm->scaling_list_16x16[i][j] = sl->sl[2][i][j]; ++ if (i < 2) ++ sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j]; ++ } ++ sm->scaling_list_dc_coef_16x16[i] = 
sl->sl_dc[0][i]; ++ if (i < 2) ++ sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3]; ++ } ++} ++ ++static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps) ++{ ++ uint64_t flags = 0; ++ ++ if (pps->dependent_slice_segments_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED; ++ ++ if (pps->output_flag_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT; ++ ++ if (pps->sign_data_hiding_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED; ++ ++ if (pps->cabac_init_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT; ++ ++ if (pps->constrained_intra_pred_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED; ++ ++ if (pps->transform_skip_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED; ++ ++ if (pps->cu_qp_delta_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED; ++ ++ if (pps->pic_slice_level_chroma_qp_offsets_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT; ++ ++ if (pps->weighted_pred_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED; ++ ++ if (pps->weighted_bipred_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED; ++ ++ if (pps->transquant_bypass_enable_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED; ++ ++ if (pps->tiles_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED; ++ ++ if (pps->entropy_coding_sync_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED; ++ ++ if (pps->loop_filter_across_tiles_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED; ++ ++ if (pps->seq_loop_filter_across_slices_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED; ++ ++ if (pps->deblocking_filter_override_enabled_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED; ++ ++ if (pps->disable_dbf) ++ flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER; ++ ++ if 
(pps->lists_modification_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT; ++ ++ if (pps->slice_header_extension_present_flag) ++ flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */ ++ *ctrl = (struct v4l2_ctrl_hevc_pps) { ++ .num_extra_slice_header_bits = pps->num_extra_slice_header_bits, ++ .init_qp_minus26 = pps->pic_init_qp_minus26, ++ .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth, ++ .pps_cb_qp_offset = pps->cb_qp_offset, ++ .pps_cr_qp_offset = pps->cr_qp_offset, ++ .pps_beta_offset_div2 = pps->beta_offset / 2, ++ .pps_tc_offset_div2 = pps->tc_offset / 2, ++ .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2, ++ .flags = flags ++ }; ++ ++ ++ if (pps->tiles_enabled_flag) { ++ ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1; ++ ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1; ++ ++ for (int i = 0; i < pps->num_tile_columns; i++) ++ ctrl->column_width_minus1[i] = pps->column_width[i] - 1; ++ ++ for (int i = 0; i < pps->num_tile_rows; i++) ++ ctrl->row_height_minus1[i] = pps->row_height[i] - 1; ++ } ++} ++ ++static int frame_finish(V4L2MediaReqDescriptor * const rd) ++{ ++ int rv = 0; ++ ++ if (rd->qe_dst) { ++ MediaBufsStatus stat = qent_dst_wait(rd->qe_dst); ++ if (stat != MEDIABUFS_STATUS_SUCCESS) ++ rv = -1; ++ } ++ ++ { ++ AVBufferRef **p = rd->refs; ++ for (; *p != NULL; ++p) ++ av_buffer_unref(p); ++ } ++ ++ return rv; ++} ++ ++// Called before finally returning the frame to the user ++// Set corrupt flag here as this is actually the frame structure that ++// is going to the user (in MT land each thread has its own pool) ++static int frame_post_process(void *logctx, AVFrame *frame) ++{ ++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0]; ++ ++ fprintf(stderr, "<<< %s\n", __func__); ++ ++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); ++ frame->flags &= ~AV_FRAME_FLAG_CORRUPT; ++ if 
(frame_finish(rd) != 0) { ++ av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__); ++ frame->flags |= AV_FRAME_FLAG_CORRUPT; ++ } ++ return 0; ++} ++ ++static inline struct timeval cvt_dpb_to_tv(uint64_t t) ++{ ++ t /= 1000; ++ return (struct timeval){ ++ .tv_usec = t % 1000000, ++ .tv_sec = t / 1000000 ++ }; ++} ++ ++static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t) ++{ ++ return (uint64_t)t * 1000; ++} ++ ++static int v4l2_request_hevc_start_frame(AVCodecContext *avctx, ++ V4L2RequestContextHEVC *const ctx, ++ av_unused const uint8_t *buffer, ++ av_unused uint32_t size) ++{ ++ const HEVCContext *h = avctx->priv_data; ++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->cur_frame->f->data[0]; ++ static int z = 0; ++ ++ fprintf(stderr, "<<< %s: %d\n", __func__, ++z); ++// av_log(NULL, AV_LOG_INFO, "%s\n", __func__); ++ decode_q_add(&ctx->decode_q, &rd->decode_ent); ++ ++ rd->num_slices = 0; ++ ctx->timestamp++; ++ rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp); ++ ++ { ++ FrameDecodeData * const fdd = (FrameDecodeData*)h->cur_frame->f->private_ref->data; ++ fdd->post_process = frame_post_process; ++ } ++ ++ // qe_dst needs to be bound to the data buffer and only returned when that is ++ if (!rd->qe_dst) ++ { ++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); ++ return AVERROR(ENOMEM); ++ } ++ } ++ ++ // ff_thread_finish_setup by caller ++ ++ return 0; ++} ++ ++// Object fd & size will be zapped by this & need setting later ++static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format) ++{ ++ AVDRMLayerDescriptor *layer = &desc->layers[0]; ++ unsigned int width; ++ unsigned int height; ++ unsigned int bpl; ++ uint32_t pixelformat; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ width = format->fmt.pix_mp.width; ++ height = format->fmt.pix_mp.height; ++ pixelformat = 
format->fmt.pix_mp.pixelformat; ++ bpl = format->fmt.pix_mp.plane_fmt[0].bytesperline; ++ } ++ else { ++ width = format->fmt.pix.width; ++ height = format->fmt.pix.height; ++ pixelformat = format->fmt.pix.pixelformat; ++ bpl = format->fmt.pix.bytesperline; ++ } ++ ++ switch (pixelformat) { ++ case V4L2_PIX_FMT_NV12: ++ layer->format = DRM_FORMAT_NV12; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ layer->format = DRM_FORMAT_NV12; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); ++ break; ++ case V4L2_PIX_FMT_NV12_10_COL128: ++ layer->format = DRM_FORMAT_P030; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl); ++ break; ++#endif ++#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED ++ case V4L2_PIX_FMT_SUNXI_TILED_NV12: ++ layer->format = DRM_FORMAT_NV12; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED; ++ break; ++#endif ++#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15) ++ case V4L2_PIX_FMT_NV15: ++ layer->format = DRM_FORMAT_NV15; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#endif ++ case V4L2_PIX_FMT_NV16: ++ layer->format = DRM_FORMAT_NV16; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20) ++ case V4L2_PIX_FMT_NV20: ++ layer->format = DRM_FORMAT_NV20; ++ desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; ++ break; ++#endif ++ default: ++ return -1; ++ } ++ ++ desc->nb_objects = 1; ++ desc->objects[0].fd = -1; ++ desc->objects[0].size = 0; ++ ++ desc->nb_layers = 1; ++ layer->nb_planes = 2; ++ ++ layer->planes[0].object_index = 0; ++ layer->planes[0].offset = 0; ++ layer->planes[0].pitch = bpl; ++#if CONFIG_SAND ++ if (pixelformat == V4L2_PIX_FMT_NV12_COL128) { ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = height * 128; ++ layer->planes[0].pitch = width; ++ 
layer->planes[1].pitch = width; ++ } ++ else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = height * 128; ++ layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy ++ layer->planes[1].pitch = width * 2; ++ } ++ else ++#endif ++ { ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = layer->planes[0].pitch * height; ++ layer->planes[1].pitch = layer->planes[0].pitch; ++ } ++ ++ return 0; ++} ++ ++static int ++set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq, ++ struct req_controls *const controls, ++#if HEVC_CTRLS_VERSION >= 2 ++ struct v4l2_ctrl_hevc_decode_params * const dec, ++#endif ++ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, ++ void * const offsets, const size_t offset_count) ++{ ++ int rv; ++#if HEVC_CTRLS_VERSION >= 2 ++ unsigned int n = 3; ++#else ++ unsigned int n = 2; ++#endif ++ ++ struct v4l2_ext_control control[6] = { ++ { ++ .id = V4L2_CID_STATELESS_HEVC_SPS, ++ .ptr = &controls->sps, ++ .size = sizeof(controls->sps), ++ }, ++ { ++ .id = V4L2_CID_STATELESS_HEVC_PPS, ++ .ptr = &controls->pps, ++ .size = sizeof(controls->pps), ++ }, ++#if HEVC_CTRLS_VERSION >= 2 ++ { ++ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, ++ .ptr = dec, ++ .size = sizeof(*dec), ++ }, ++#endif ++ }; ++ ++ if (slices) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, ++ .ptr = slices, ++ .size = sizeof(*slices) * slice_count, ++ }; ++ ++ if (controls->has_scaling) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, ++ .ptr = &controls->scaling_matrix, ++ .size = sizeof(controls->scaling_matrix), ++ }; ++ ++#if HEVC_CTRLS_VERSION >= 4 ++ if (offsets) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, ++ .ptr = offsets, ++ .size = sizeof(((struct V4L2MediaReqDescriptor 
*)0)->offsets[0]) * offset_count, ++ }; ++#endif ++ ++ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); ++ ++ return rv; ++} ++ ++static void ++add_ref_once(V4L2MediaReqDescriptor * const rd, struct HEVCFrame * const ref) ++{ ++ AVBufferRef **p = rd->refs; ++ int i = 0; ++ while (*p != NULL) { ++ if (ref->f->buf[0]->data == (*p)->data) ++ return; ++ ++p; ++ av_assert0(++i < 16); ++ } ++ *p = av_buffer_ref(ref->f->buf[0]); ++} ++ ++// This only works because we started out from a single coded frame buffer ++// that will remain intact until after end_frame ++static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, const uint8_t *buffer, uint32_t size) ++{ ++ const HEVCContext * const h = avctx->priv_data; ++ const SliceHeader * const sh = &h->sh; ++ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->cur_frame->f->data[0]; ++ uint32_t boff = (ptr_from_index(buffer, sh->data_offset) - buffer) * 8 - 1; ++ ++ const unsigned int n = rd->num_slices; ++ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; ++ ++ int rv; ++ struct slice_info * si; ++ ++ fprintf(stderr, "<<< %s: boff=%u\n", __func__, boff); ++ // This looks dodgy but we know that FFmpeg has parsed this from a buffer ++ // that contains the entire frame including the start code ++ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { ++ buffer -= 3; ++ size += 3; ++ boff += 24; ++ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { ++ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", ++ buffer[0], buffer[1], buffer[2]); ++ } ++ } ++ ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { ++ if (rd->slices == NULL) { ++ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) ++ return AVERROR(ENOMEM); ++ rd->slices->ptr = buffer; ++ rd->num_slices = 1; ++ } ++ rd->slices->len = buffer - rd->slices->ptr + size; ++ return 0; ++ } ++ ++ if ((rv = 
slice_add(rd)) != 0) ++ return rv; ++ ++ si = rd->slices + n; ++ si->ptr = buffer; ++ si->len = size; ++ si->n_offsets = rd->num_offsets; ++ ++ if (n != block_start) { ++ struct slice_info *const si0 = rd->slices + block_start; ++ const size_t offset = (buffer - si0->ptr); ++ boff += offset * 8; ++ size += offset; ++ si0->len = si->len + offset; ++ } ++ ++#if HEVC_CTRLS_VERSION >= 2 ++ if (n == 0) ++ fill_decode_params(h, &rd->dec); ++ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); ++#else ++ fill_slice_params(h, rd->slice_params + n, size * 8, boff); ++#endif ++ ++ { ++ RefPicList *rpl; ++ int i; ++ ++ if (sh->slice_type != HEVC_SLICE_I) { ++ rpl = &h->cur_frame->refPicList[0]; ++ for (i = 0; i < rpl->nb_refs; i++) ++ add_ref_once(rd, rpl->ref[i]); ++ } ++ ++ if (sh->slice_type == HEVC_SLICE_B) { ++ rpl = &h->cur_frame->refPicList[1]; ++ for (i = 0; i < rpl->nb_refs; i++) ++ add_ref_once(rd, rpl->ref[i]); ++ } ++ } ++ ++ if (ctx->max_offsets != 0 && ++ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) ++ return rv; ++ ++ return 0; ++} ++ ++static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx, V4L2RequestContextHEVC *const ctx) ++{ ++ const HEVCContext * const h = avctx->priv_data; ++ fprintf(stderr, "<<< %s\n", __func__); ++ if (h->cur_frame != NULL) { ++ V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->cur_frame->f->data[0]; ++ ++ media_request_abort(&rd->req); ++ mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src); ++ ++ decode_q_remove(&ctx->decode_q, &rd->decode_ent); ++ } ++} ++ ++static int send_slice(AVCodecContext * const avctx, ++ V4L2RequestContextHEVC * const ctx, ++ V4L2MediaReqDescriptor * const rd, ++ struct req_controls *const controls, ++ const unsigned int i, const unsigned int j) ++{ ++ const int is_last = (j == rd->num_slices); ++ struct slice_info *const si = rd->slices + i; ++ struct media_request * req = NULL; ++ struct qent_src * src = NULL; ++ 
MediaBufsStatus stat; ++ void * offsets = rd->offsets + rd->slices[i].n_offsets; ++ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; ++ ++ if ((req = media_request_get(ctx->mpool)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); ++ return AVERROR(ENOMEM); ++ } ++ ++ if (set_req_ctls(ctx, req, ++ controls, ++#if HEVC_CTRLS_VERSION >= 2 ++ &rd->dec, ++#endif ++ rd->slice_params + i, j - i, ++ offsets, n_offsets)) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); ++ goto fail1; ++ } ++ ++ if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__); ++ goto fail1; ++ } ++ ++ if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__); ++ goto fail2; ++ } ++ ++ if (qent_src_params_set(src, &controls->tv)) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__); ++ goto fail2; ++ } ++ ++ stat = mediabufs_start_request(ctx->mbufs, &req, &src, ++ i == 0 ? rd->qe_dst : NULL, ++ is_last); ++ ++ if (stat != MEDIABUFS_STATUS_SUCCESS) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); ++ return AVERROR_UNKNOWN; ++ } ++ return 0; ++ ++fail2: ++ mediabufs_src_qent_abort(ctx->mbufs, &src); ++fail1: ++ media_request_abort(&req); ++ return AVERROR_UNKNOWN; ++} ++ ++static int v4l2_request_hevc_end_frame(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx) ++{ ++ const HEVCContext * const h = avctx->priv_data; ++ V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->cur_frame->f->data[0]; ++ struct req_controls rc; ++ unsigned int i; ++ int rv; ++ static int z = 0; ++ ++ fprintf(stderr, "<<< %s: %d\n", __func__, ++z); ++ ++ // It is possible, though maybe a bug, to get an end_frame without ++ // a previous start_frame. If we do then give up. 
++ if (!decode_q_in_q(&rd->decode_ent)) { ++ av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__); ++ return AVERROR_INVALIDDATA; ++ } ++ ++ { ++ const HEVCPPS *pps = h->pps; ++ const HEVCSPS *sps = pps->sps; ++ const ScalingList *sl = pps->scaling_list_data_present_flag ? ++ &pps->scaling_list : ++ sps->scaling_list_enabled ? ++ &sps->scaling_list : NULL; ++ ++ ++ memset(&rc, 0, sizeof(rc)); ++ rc.tv = cvt_dpb_to_tv(rd->timestamp); ++ fill_sps(&rc.sps, sps); ++ fill_pps(&rc.pps, pps); ++ if (sl) { ++ rc.has_scaling = 1; ++ fill_scaling_matrix(sl, &rc.scaling_matrix); ++ } ++ } ++ ++ decode_q_wait(&ctx->decode_q, &rd->decode_ent); ++ ++ // qe_dst needs to be bound to the data buffer and only returned when that is ++ // Alloc almost certainly wants to be serialised if there is any chance of blocking ++ // so we get the next frame to be free in the thread that needs it for decode first. ++ // ++ // In our current world this probably isn't a concern but put it here anyway ++ if (!rd->qe_dst) ++ { ++ if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__); ++ rv = AVERROR(ENOMEM); ++ goto fail; ++ } ++ } ++ ++ // Send as slices ++ for (i = 0; i < rd->num_slices; i += ctx->max_slices) { ++ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices); ++ if ((rv = send_slice(avctx, ctx, rd, &rc, i, e)) != 0) ++ goto fail; ++ } ++ ++ // Set the drm_prime desriptor ++ drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); ++ rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0)); ++ rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0)); ++ ++ decode_q_remove(&ctx->decode_q, &rd->decode_ent); ++ return 0; ++ ++fail: ++ decode_q_remove(&ctx->decode_q, &rd->decode_ent); ++ return rv; ++} ++ ++static inline int ++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) ++{ ++ return v >= c->minimum && v <= 
c->maximum; ++} ++ ++// Initial check & init ++static int ++probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) ++{ ++ const HEVCContext *h = avctx->priv_data; ++ const HEVCSPS * const sps = h->pps->sps; ++ struct v4l2_ctrl_hevc_sps ctrl_sps; ++ unsigned int i; ++ ++ // Check for var slice array ++ struct v4l2_query_ext_ctrl qc[] = { ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_PPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, ++#if HEVC_CTRLS_VERSION >= 2 ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS }, ++#endif ++ }; ++ // Order & size must match! ++ static const size_t ctrl_sizes[] = { ++ sizeof(struct v4l2_ctrl_hevc_slice_params), ++ sizeof(int32_t), ++ sizeof(struct v4l2_ctrl_hevc_sps), ++ sizeof(struct v4l2_ctrl_hevc_pps), ++ sizeof(struct v4l2_ctrl_hevc_scaling_matrix), ++#if HEVC_CTRLS_VERSION >= 2 ++ sizeof(struct v4l2_ctrl_hevc_decode_params), ++#endif ++ }; ++ const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc); ++ ++#if HEVC_CTRLS_VERSION == 2 ++ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(5, 18, 0)) ++ return AVERROR(EINVAL); ++#elif HEVC_CTRLS_VERSION == 3 ++ if (mediabufs_ctl_driver_version(ctx->mbufs) < MEDIABUFS_DRIVER_VERSION(5, 18, 0)) ++ return AVERROR(EINVAL); ++#endif ++ ++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); ++ i = 0; ++#if HEVC_CTRLS_VERSION >= 4 ++ // Skip slice check if no slice mode ++ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ i = 1; ++#else ++ // Fail frame mode silently for anything prior to V4 ++ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ return AVERROR(EINVAL); ++#endif ++ for (; i != noof_ctrls; ++i) { ++ if (qc[i].type == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, 
qc[i].id); ++ return AVERROR(EINVAL); ++ } ++ if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { ++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", ++ HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ fill_sps(&ctrl_sps, sps); ++ ++ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++// Final init ++static int ++set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) ++{ ++ int ret; ++ ++ struct v4l2_query_ext_ctrl querys[] = { ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, ++#if HEVC_CTRLS_VERSION >= 4 ++ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, ++#endif ++ }; ++ ++ struct v4l2_ext_control ctrls[] = { ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, ++ }; ++ ++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); ++ ++ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || ++ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? ++ 1 : querys[2].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); ++ ++#if HEVC_CTRLS_VERSION >= 4 ++ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? 
++ 0 : querys[3].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); ++#else ++ ctx->max_offsets = 0; ++#endif ++ ++ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || ++ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) ++ ctx->decode_mode = querys[0].default_value; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); ++ return AVERROR(EINVAL); ++ } ++ ++ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || ++ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) ++ ctx->start_code = querys[1].default_value; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); ++ return AVERROR(EINVAL); ++ } ++ ++ // If we are in slice mode & START_CODE_NONE supported then pick that ++ // as it doesn't require the slightly dodgy look backwards in our raw buffer ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && ++ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; ++ ++ ctrls[0].value = ctx->decode_mode; ++ ctrls[1].value = ctx->start_code; ++ ++ ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls)); ++ return !ret ? 
0 : AVERROR(-ret); ++} ++ ++static void v4l2_req_frame_free(void *opaque, uint8_t *data) ++{ ++ AVCodecContext *avctx = opaque; ++ V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data; ++ ++ av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data); ++ ++ frame_finish(rd); ++ ++ qent_dst_unref(&rd->qe_dst); ++ ++ // We don't expect req or qe_src to be set ++ if (rd->req || rd->qe_src) ++ av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->req, rd->qe_src); ++ ++ av_freep(&rd->slices); ++ av_freep(&rd->slice_params); ++ av_freep(&rd->offsets); ++ ++ av_free(rd); ++} ++ ++static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size) ++{ ++ AVCodecContext *avctx = opaque; ++// V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data; ++// V4L2MediaReqDescriptor *req; ++ AVBufferRef *ref; ++ uint8_t *data; ++// int ret; ++ ++ data = av_mallocz(size); ++ if (!data) ++ return NULL; ++ ++ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data); ++ ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0); ++ if (!ref) { ++ av_freep(&data); ++ return NULL; ++ } ++ return ref; ++} ++ ++#if 0 ++static void v4l2_req_pool_free(void *opaque) ++{ ++ av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque); ++} ++ ++static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc) ++{ ++ av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool); ++ ++ av_buffer_pool_uninit(&hwfc->pool); ++} ++#endif ++ ++static int frame_params(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, AVBufferRef *hw_frames_ctx) ++{ ++ AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data; ++ const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs); ++ ++ fprintf(stderr, "<<< %s\n", __func__); ++ ++ hwfc->format = AV_PIX_FMT_DRM_PRIME; ++ hwfc->sw_format = pixel_format_from_format(vfmt); ++ if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) { ++ 
hwfc->width = vfmt->fmt.pix_mp.width; ++ hwfc->height = vfmt->fmt.pix_mp.height; ++ } else { ++ hwfc->width = vfmt->fmt.pix.width; ++ hwfc->height = vfmt->fmt.pix.height; ++ } ++#if 0 ++ hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free); ++ if (!hwfc->pool) ++ return AVERROR(ENOMEM); ++ ++ hwfc->free = v4l2_req_hwframe_ctx_free; ++ ++ hwfc->initial_pool_size = 1; ++ ++ switch (avctx->codec_id) { ++ case AV_CODEC_ID_VP9: ++ hwfc->initial_pool_size += 8; ++ break; ++ case AV_CODEC_ID_VP8: ++ hwfc->initial_pool_size += 3; ++ break; ++ default: ++ hwfc->initial_pool_size += 2; ++ } ++#endif ++ av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size); ++ ++ return 0; ++} ++ ++static int alloc_frame(AVCodecContext * avctx, V4L2RequestContextHEVC *const ctx, AVFrame *frame) ++{ ++ int rv; ++ ++ fprintf(stderr, "<<< %s\n", __func__); ++ ++ frame->buf[0] = v4l2_req_frame_alloc(avctx, sizeof(V4L2MediaReqDescriptor)); ++ if (!frame->buf[0]) ++ return AVERROR(ENOMEM); ++ ++ frame->data[0] = frame->buf[0]->data; ++ ++ frame->hw_frames_ctx = av_buffer_ref(avctx->hw_frames_ctx); ++ ++ if ((rv = ff_attach_decode_data(frame)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to attach decode data to frame\n"); ++ av_frame_unref(frame); ++ return rv; ++ } ++ ++ return 0; ++} ++ ++const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = { ++ .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE, ++ .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION), ++ .probe = probe, ++ .set_controls = set_controls, ++ ++ .start_frame = v4l2_request_hevc_start_frame, ++ .decode_slice = v4l2_request_hevc_decode_slice, ++ .end_frame = v4l2_request_hevc_end_frame, ++ .abort_frame = v4l2_request_hevc_abort_frame, ++ .frame_params = frame_params, ++ .alloc_frame = alloc_frame, ++}; ++ 
diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c -index 980b306b8a72..910ac77bb6f9 100644 ---- a/libavcodec/v4l2_req_media.c +new file mode 100644 +index 000000000000..c94cc5b0f684 +--- /dev/null +++ b/libavcodec/v4l2_req_media.c -@@ -33,9 +33,11 @@ - #include - #include - #include +@@ -0,0 +1,1808 @@ ++/* ++ * Copyright (C) 2018 Paul Kocialkowski ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. ++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include +#include - #include - #include - #include ++#include ++#include ++#include +#include - - #include - -@@ -95,6 +97,32 @@ struct media_request { - struct polltask * pt; - }; - ++ ++#include ++ ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_media.h" ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_utils.h" ++#include "weak_link.h" ++ ++ ++/* floor(log2(x)) */ ++static unsigned int log2_size(size_t x) ++{ ++ unsigned int n = 0; ++ ++ if (x & ~0xffff) { ++ n += 16; ++ x >>= 16; ++ } ++ if (x & ~0xff) { ++ n += 8; ++ x >>= 8; ++ } ++ if (x & ~0xf) { ++ n += 4; ++ x >>= 4; ++ } ++ if (x & ~3) { ++ n += 2; ++ x >>= 2; ++ } ++ return (x & ~1) ? n + 1 : n; ++} ++ ++static size_t round_up_size(const size_t x) ++{ ++ /* Admit no size < 256 */ ++ const unsigned int n = x < 256 ? 8 : log2_size(x) - 1; ++ ++ return x >= (3 << n) ? 4 << n : (3 << n); ++} ++ ++struct media_request; ++ ++struct media_pool { ++ int fd; ++ sem_t sem; ++ pthread_mutex_t lock; ++ unsigned int pool_n; ++ struct media_request * pool_reqs; ++ struct media_request * free_reqs; ++ struct pollqueue * pq; ++}; ++ ++struct media_request { ++ struct media_request * next; ++ struct media_pool * mp; ++ int fd; ++ struct polltask * pt; ++}; ++ +static inline enum v4l2_memory +mediabufs_memory_to_v4l2(const enum mediabufs_memory m) +{ @@ -25341,190 +10140,1054 @@ index 980b306b8a72..910ac77bb6f9 100644 + return "Unknown"; +} + - - static inline int do_trywait(sem_t *const sem) - { -@@ -115,14 +143,14 @@ static inline int do_wait(sem_t *const sem) - } - - static int request_buffers(int video_fd, unsigned int type, -- enum v4l2_memory memory, unsigned int buffers_count) ++ ++static inline int do_trywait(sem_t *const sem) ++{ ++ while (sem_trywait(sem)) { ++ if (errno != EINTR) ++ return -errno; ++ } ++ return 0; ++} ++ ++static inline int do_wait(sem_t *const sem) ++{ ++ while 
(sem_wait(sem)) { ++ if (errno != EINTR) ++ return -errno; ++ } ++ return 0; ++} ++ ++static int request_buffers(int video_fd, unsigned int type, + enum mediabufs_memory memory, unsigned int buffers_count) - { - struct v4l2_requestbuffers buffers; - int rc; - - memset(&buffers, 0, sizeof(buffers)); - buffers.type = type; -- buffers.memory = memory; ++{ ++ struct v4l2_requestbuffers buffers; ++ int rc; ++ ++ memset(&buffers, 0, sizeof(buffers)); ++ buffers.type = type; + buffers.memory = mediabufs_memory_to_v4l2(memory); - buffers.count = buffers_count; - - rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers); -@@ -324,6 +352,7 @@ struct qent_base { - struct qent_base *next; - struct qent_base *prev; - enum qent_status status; ++ buffers.count = buffers_count; ++ ++ rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers); ++ if (rc < 0) { ++ rc = -errno; ++ request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc)); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++ ++static int set_stream(int video_fd, unsigned int type, bool enable) ++{ ++ enum v4l2_buf_type buf_type = type; ++ int rc; ++ ++ rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF, ++ &buf_type); ++ if (rc < 0) { ++ rc = -errno; ++ request_log("Unable to %sable stream: %s\n", ++ enable ? 
"en" : "dis", strerror(-rc)); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++ ++ ++struct media_request * media_request_get(struct media_pool * const mp) ++{ ++ struct media_request *req = NULL; ++ ++ /* Timeout handled by poll code */ ++ if (do_wait(&mp->sem)) ++ return NULL; ++ ++ pthread_mutex_lock(&mp->lock); ++ req = mp->free_reqs; ++ if (req) { ++ mp->free_reqs = req->next; ++ req->next = NULL; ++ } ++ pthread_mutex_unlock(&mp->lock); ++ return req; ++} ++ ++int media_request_fd(const struct media_request * const req) ++{ ++ return req->fd; ++} ++ ++int media_request_start(struct media_request * const req) ++{ ++ while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1) ++ { ++ const int err = errno; ++ if (err == EINTR) ++ continue; ++ request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err)); ++ return -err; ++ } ++ ++ pollqueue_add_task(req->pt, 2000); ++ return 0; ++} ++ ++static void media_request_done(void *v, short revents) ++{ ++ struct media_request *const req = v; ++ struct media_pool *const mp = req->mp; ++ ++ /* ** Not sure what to do about timeout */ ++ ++ if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0) ++ request_log("Unable to reinit media request: %s\n", ++ strerror(errno)); ++ ++ pthread_mutex_lock(&mp->lock); ++ req->next = mp->free_reqs; ++ mp->free_reqs = req; ++ pthread_mutex_unlock(&mp->lock); ++ sem_post(&mp->sem); ++} ++ ++int media_request_abort(struct media_request ** const preq) ++{ ++ struct media_request * const req = *preq; ++ ++ if (req == NULL) ++ return 0; ++ *preq = NULL; ++ ++ media_request_done(req, 0); ++ return 0; ++} ++ ++static void free_req_pool(struct media_request * const pool, const unsigned int n) ++{ ++ unsigned int i; ++ for (i = 0; i != n; ++i) { ++ struct media_request * const req = pool + i; ++ if (req->pt) ++ polltask_delete(&req->pt); ++ if (req->fd != -1) ++ close(req->fd); ++ } ++ free(pool); ++} ++ ++struct media_pool * media_pool_new(const char * const media_path, ++ struct 
pollqueue * const pq, ++ const unsigned int n) ++{ ++ struct media_pool * const mp = calloc(1, sizeof(*mp)); ++ unsigned int i; ++ ++ if (!mp) ++ goto fail0; ++ ++ mp->pq = pq; ++ pthread_mutex_init(&mp->lock, NULL); ++ mp->fd = open(media_path, O_RDWR | O_NONBLOCK); ++ if (mp->fd == -1) { ++ request_log("Failed to open '%s': %s\n", media_path, strerror(errno)); ++ goto fail1; ++ } ++ ++ if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL) ++ goto fail3; ++ mp->pool_n = n; ++ for (i = 0; i != n; ++i) { ++ mp->pool_reqs[i].mp = mp; ++ mp->pool_reqs[i].fd = -1; ++ } ++ ++ for (i = 0; i != n; ++i) { ++ struct media_request * const req = mp->pool_reqs + i; ++ ++ if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) { ++ request_log("Failed to alloc request %d: %s\n", i, strerror(errno)); ++ goto fail4; ++ } ++ ++ req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req); ++ if (!req->pt) ++ goto fail4; ++ ++ req->next = mp->free_reqs, ++ mp->free_reqs = req; ++ } ++ ++ sem_init(&mp->sem, 0, n); ++ ++ return mp; ++ ++fail4: ++ free_req_pool(mp->pool_reqs, mp->pool_n); ++fail3: ++ close(mp->fd); ++ pthread_mutex_destroy(&mp->lock); ++fail1: ++ free(mp); ++fail0: ++ return NULL; ++} ++ ++void media_pool_delete(struct media_pool ** pMp) ++{ ++ struct media_pool * const mp = *pMp; ++ ++ if (!mp) ++ return; ++ *pMp = NULL; ++ ++ free_req_pool(mp->pool_reqs, mp->pool_n); ++ close(mp->fd); ++ sem_destroy(&mp->sem); ++ pthread_mutex_destroy(&mp->lock); ++ free(mp); ++} ++ ++ ++#define INDEX_UNSET (~(uint32_t)0) ++ ++enum qent_status { ++ QENT_NEW = 0, // Initial state - shouldn't last ++ QENT_FREE, // On free chain ++ QENT_PENDING, // User has ent ++ QENT_WAITING, // On inuse ++ QENT_DONE, // Frame rx ++ QENT_ERROR, // Error ++ QENT_IMPORT ++}; ++ ++struct qent_base { ++ atomic_int ref_count; ++ struct qent_base *next; ++ struct qent_base *prev; ++ enum qent_status status; + enum mediabufs_memory memtype; - uint32_t index; - struct dmabuf_h 
*dh[VIDEO_MAX_PLANES]; - struct timeval timestamp; -@@ -348,9 +377,9 @@ struct qe_list_head { - }; - - struct buf_pool { ++ uint32_t index; ++ struct dmabuf_h *dh[VIDEO_MAX_PLANES]; ++ struct timeval timestamp; ++}; ++ ++struct qent_src { ++ struct qent_base base; ++ int fixed_size; ++}; ++ ++struct qent_dst { ++ struct qent_base base; ++ bool waiting; ++ pthread_mutex_t lock; ++ pthread_cond_t cond; ++ struct ff_weak_link_client * mbc_wl; ++}; ++ ++struct qe_list_head { ++ struct qent_base *head; ++ struct qent_base *tail; ++}; ++ ++struct buf_pool { + enum mediabufs_memory memtype; - pthread_mutex_t lock; - sem_t free_sem; -- enum v4l2_buf_type buf_type; - struct qe_list_head free; - struct qe_list_head inuse; - }; -@@ -367,9 +396,10 @@ static inline struct qent_src *base_to_src(struct qent_base *be) - } - - --#define QENT_BASE_INITIALIZER {\ ++ pthread_mutex_t lock; ++ sem_t free_sem; ++ struct qe_list_head free; ++ struct qe_list_head inuse; ++}; ++ ++ ++static inline struct qent_dst *base_to_dst(struct qent_base *be) ++{ ++ return (struct qent_dst *)be; ++} ++ ++static inline struct qent_src *base_to_src(struct qent_base *be) ++{ ++ return (struct qent_src *)be; ++} ++ ++ +#define QENT_BASE_INITIALIZER(mtype) {\ - .ref_count = ATOMIC_VAR_INIT(0),\ - .status = QENT_NEW,\ ++ .ref_count = ATOMIC_VAR_INIT(0),\ ++ .status = QENT_NEW,\ + .memtype = (mtype),\ - .index = INDEX_UNSET\ - } - -@@ -390,13 +420,13 @@ static void qe_src_free(struct qent_src *const be_src) - free(be_src); - } - --static struct qent_src * qe_src_new(void) ++ .index = INDEX_UNSET\ ++} ++ ++static void qe_base_uninit(struct qent_base *const be) ++{ ++ unsigned int i; ++ for (i = 0; i != VIDEO_MAX_PLANES; ++i) { ++ dmabuf_free(be->dh[i]); ++ be->dh[i] = NULL; ++ } ++} ++ ++static void qe_src_free(struct qent_src *const be_src) ++{ ++ if (!be_src) ++ return; ++ qe_base_uninit(&be_src->base); ++ free(be_src); ++} ++ +static struct qent_src * qe_src_new(enum mediabufs_memory mtype) - { - struct 
qent_src *const be_src = malloc(sizeof(*be_src)); - if (!be_src) - return NULL; - *be_src = (struct qent_src){ -- .base = QENT_BASE_INITIALIZER ++{ ++ struct qent_src *const be_src = malloc(sizeof(*be_src)); ++ if (!be_src) ++ return NULL; ++ *be_src = (struct qent_src){ + .base = QENT_BASE_INITIALIZER(mtype) - }; - return be_src; - } -@@ -413,13 +443,13 @@ static void qe_dst_free(struct qent_dst *const be_dst) - free(be_dst); - } - --static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl) ++ }; ++ return be_src; ++} ++ ++static void qe_dst_free(struct qent_dst *const be_dst) ++{ ++ if (!be_dst) ++ return; ++ ++ ff_weak_link_unref(&be_dst->mbc_wl); ++ pthread_cond_destroy(&be_dst->cond); ++ pthread_mutex_destroy(&be_dst->lock); ++ qe_base_uninit(&be_dst->base); ++ free(be_dst); ++} ++ +static struct qent_dst* qe_dst_new(struct ff_weak_link_master * const wl, const enum mediabufs_memory memtype) - { - struct qent_dst *const be_dst = malloc(sizeof(*be_dst)); - if (!be_dst) - return NULL; - *be_dst = (struct qent_dst){ -- .base = QENT_BASE_INITIALIZER, ++{ ++ struct qent_dst *const be_dst = malloc(sizeof(*be_dst)); ++ if (!be_dst) ++ return NULL; ++ *be_dst = (struct qent_dst){ + .base = QENT_BASE_INITIALIZER(memtype), - .lock = PTHREAD_MUTEX_INITIALIZER, - .cond = PTHREAD_COND_INITIALIZER, - .mbc_wl = ff_weak_link_ref(wl) -@@ -553,14 +583,14 @@ static struct qent_base *queue_tryget_free(struct buf_pool *const bp) - return buf; - } - --static struct qent_base * queue_find_extract_fd(struct buf_pool *const bp, const int fd) ++ .lock = PTHREAD_MUTEX_INITIALIZER, ++ .cond = PTHREAD_COND_INITIALIZER, ++ .mbc_wl = ff_weak_link_ref(wl) ++ }; ++ return be_dst; ++} ++ ++static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be) ++{ ++ if (ql->tail) ++ ql->tail->next = be; ++ else ++ ql->head = be; ++ be->prev = ql->tail; ++ be->next = NULL; ++ ql->tail = be; ++} ++ ++static struct qent_base * ql_extract(struct qe_list_head * const ql, 
struct qent_base * be) ++{ ++ if (!be) ++ return NULL; ++ ++ if (be->next) ++ be->next->prev = be->prev; ++ else ++ ql->tail = be->prev; ++ if (be->prev) ++ be->prev->next = be->next; ++ else ++ ql->head = be->next; ++ be->next = NULL; ++ be->prev = NULL; ++ return be; ++} ++ ++ ++static void bq_put_free(struct buf_pool *const bp, struct qent_base * be) ++{ ++ ql_add_tail(&bp->free, be); ++} ++ ++static struct qent_base * bq_get_free(struct buf_pool *const bp) ++{ ++ return ql_extract(&bp->free, bp->free.head); ++} ++ ++static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be) ++{ ++ return ql_extract(&bp->inuse, be); ++} ++ ++static struct qent_base * bq_get_inuse(struct buf_pool *const bp) ++{ ++ return ql_extract(&bp->inuse, bp->inuse.head); ++} ++ ++static void bq_free_all_free_src(struct buf_pool *const bp) ++{ ++ struct qent_base *be; ++ while ((be = bq_get_free(bp)) != NULL) ++ qe_src_free(base_to_src(be)); ++} ++ ++static void bq_free_all_inuse_src(struct buf_pool *const bp) ++{ ++ struct qent_base *be; ++ while ((be = bq_get_inuse(bp)) != NULL) ++ qe_src_free(base_to_src(be)); ++} ++ ++static void bq_free_all_free_dst(struct buf_pool *const bp) ++{ ++ struct qent_base *be; ++ while ((be = bq_get_free(bp)) != NULL) ++ qe_dst_free(base_to_dst(be)); ++} ++ ++static void queue_put_free(struct buf_pool *const bp, struct qent_base *be) ++{ ++ unsigned int i; ++ ++ pthread_mutex_lock(&bp->lock); ++ /* Clear out state vars */ ++ be->timestamp.tv_sec = 0; ++ be->timestamp.tv_usec = 0; ++ be->status = QENT_FREE; ++ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) ++ dmabuf_len_set(be->dh[i], 0); ++ bq_put_free(bp, be); ++ pthread_mutex_unlock(&bp->lock); ++ sem_post(&bp->free_sem); ++} ++ ++static bool queue_is_inuse(const struct buf_pool *const bp) ++{ ++ return bp->inuse.tail != NULL; ++} ++ ++static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be) ++{ ++ if (!be) ++ return; ++ 
pthread_mutex_lock(&bp->lock); ++ ql_add_tail(&bp->inuse, be); ++ be->status = QENT_WAITING; ++ pthread_mutex_unlock(&bp->lock); ++} ++ ++static struct qent_base *queue_get_free(struct buf_pool *const bp) ++{ ++ struct qent_base *buf; ++ ++ if (do_wait(&bp->free_sem)) ++ return NULL; ++ pthread_mutex_lock(&bp->lock); ++ buf = bq_get_free(bp); ++ pthread_mutex_unlock(&bp->lock); ++ return buf; ++} ++ ++static struct qent_base *queue_tryget_free(struct buf_pool *const bp) ++{ ++ struct qent_base *buf; ++ ++ if (do_trywait(&bp->free_sem)) ++ return NULL; ++ pthread_mutex_lock(&bp->lock); ++ buf = bq_get_free(bp); ++ pthread_mutex_unlock(&bp->lock); ++ return buf; ++} ++ +static struct qent_base * queue_find_extract_index(struct buf_pool *const bp, const unsigned int index) - { - struct qent_base *be; - - pthread_mutex_lock(&bp->lock); - /* Expect 1st in Q, but allow anywhere */ - for (be = bp->inuse.head; be; be = be->next) { -- if (dmabuf_fd(be->dh[0]) == fd) { ++{ ++ struct qent_base *be; ++ ++ pthread_mutex_lock(&bp->lock); ++ /* Expect 1st in Q, but allow anywhere */ ++ for (be = bp->inuse.head; be; be = be->next) { + if (be->index == index) { - bq_extract_inuse(bp, be); - break; - } -@@ -602,6 +632,8 @@ struct mediabufs_ctl { - struct pollqueue * pq; - struct ff_weak_link_master * this_wlm; - ++ bq_extract_inuse(bp, be); ++ break; ++ } ++ } ++ pthread_mutex_unlock(&bp->lock); ++ ++ return be; ++} ++ ++static void queue_delete(struct buf_pool *const bp) ++{ ++ sem_destroy(&bp->free_sem); ++ pthread_mutex_destroy(&bp->lock); ++ free(bp); ++} ++ ++static struct buf_pool* queue_new(const int vfd) ++{ ++ struct buf_pool *bp = calloc(1, sizeof(*bp)); ++ if (!bp) ++ return NULL; ++ pthread_mutex_init(&bp->lock, NULL); ++ sem_init(&bp->free_sem, 0, 0); ++ return bp; ++} ++ ++ ++struct mediabufs_ctl { ++ atomic_int ref_count; /* 0 is single ref for easier atomics */ ++ void * dc; ++ int vfd; ++ bool stream_on; ++ bool polling; ++ bool dst_fixed; // Dst Q is fixed size ++ 
pthread_mutex_t lock; ++ struct buf_pool * src; ++ struct buf_pool * dst; ++ struct polltask * pt; ++ struct pollqueue * pq; ++ struct ff_weak_link_master * this_wlm; ++ + enum mediabufs_memory src_memtype; + enum mediabufs_memory dst_memtype; - struct v4l2_format src_fmt; - struct v4l2_format dst_fmt; - struct v4l2_capability capability; -@@ -614,7 +646,7 @@ static int qe_v4l2_queue(struct qent_base *const be, - { - struct v4l2_buffer buffer = { - .type = fmt->type, -- .memory = V4L2_MEMORY_DMABUF, ++ struct v4l2_format src_fmt; ++ struct v4l2_format dst_fmt; ++ struct v4l2_capability capability; ++}; ++ ++static int qe_v4l2_queue(struct qent_base *const be, ++ const int vfd, struct media_request *const mreq, ++ const struct v4l2_format *const fmt, ++ const bool is_dst, const bool hold_flag) ++{ ++ struct v4l2_buffer buffer = { ++ .type = fmt->type, + .memory = mediabufs_memory_to_v4l2(be->memtype), - .index = be->index - }; - struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; -@@ -628,7 +660,10 @@ static int qe_v4l2_queue(struct qent_base *const be, - /* *** Really need a pixdesc rather than a format so we can fill in data_offset */ - planes[i].length = dmabuf_size(be->dh[i]); - planes[i].bytesused = dmabuf_len(be->dh[i]); -- planes[i].m.fd = dmabuf_fd(be->dh[i]); ++ .index = be->index ++ }; ++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ unsigned int i; ++ for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) { ++ if (is_dst) ++ dmabuf_len_set(be->dh[i], 0); ++ ++ /* *** Really need a pixdesc rather than a format so we can fill in data_offset */ ++ planes[i].length = dmabuf_size(be->dh[i]); ++ planes[i].bytesused = dmabuf_len(be->dh[i]); + if (be->memtype == MEDIABUFS_MEMORY_DMABUF) + planes[i].m.fd = dmabuf_fd(be->dh[i]); + else + planes[i].m.mem_offset = 0; - } - buffer.m.planes = planes; - buffer.length = i; -@@ -639,7 +674,10 @@ static int qe_v4l2_queue(struct qent_base *const be, - - buffer.bytesused 
= dmabuf_len(be->dh[0]); - buffer.length = dmabuf_size(be->dh[0]); -- buffer.m.fd = dmabuf_fd(be->dh[0]); ++ } ++ buffer.m.planes = planes; ++ buffer.length = i; ++ } ++ else { ++ if (is_dst) ++ dmabuf_len_set(be->dh[0], 0); ++ ++ buffer.bytesused = dmabuf_len(be->dh[0]); ++ buffer.length = dmabuf_size(be->dh[0]); + if (be->memtype == MEDIABUFS_MEMORY_DMABUF) + buffer.m.fd = dmabuf_fd(be->dh[0]); + else + buffer.m.offset = 0; - } - - if (!is_dst && mreq) { -@@ -668,14 +706,13 @@ static struct qent_base * qe_dequeue(struct buf_pool *const bp, - const int vfd, - const struct v4l2_format * const f) - { -- int fd; - struct qent_base *be; - int rc; - const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type); - struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; - struct v4l2_buffer buffer = { - .type = f->type, -- .memory = V4L2_MEMORY_DMABUF ++ } ++ ++ if (!is_dst && mreq) { ++ buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD; ++ buffer.request_fd = media_request_fd(mreq); ++ if (hold_flag) ++ buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF; ++ } ++ ++ if (is_dst) ++ be->timestamp = (struct timeval){0,0}; ++ ++ buffer.timestamp = be->timestamp; ++ ++ while (ioctl(vfd, VIDIOC_QBUF, &buffer)) { ++ const int err = errno; ++ if (err != EINTR) { ++ request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err)); ++ return -err; ++ } ++ } ++ return 0; ++} ++ ++static struct qent_base * qe_dequeue(struct buf_pool *const bp, ++ const int vfd, ++ const struct v4l2_format * const f) ++{ ++ struct qent_base *be; ++ int rc; ++ const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type); ++ struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}}; ++ struct v4l2_buffer buffer = { ++ .type = f->type, + .memory = mediabufs_memory_to_v4l2(bp->memtype) - }; - if (mp) { - buffer.length = f->fmt.pix_mp.num_planes; -@@ -690,10 +727,9 @@ static struct qent_base * qe_dequeue(struct buf_pool *const bp, - return NULL; - } - -- fd = mp ? 
planes[0].m.fd : buffer.m.fd; -- be = queue_find_extract_fd(bp, fd); ++ }; ++ if (mp) { ++ buffer.length = f->fmt.pix_mp.num_planes; ++ buffer.m.planes = planes; ++ } ++ ++ while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 && ++ errno == EINTR) ++ /* Loop */; ++ if (rc) { ++ request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno)); ++ return NULL; ++ } ++ + be = queue_find_extract_index(bp, buffer.index); - if (!be) { -- request_log("Failed to find fd %d in Q\n", fd); ++ if (!be) { + request_log("Failed to find index %d in Q\n", buffer.index); - return NULL; - } - -@@ -1104,7 +1140,7 @@ static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, stru - - struct v4l2_create_buffers cbuf = { - .count = n, -- .memory = V4L2_MEMORY_DMABUF, ++ return NULL; ++ } ++ ++ if (mp) { ++ unsigned int i; ++ for (i = 0; i != buffer.length; ++i) ++ dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0); ++ } ++ else ++ dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0); ++ ++ be->timestamp = buffer.timestamp; ++ be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? 
QENT_ERROR : QENT_DONE; ++ return be; ++} ++ ++static void qe_dst_done(struct qent_dst * dst_be) ++{ ++ pthread_mutex_lock(&dst_be->lock); ++ dst_be->waiting = false; ++ pthread_cond_broadcast(&dst_be->cond); ++ pthread_mutex_unlock(&dst_be->lock); ++ ++ qent_dst_unref(&dst_be); ++} ++ ++static bool qe_dst_waiting(struct qent_dst *const dst_be) ++{ ++ bool waiting; ++ pthread_mutex_lock(&dst_be->lock); ++ waiting = dst_be->waiting; ++ dst_be->waiting = true; ++ pthread_mutex_unlock(&dst_be->lock); ++ return waiting; ++} ++ ++ ++static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc) ++{ ++ return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst); ++} ++ ++static void mediabufs_poll_cb(void * v, short revents) ++{ ++ struct mediabufs_ctl *mbc = v; ++ struct qent_src *src_be = NULL; ++ struct qent_dst *dst_be = NULL; ++ ++ if (!revents) ++ request_err(mbc->dc, "%s: Timeout\n", __func__); ++ ++ pthread_mutex_lock(&mbc->lock); ++ mbc->polling = false; ++ ++ if ((revents & POLLOUT) != 0) ++ src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt)); ++ if ((revents & POLLIN) != 0) ++ dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt)); ++ ++ /* Reschedule */ ++ if (mediabufs_wants_poll(mbc)) { ++ mbc->polling = true; ++ pollqueue_add_task(mbc->pt, 2000); ++ } ++ pthread_mutex_unlock(&mbc->lock); ++ ++ if (src_be) ++ queue_put_free(mbc->src, &src_be->base); ++ if (dst_be) ++ qe_dst_done(dst_be); ++} ++ ++int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp) ++{ ++ struct qent_base *const be = &be_src->base; ++ ++ be->timestamp = *timestamp; ++ return 0; ++} ++ ++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst) ++{ ++ return be_dst->base.timestamp; ++} ++ ++static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc) ++{ ++ if (!be->dh[0] || len > dmabuf_size(be->dh[0])) { ++ size_t newsize = round_up_size(len); ++ 
request_log("%s: Overrun %zd > %zd; trying %zd\n", __func__, len, dmabuf_size(be->dh[0]), newsize); ++ if (!dbsc) { ++ request_log("%s: No dmbabuf_ctrl for realloc\n", __func__); ++ return -ENOMEM; ++ } ++ if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) { ++ request_log("%s: Realloc %zd failed\n", __func__, newsize); ++ return -ENOMEM; ++ } ++ } ++ return 0; ++} ++ ++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc) ++{ ++ struct qent_base *const be = &be_src->base; ++ return qent_base_realloc(be, len, dbsc); ++} ++ ++ ++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc) ++{ ++ void * dst; ++ struct qent_base *const be = &be_src->base; ++ int rv; ++ ++ // Realloc doesn't copy so don't alloc if offset != 0 ++ if ((rv = qent_base_realloc(be, offset + len, ++ be_src->fixed_size || offset ? NULL : dbsc)) != 0) ++ return rv; ++ ++ dmabuf_write_start(be->dh[0]); ++ dst = dmabuf_map(be->dh[0]); ++ if (!dst) ++ return -1; ++ memcpy((char*)dst + offset, src, len); ++ dmabuf_len_set(be->dh[0], len); ++ dmabuf_write_end(be->dh[0]); ++ return 0; ++} ++ ++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane) ++{ ++ const struct qent_base *const be = &be_dst->base; ++ ++ return (plane >= sizeof(be->dh)/sizeof(be->dh[0])) ? 
NULL : be->dh[plane]; ++} ++ ++int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane) ++{ ++ return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane))); ++} ++ ++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, ++ struct media_request **const pmreq, ++ struct qent_src **const psrc_be, ++ struct qent_dst *const dst_be, ++ const bool is_final) ++{ ++ struct media_request * mreq = *pmreq; ++ struct qent_src *const src_be = *psrc_be; ++ ++ // Req & src are always both "consumed" ++ *pmreq = NULL; ++ *psrc_be = NULL; ++ ++ pthread_mutex_lock(&mbc->lock); ++ ++ if (!src_be) ++ goto fail1; ++ ++ if (dst_be) { ++ if (qe_dst_waiting(dst_be)) { ++ request_info(mbc->dc, "Request buffer already waiting on start\n"); ++ goto fail1; ++ } ++ dst_be->base.timestamp = (struct timeval){0,0}; ++ if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false)) ++ goto fail1; ++ ++ qent_dst_ref(dst_be); ++ queue_put_inuse(mbc->dst, &dst_be->base); ++ } ++ ++ if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final)) ++ goto fail1; ++ queue_put_inuse(mbc->src, &src_be->base); ++ ++ if (!mbc->polling && mediabufs_wants_poll(mbc)) { ++ mbc->polling = true; ++ pollqueue_add_task(mbc->pt, 2000); ++ } ++ pthread_mutex_unlock(&mbc->lock); ++ ++ if (media_request_start(mreq)) ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++fail1: ++ media_request_abort(&mreq); ++ if (src_be) ++ queue_put_free(mbc->src, &src_be->base); ++ ++// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q ++ if (dst_be) { ++ dst_be->base.status = QENT_ERROR; ++ qe_dst_done(dst_be); ++ } ++ pthread_mutex_unlock(&mbc->lock); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++ ++static int qe_alloc_from_fmt(struct qent_base *const be, ++ struct dmabufs_ctl *const dbsc, ++ const struct v4l2_format *const fmt) ++{ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ unsigned int i; ++ 
for (i = 0; i != fmt->fmt.pix_mp.num_planes; ++i) { ++ be->dh[i] = dmabuf_realloc(dbsc, be->dh[i], ++ fmt->fmt.pix_mp.plane_fmt[i].sizeimage); ++ /* On failure tidy up and die */ ++ if (!be->dh[i]) { ++ while (i--) { ++ dmabuf_free(be->dh[i]); ++ be->dh[i] = NULL; ++ } ++ return -1; ++ } ++ } ++ } ++ else { ++// be->dh[0] = dmabuf_alloc(dbsc, fmt->fmt.pix.sizeimage); ++ size_t size = fmt->fmt.pix.sizeimage; ++ be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size); ++ if (!be->dh[0]) ++ return -1; ++ } ++ return 0; ++} ++ ++static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd, ++ const enum v4l2_buf_type buftype, ++ uint32_t pixfmt, ++ const unsigned int width, const unsigned int height, ++ const size_t bufsize) ++{ ++ *fmt = (struct v4l2_format){.type = buftype}; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { ++ fmt->fmt.pix_mp.width = width; ++ fmt->fmt.pix_mp.height = height; ++ fmt->fmt.pix_mp.pixelformat = pixfmt; ++ if (bufsize) { ++ fmt->fmt.pix_mp.num_planes = 1; ++ fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize; ++ } ++ } ++ else { ++ fmt->fmt.pix.width = width; ++ fmt->fmt.pix.height = height; ++ fmt->fmt.pix.pixelformat = pixfmt; ++ fmt->fmt.pix.sizeimage = bufsize; ++ } ++ ++ while (ioctl(fd, VIDIOC_S_FMT, fmt)) ++ if (errno != EINTR) ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ ++ // Treat anything where we don't get at least what we asked for as a fail ++ if (V4L2_TYPE_IS_MULTIPLANAR(buftype)) { ++ if (fmt->fmt.pix_mp.width < width || ++ fmt->fmt.pix_mp.height < height || ++ fmt->fmt.pix_mp.pixelformat != pixfmt) { ++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; ++ } ++ } ++ else { ++ if (fmt->fmt.pix.width < width || ++ fmt->fmt.pix.height < height || ++ fmt->fmt.pix.pixelformat != pixfmt) { ++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; ++ } ++ } ++ ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt, ++ const int fd, ++ const unsigned int type_v4l2, ++ const 
uint32_t flags_must, ++ const uint32_t flags_not, ++ const unsigned int width, ++ const unsigned int height, ++ mediabufs_dst_fmt_accept_fn *const accept_fn, ++ void *const accept_v) ++{ ++ unsigned int i; ++ ++ for (i = 0;; ++i) { ++ struct v4l2_fmtdesc fmtdesc = { ++ .index = i, ++ .type = type_v4l2 ++ }; ++ while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) { ++ if (errno != EINTR) ++ return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE; ++ } ++ if ((fmtdesc.flags & flags_must) != flags_must || ++ (fmtdesc.flags & flags_not)) ++ continue; ++ if (!accept_fn(accept_v, &fmtdesc)) ++ continue; ++ ++ if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat, ++ width, height, 0) == MEDIABUFS_STATUS_SUCCESS) ++ return MEDIABUFS_STATUS_SUCCESS; ++ } ++ return 0; ++} ++ ++ ++/* Wait for qent done */ ++ ++MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst) ++{ ++ struct qent_base *const be = &be_dst->base; ++ enum qent_status estat; ++ ++ pthread_mutex_lock(&be_dst->lock); ++ while (be_dst->waiting && ++ !pthread_cond_wait(&be_dst->cond, &be_dst->lock)) ++ /* Loop */; ++ estat = be->status; ++ pthread_mutex_unlock(&be_dst->lock); ++ ++ return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS : ++ estat == QENT_ERROR ? 
MEDIABUFS_ERROR_DECODING_ERROR : ++ MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no) ++{ ++ struct qent_base *const be = &be_dst->base; ++ return dmabuf_map(be->dh[buf_no]); ++} ++ ++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst) ++{ ++ struct qent_base *const be = &be_dst->base; ++ unsigned int i; ++ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { ++ if (dmabuf_read_start(be->dh[i])) { ++ while (i--) ++ dmabuf_read_end(be->dh[i]); ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++ } ++ } ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst) ++{ ++ struct qent_base *const be = &be_dst->base; ++ unsigned int i; ++ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; ++ ++ for (i = 0; i != VIDEO_MAX_PLANES && be->dh[i]; ++i) { ++ if (dmabuf_read_end(be->dh[i])) ++ status = MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ return status; ++} ++ ++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst) ++{ ++ if (be_dst) ++ atomic_fetch_add(&be_dst->base.ref_count, 1); ++ return be_dst; ++} ++ ++void qent_dst_unref(struct qent_dst ** const pbe_dst) ++{ ++ struct qent_dst * const be_dst = *pbe_dst; ++ struct mediabufs_ctl * mbc; ++ if (!be_dst) ++ return; ++ *pbe_dst = NULL; ++ ++ if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0) ++ return; ++ ++ if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) { ++ queue_put_free(mbc->dst, &be_dst->base); ++ ff_weak_link_unlock(be_dst->mbc_wl); ++ } ++ else { ++ qe_dst_free(be_dst); ++ } ++} ++ ++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, ++ unsigned int plane, ++ int fd, size_t size) ++{ ++ struct qent_base *const be = &be_dst->base; ++ struct dmabuf_h * dh; ++ ++ if (be->status != QENT_IMPORT || be->dh[plane]) ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ ++ dh = dmabuf_import(fd, size); ++ if (!dh) ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++ 
++ be->dh[plane] = dh; ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++// Returns noof buffers created, -ve for error ++static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, struct qent_dst * const qes[]) ++{ ++ unsigned int i; ++ ++ struct v4l2_create_buffers cbuf = { ++ .count = n, + .memory = mediabufs_memory_to_v4l2(mbc->dst->memtype), - .format = mbc->dst_fmt, - }; - -@@ -1125,12 +1161,97 @@ static int create_dst_bufs(struct mediabufs_ctl *const mbc, unsigned int n, stru - return cbuf.count; - } - ++ .format = mbc->dst_fmt, ++ }; ++ ++ while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) { ++ const int err = -errno; ++ if (err != EINTR) { ++ request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__); ++ return -err; ++ } ++ } ++ ++ if (cbuf.count != n) ++ request_warn(mbc->dc, "%s: Created %d of %d V4L2 buffers requested\n", __func__, cbuf.count, n); ++ ++ for (i = 0; i != cbuf.count; ++i) ++ qes[i]->base.index = cbuf.index + i; ++ ++ return cbuf.count; ++} ++ +static MediaBufsStatus +qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, const struct v4l2_format *const fmt, + const unsigned int n, const bool x_dmabuf) @@ -25561,8 +11224,10 @@ index 980b306b8a72..910ac77bb6f9 100644 + .plane = i, + .flags = O_RDWR, // *** Arguably O_RDONLY would be fine + }; -+ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) ++ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) { + be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length); ++ close(xbuf.fd); // dmabuf_import dups the fd so close this one ++ } + } + else { + be->dh[i] = dmabuf_import_mmap( @@ -25610,35 +11275,36 @@ index 980b306b8a72..910ac77bb6f9 100644 + return 0; +} + - struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc) - { - struct qent_dst * be_dst; - - if (mbc == NULL) { -- be_dst = qe_dst_new(NULL); ++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const 
dbsc) ++{ ++ struct qent_dst * be_dst; ++ ++ if (mbc == NULL) { + be_dst = qe_dst_new(NULL, MEDIABUFS_MEMORY_DMABUF); - if (be_dst) - be_dst->base.status = QENT_IMPORT; - return be_dst; -@@ -1144,7 +1265,7 @@ struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struc - else { - be_dst = base_to_dst(queue_tryget_free(mbc->dst)); - if (!be_dst) { -- be_dst = qe_dst_new(mbc->this_wlm); ++ if (be_dst) ++ be_dst->base.status = QENT_IMPORT; ++ return be_dst; ++ } ++ ++ if (mbc->dst_fixed) { ++ be_dst = base_to_dst(queue_get_free(mbc->dst)); ++ if (!be_dst) ++ return NULL; ++ } ++ else { ++ be_dst = base_to_dst(queue_tryget_free(mbc->dst)); ++ if (!be_dst) { + be_dst = qe_dst_new(mbc->this_wlm, mbc->dst->memtype); - if (!be_dst) - return NULL; - -@@ -1155,12 +1276,21 @@ struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struc - } - } - -- if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) { -- /* Given how create buf works we can't uncreate it on alloc failure -- * all we can do is put it on the free Q -- */ -- queue_put_free(mbc->dst, &be_dst->base); -- return NULL; ++ if (!be_dst) ++ return NULL; ++ ++ if (create_dst_bufs(mbc, 1, &be_dst) != 1) { ++ qe_dst_free(be_dst); ++ return NULL; ++ } ++ } ++ } ++ + if (mbc->dst->memtype == MEDIABUFS_MEMORY_MMAP) { + if (qe_import_from_buf(mbc, &be_dst->base, &mbc->dst_fmt, be_dst->base.index, true)) { + request_err(mbc->dc, "Failed to export as dmabuf\n"); @@ -25654,36 +11320,107 @@ index 980b306b8a72..910ac77bb6f9 100644 + queue_put_free(mbc->dst, &be_dst->base); + return NULL; + } - } - - be_dst->base.status = QENT_PENDING; -@@ -1208,7 +1338,7 @@ MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, - - // ** This is a mess if we get partial alloc but without any way to remove - // individual V4L2 Q members we are somewhat stuffed --MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed) ++ } ++ ++ 
be_dst->base.status = QENT_PENDING; ++ atomic_store(&be_dst->base.ref_count, 0); ++ return be_dst; ++} ++ ++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc) ++{ ++ return &mbc->dst_fmt; ++} ++ ++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, ++ const unsigned int width, ++ const unsigned int height, ++ mediabufs_dst_fmt_accept_fn *const accept_fn, ++ void *const accept_v) ++{ ++ MediaBufsStatus status; ++ unsigned int i; ++ const enum v4l2_buf_type buf_type = mbc->dst_fmt.type; ++ static const struct { ++ unsigned int flags_must; ++ unsigned int flags_not; ++ } trys[] = { ++ {0, V4L2_FMT_FLAG_EMULATED}, ++ {V4L2_FMT_FLAG_EMULATED, 0}, ++ }; ++ for (i = 0; i != sizeof(trys)/sizeof(trys[0]); ++i) { ++ status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, ++ buf_type, ++ trys[i].flags_must, ++ trys[i].flags_not, ++ width, height, accept_fn, accept_v); ++ if (status != MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE) ++ return status; ++ } ++ ++ if (status != MEDIABUFS_STATUS_SUCCESS) ++ return status; ++ ++ /* Try to create a buffer - don't alloc */ ++ return status; ++} ++ ++// ** This is a mess if we get partial alloc but without any way to remove ++// individual V4L2 Q members we are somewhat stuffed +MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype) - { - unsigned int i; - int a = 0; -@@ -1218,10 +1348,12 @@ MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, cons - if (n > 32) - return MEDIABUFS_ERROR_ALLOCATION_FAILED; - ++{ ++ unsigned int i; ++ int a = 0; ++ unsigned int qc; ++ struct qent_dst * qes[32]; ++ ++ if (n > 32) ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++ + mbc->dst->memtype = memtype; + - // Create qents first as it is hard to get rid of the V4L2 buffers on error - for (qc = 0; qc != n; ++qc) - { -- if ((qes[qc] = qe_dst_new(mbc->this_wlm)) == NULL) ++ // Create qents first as it is 
hard to get rid of the V4L2 buffers on error ++ for (qc = 0; qc != n; ++qc) ++ { + if ((qes[qc] = qe_dst_new(mbc->this_wlm, mbc->dst->memtype)) == NULL) - goto fail; - } - -@@ -1260,19 +1392,61 @@ void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src * - queue_put_free(mbc->src, &qe_src->base); - } - ++ goto fail; ++ } ++ ++ if ((a = create_dst_bufs(mbc, n, qes)) < 0) ++ goto fail; ++ ++ for (i = 0; i != a; ++i) ++ queue_put_free(mbc->dst, &qes[i]->base); ++ ++ if (a != n) ++ goto fail; ++ ++ mbc->dst_fixed = fixed; ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++fail: ++ for (i = (a < 0 ? 0 : a); i != qc; ++i) ++ qe_dst_free(qes[i]); ++ ++ return MEDIABUFS_ERROR_ALLOCATION_FAILED; ++} ++ ++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc) ++{ ++ struct qent_base * buf = queue_get_free(mbc->src); ++ buf->status = QENT_PENDING; ++ return base_to_src(buf); ++} ++ ++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src) ++{ ++ struct qent_src *const qe_src = *pqe_src; ++ if (!qe_src) ++ return; ++ *pqe_src = NULL; ++ queue_put_free(mbc->src, &qe_src->base); ++} ++ +static MediaBufsStatus +chk_memory_type(struct mediabufs_ctl *const mbc, + const struct v4l2_format * const f, @@ -25725,37 +11462,38 @@ index 980b306b8a72..910ac77bb6f9 100644 + return chk_memory_type(mbc, &mbc->dst_fmt, memtype); +} + - /* src format must have been set up before this */ - MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, - struct dmabufs_ctl * const dbsc, -- unsigned int n) ++/* src format must have been set up before this */ ++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, ++ struct dmabufs_ctl * const dbsc, + unsigned int n, const enum mediabufs_memory memtype) - { - unsigned int i; - struct v4l2_requestbuffers req = { - .count = n, - .type = mbc->src_fmt.type, -- .memory = V4L2_MEMORY_DMABUF ++{ ++ unsigned int i; ++ struct v4l2_requestbuffers req = { 
++ .count = n, ++ .type = mbc->src_fmt.type, + .memory = mediabufs_memory_to_v4l2(memtype) - }; - - bq_free_all_free_src(mbc->src); ++ }; + - while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) { - if (errno != EINTR) { - request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__); -@@ -1286,21 +1460,36 @@ MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc, - } - - for (i = 0; i != n; ++i) { -- struct qent_src *const be_src = qe_src_new(); ++ bq_free_all_free_src(mbc->src); ++ ++ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) { ++ if (errno != EINTR) { ++ request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ } ++ ++ if (n > req.count) { ++ request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n); ++ n = req.count; ++ } ++ ++ for (i = 0; i != n; ++i) { + struct qent_src *const be_src = qe_src_new(memtype); - if (!be_src) { - request_err(mbc->dc, "Failed to create src be %d\n", i); - goto fail; - } -- if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) { -- qe_src_free(be_src); ++ if (!be_src) { ++ request_err(mbc->dc, "Failed to create src be %d\n", i); ++ goto fail; ++ } + switch (memtype) { + case MEDIABUFS_MEMORY_MMAP: + if (qe_import_from_buf(mbc, &be_src->base, &mbc->src_fmt, i, false)) { @@ -25773,48 +11511,395 @@ index 980b306b8a72..910ac77bb6f9 100644 + break; + default: + request_err(mbc->dc, "Unexpected memorty type\n"); - goto fail; - } - be_src->base.index = i; -- be_src->fixed_size = !mediabufs_src_resizable(mbc); - - queue_put_free(mbc->src, &be_src->base); - } - ++ goto fail; ++ } ++ be_src->base.index = i; ++ ++ queue_put_free(mbc->src, &be_src->base); ++ } ++ + mbc->src->memtype = memtype; - return MEDIABUFS_STATUS_SUCCESS; - - fail: -@@ -1437,9 +1626,13 @@ int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ - - int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc) - { ++ 
return MEDIABUFS_STATUS_SUCCESS; ++ ++fail: ++ bq_free_all_free_src(mbc->src); ++ req.count = 0; ++ while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 && ++ errno == EINTR) ++ /* Loop */; ++ ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++ ++ ++/* ++ * Set stuff order: ++ * Set src fmt ++ * Set parameters (sps) on vfd ++ * Negotiate dst format (dst_fmt_set) ++ * Create src buffers ++ * Alloc a dst buffer or Create dst slots ++*/ ++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc) ++{ ++ if (mbc->stream_on) ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++ if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) { ++ request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) { ++ request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type); ++ set_stream(mbc->vfd, mbc->src_fmt.type, false); ++ return MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ mbc->stream_on = true; ++ return MEDIABUFS_STATUS_SUCCESS; ++} ++ ++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc) ++{ ++ MediaBufsStatus status = MEDIABUFS_STATUS_SUCCESS; ++ ++ if (!mbc->stream_on) ++ return MEDIABUFS_STATUS_SUCCESS; ++ ++ if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) { ++ request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type); ++ status = MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) { ++ request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type); ++ status = MEDIABUFS_ERROR_OPERATION_FAILED; ++ } ++ ++ mbc->stream_on = false; ++ return status; ++} ++ ++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n) ++{ ++ struct v4l2_ext_controls controls = { ++ .controls = control_array, ++ .count = n ++ }; ++ ++ if (mreq) { ++ controls.which = 
V4L2_CTRL_WHICH_REQUEST_VAL; ++ controls.request_fd = media_request_fd(mreq); ++ } ++ ++ while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &controls)) ++ { ++ const int err = errno; ++ if (err != EINTR) { ++ request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err)); ++ return -err; ++ } ++ } ++ ++ return 0; ++} ++ ++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, ++ struct media_request * const mreq, ++ unsigned int id, void *data, ++ unsigned int size) ++{ ++ struct v4l2_ext_control control = { ++ .id = id, ++ .ptr = data, ++ .size = size ++ }; ++ ++ int rv = mediabufs_ctl_set_ext_ctrls(mbc, mreq, &control, 1); ++ return !rv ? MEDIABUFS_STATUS_SUCCESS : MEDIABUFS_ERROR_OPERATION_FAILED; ++} ++ ++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, ++ enum v4l2_buf_type buf_type, ++ const uint32_t pixfmt, ++ const uint32_t width, const uint32_t height, ++ const size_t bufsize) ++{ ++ MediaBufsStatus rv = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize); ++ if (rv != MEDIABUFS_STATUS_SUCCESS) ++ request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height); ++ ++ return rv; ++} ++ ++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n) ++{ ++ int rv = 0; ++ while (n--) { ++ while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) { ++ const int err = errno; ++ if (err != EINTR) { ++ // Often used for probing - errors are to be expected ++ request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err); ++ ctrls->type = 0; // 0 is invalid ++ rv = -err; ++ break; ++ } ++ } ++ ++ctrls; ++ } ++ return rv; ++} ++ ++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc) ++{ +#if 1 + return 0; +#else - // Single planar OUTPUT can only take exact size buffers - // Multiplanar will take larger than negotiated - return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type); ++ // Single 
planar OUTPUT can only take exact size buffers ++ // Multiplanar will take larger than negotiated ++ return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type); +#endif - } - - static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc) ++} ++ ++static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc) ++{ ++ if (!mbc) ++ return; ++ ++ // Break the weak link first ++ ff_weak_link_break(&mbc->this_wlm); ++ ++ polltask_delete(&mbc->pt); ++ ++ mediabufs_stream_off(mbc); ++ ++ // Empty v4l2 buffer stash ++ request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0); ++ request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0); ++ ++ bq_free_all_free_src(mbc->src); ++ bq_free_all_inuse_src(mbc->src); ++ bq_free_all_free_dst(mbc->dst); ++ ++ { ++ struct qent_dst *dst_be; ++ while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) { ++ dst_be->base.timestamp = (struct timeval){0}; ++ dst_be->base.status = QENT_ERROR; ++ qe_dst_done(dst_be); ++ } ++ } ++ ++ queue_delete(mbc->dst); ++ queue_delete(mbc->src); ++ close(mbc->vfd); ++ pthread_mutex_destroy(&mbc->lock); ++ ++ free(mbc); ++} ++ ++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc) ++{ ++ atomic_fetch_add(&mbc->ref_count, 1); ++ return mbc; ++} ++ ++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc) ++{ ++ struct mediabufs_ctl *const mbc = *pmbc; ++ int n; ++ ++ if (!mbc) ++ return; ++ *pmbc = NULL; ++ n = atomic_fetch_sub(&mbc->ref_count, 1); ++ if (n) ++ return; ++ mediabufs_ctl_delete(mbc); ++} ++ ++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc) ++{ ++ return mbc->capability.version; ++} ++ ++static int set_capabilities(struct mediabufs_ctl *const mbc) ++{ ++ uint32_t caps; ++ ++ if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &mbc->capability)) { ++ int err = errno; ++ request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err)); ++ return -err; ++ } ++ ++ caps = (mbc->capability.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ? 
++ mbc->capability.device_caps : ++ mbc->capability.capabilities; ++ ++ if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) { ++ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; ++ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; ++ } ++ else if ((caps & V4L2_CAP_VIDEO_M2M) != 0) { ++ mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; ++ mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; ++ } ++ else { ++ request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/* One of these per context */ ++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq) ++{ ++ struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); ++ ++ if (!mbc) ++ return NULL; ++ ++ mbc->dc = dc; ++ // Default mono planar ++ mbc->pq = pq; ++ pthread_mutex_init(&mbc->lock, NULL); ++ ++ /* Pick a default - could we scan for this? */ ++ if (vpath == NULL) ++ vpath = "/dev/media0"; ++ ++ while ((mbc->vfd = open(vpath, O_RDWR)) == -1) ++ { ++ const int err = errno; ++ if (err != EINTR) { ++ request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err)); ++ goto fail0; ++ } ++ } ++ ++ if (set_capabilities(mbc)) { ++ request_err(dc, "Bad capabilities for video dev '%s'\n", vpath); ++ goto fail1; ++ } ++ ++ mbc->src = queue_new(mbc->vfd); ++ if (!mbc->src) ++ goto fail1; ++ mbc->dst = queue_new(mbc->vfd); ++ if (!mbc->dst) ++ goto fail2; ++ mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc); ++ if (!mbc->pt) ++ goto fail3; ++ mbc->this_wlm = ff_weak_link_new(mbc); ++ if (!mbc->this_wlm) ++ goto fail4; ++ ++ /* Cannot add polltask now - polling with nothing pending ++ * generates infinite error polls ++ */ ++ return mbc; ++ ++fail4: ++ polltask_delete(&mbc->pt); ++fail3: ++ queue_delete(mbc->dst); ++fail2: ++ queue_delete(mbc->src); ++fail1: ++ close(mbc->vfd); ++fail0: ++ free(mbc); ++ request_info(dc, "%s: FAILED\n", __func__); ++ return NULL; ++} ++ ++ ++ diff 
--git a/libavcodec/v4l2_req_media.h b/libavcodec/v4l2_req_media.h -index 0307a831defd..890947b2e210 100644 ---- a/libavcodec/v4l2_req_media.h +new file mode 100644 +index 000000000000..0f1c79fb4ee1 +--- /dev/null +++ b/libavcodec/v4l2_req_media.h -@@ -43,6 +43,7 @@ typedef enum media_buf_status { - MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, - MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT, - MEDIABUFS_ERROR_ALLOCATION_FAILED, +@@ -0,0 +1,171 @@ ++/* ++e.h ++* ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the ++ * "Software"), to deal in the Software without restriction, including ++ * without limitation the rights to use, copy, modify, merge, publish, ++ * distribute, sub license, and/or sell copies of the Software, and to ++ * permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice (including the ++ * next paragraph) shall be included in all copies or substantial portions ++ * of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. ++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef AVCODEC_V4L2_REQ_MEDIA_H ++#define AVCODEC_V4L2_REQ_MEDIA_H ++ ++#include ++#include ++ ++struct v4l2_format; ++struct v4l2_fmtdesc; ++struct v4l2_query_ext_ctrl; ++ ++struct pollqueue; ++struct media_request; ++struct media_pool; ++ ++typedef enum media_buf_status { ++ MEDIABUFS_STATUS_SUCCESS = 0, ++ MEDIABUFS_ERROR_OPERATION_FAILED, ++ MEDIABUFS_ERROR_DECODING_ERROR, ++ MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE, ++ MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT, ++ MEDIABUFS_ERROR_ALLOCATION_FAILED, + MEDIABUFS_ERROR_UNSUPPORTED_MEMORY, - } MediaBufsStatus; - - struct media_pool * media_pool_new(const char * const media_path, -@@ -70,6 +71,15 @@ struct qent_dst; - struct dmabuf_h; - struct dmabufs_ctl; - ++} MediaBufsStatus; ++ ++struct media_pool * media_pool_new(const char * const media_path, ++ struct pollqueue * const pq, ++ const unsigned int n); ++void media_pool_delete(struct media_pool ** pmp); ++ ++// Obtain a media request ++// Will block if none availible - has a 2sec timeout ++struct media_request * media_request_get(struct media_pool * const mp); ++int media_request_fd(const struct media_request * const req); ++ ++// Start this request ++// Request structure is returned to pool once done ++int media_request_start(struct media_request * const req); ++ ++// Return an *unstarted* media_request to the pool ++// May later be upgraded to allow for aborting a started req ++int media_request_abort(struct media_request ** const preq); ++ ++ ++struct mediabufs_ctl; ++struct qent_src; ++struct qent_dst; ++struct dmabuf_h; ++struct dmabufs_ctl; ++ +// 1-1 mammping to V4L2 type - just defined separetely to avoid some include versioning difficulties +enum mediabufs_memory { + MEDIABUFS_MEMORY_UNSET = 0, @@ -25824,60 +11909,834 @@ index 0307a831defd..890947b2e210 100644 + MEDIABUFS_MEMORY_DMABUF = 4, +}; + - int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp); - struct timeval qent_dst_timestamp_get(const struct qent_dst 
*const be_dst); - -@@ -93,6 +103,8 @@ MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, - unsigned int plane, - int fd, size_t size); - ++int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp); ++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst); ++ ++// prealloc ++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc); ++// dbsc may be NULL if realloc not required ++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc); ++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane); ++int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane); ++MediaBufsStatus qent_dst_wait(struct qent_dst *const be); ++void qent_dst_delete(struct qent_dst *const be); ++// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead ++void qent_dst_unref(struct qent_dst ** const pbe_dst); ++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst); ++ ++const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no); ++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be); ++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be); ++/* Import an fd unattached to any mediabuf */ ++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst, ++ unsigned int plane, ++ int fd, size_t size); ++ +const char * mediabufs_memory_name(const enum mediabufs_memory m); + - MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, - struct media_request **const pmreq, - struct qent_src **const psrc_be, -@@ -106,7 +118,7 @@ struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, - // Create dst slots without alloc - // If fixed true then qent_alloc will only get slots from this pool and will - // block until a qent has been unrefed --MediaBufsStatus 
mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed); ++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc, ++ struct media_request **const pmreq, ++ struct qent_src **const psrc_be, ++ struct qent_dst *const dst_be, ++ const bool is_final); ++// Get / alloc a dst buffer & associate with a slot ++// If the dst pool is empty then behaviour depends on the fixed flag passed to ++// dst_slots_create. Default is !fixed = unlimited alloc ++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, ++ struct dmabufs_ctl *const dbsc); ++// Create dst slots without alloc ++// If fixed true then qent_alloc will only get slots from this pool and will ++// block until a qent has been unrefed +MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, const unsigned int n, const bool fixed, const enum mediabufs_memory memtype); - - MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc); - MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc); -@@ -140,7 +152,12 @@ MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, - - MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, - struct dmabufs_ctl * const dbsc, -- unsigned int n); ++ ++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc); ++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc); ++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc); ++ ++typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc); ++ ++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc, ++ const unsigned int width, ++ const unsigned int height, ++ mediabufs_dst_fmt_accept_fn *const accept_fn, ++ void *const accept_v); ++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc); ++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const 
pqe_src); ++ ++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, ++ struct v4l2_ext_control control_array[], unsigned int n); ++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc, ++ struct media_request * const mreq, ++ unsigned int id, void *data, ++ unsigned int size); ++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n); ++ ++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc); ++ ++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc, ++ enum v4l2_buf_type buf_type, ++ const uint32_t pixfmt, ++ const uint32_t width, const uint32_t height, ++ const size_t bufsize); ++ ++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw, ++ struct dmabufs_ctl * const dbsc, + unsigned int n, + const enum mediabufs_memory memtype); + +// Want to have appropriate formats set first +MediaBufsStatus mediabufs_src_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); +MediaBufsStatus mediabufs_dst_chk_memtype(struct mediabufs_ctl *const mbc, const enum mediabufs_memory memtype); - - #define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c)) - unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc); ++ ++#define MEDIABUFS_DRIVER_VERSION(a, b, c) (((a) << 16) | ((b) << 8) | (c)) ++unsigned int mediabufs_ctl_driver_version(struct mediabufs_ctl *const mbc); ++ ++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, ++ const char *vpath, struct pollqueue *const pq); ++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc); ++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc); ++ ++ ++#endif +diff --git a/libavcodec/v4l2_req_pollqueue.c b/libavcodec/v4l2_req_pollqueue.c +new file mode 100644 +index 000000000000..4b4984e5b064 +--- /dev/null ++++ b/libavcodec/v4l2_req_pollqueue.c +@@ -0,0 +1,385 @@ ++/* ++ Copyright 
(C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_utils.h" ++ ++ ++struct pollqueue; ++ ++enum polltask_state { ++ POLLTASK_UNQUEUED = 0, ++ POLLTASK_QUEUED, ++ POLLTASK_RUNNING, ++ POLLTASK_Q_KILL, ++ POLLTASK_RUN_KILL, ++}; ++ ++struct polltask { ++ struct polltask *next; ++ struct polltask *prev; ++ struct pollqueue *q; ++ enum polltask_state state; ++ ++ int fd; ++ short events; ++ ++ void (*fn)(void *v, short revents); ++ void * v; ++ ++ uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */ ++ sem_t kill_sem; ++}; ++ ++struct pollqueue { ++ atomic_int ref_count; ++ pthread_mutex_t lock; ++ ++ struct polltask *head; ++ struct polltask *tail; ++ ++ bool kill; ++ bool no_prod; ++ int prod_fd; ++ struct polltask *prod_pt; ++ pthread_t worker; ++}; ++ ++struct polltask *polltask_new(struct pollqueue *const pq, ++ const int fd, const short events, ++ void (*const fn)(void *v, short revents), ++ void *const v) ++{ ++ struct polltask *pt; ++ ++ if (!events) ++ return NULL; ++ ++ pt = malloc(sizeof(*pt)); ++ if (!pt) ++ return NULL; ++ ++ *pt = (struct polltask){ ++ .next = NULL, ++ .prev = NULL, ++ .q = pollqueue_ref(pq), ++ .fd = fd, ++ .events = events, ++ .fn = fn, ++ .v = v ++ }; ++ ++ sem_init(&pt->kill_sem, 0, 0); ++ ++ return pt; ++} ++ ++static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt) ++{ ++ if (pt->prev) ++ pt->prev->next = pt->next; ++ else ++ pq->head = pt->next; ++ if (pt->next) ++ pt->next->prev = pt->prev; ++ else ++ pq->tail = pt->prev; ++ pt->next = NULL; ++ pt->prev = NULL; ++} ++ ++static void polltask_free(struct polltask * const pt) ++{ ++ sem_destroy(&pt->kill_sem); ++ free(pt); ++} ++ ++static int pollqueue_prod(const struct pollqueue *const pq) ++{ ++ static const uint64_t one = 1; ++ return write(pq->prod_fd, &one, sizeof(one)); ++} ++ ++void 
polltask_delete(struct polltask **const ppt) ++{ ++ struct polltask *const pt = *ppt; ++ struct pollqueue * pq; ++ enum polltask_state state; ++ bool prodme; ++ ++ if (!pt) ++ return; ++ ++ pq = pt->q; ++ pthread_mutex_lock(&pq->lock); ++ state = pt->state; ++ pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL; ++ prodme = !pq->no_prod; ++ pthread_mutex_unlock(&pq->lock); ++ ++ if (state != POLLTASK_UNQUEUED) { ++ if (prodme) ++ pollqueue_prod(pq); ++ while (sem_wait(&pt->kill_sem) && errno == EINTR) ++ /* loop */; ++ } ++ ++ // Leave zapping the ref until we have DQed the PT as might well be ++ // legitimately used in it ++ *ppt = NULL; ++ polltask_free(pt); ++ pollqueue_unref(&pq); ++} ++ ++static uint64_t pollqueue_now(int timeout) ++{ ++ struct timespec now; ++ uint64_t now_ms; ++ ++ if (clock_gettime(CLOCK_MONOTONIC, &now)) ++ return 0; ++ now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout; ++ return now_ms ? now_ms : (uint64_t)1; ++} ++ ++void pollqueue_add_task(struct polltask *const pt, const int timeout) ++{ ++ bool prodme = false; ++ struct pollqueue * const pq = pt->q; ++ ++ pthread_mutex_lock(&pq->lock); ++ if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) { ++ if (pq->tail) ++ pq->tail->next = pt; ++ else ++ pq->head = pt; ++ pt->prev = pq->tail; ++ pt->next = NULL; ++ pt->state = POLLTASK_QUEUED; ++ pt->timeout = timeout < 0 ? 
0 : pollqueue_now(timeout); ++ pq->tail = pt; ++ prodme = !pq->no_prod; ++ } ++ pthread_mutex_unlock(&pq->lock); ++ if (prodme) ++ pollqueue_prod(pq); ++} ++ ++static void *poll_thread(void *v) ++{ ++ struct pollqueue *const pq = v; ++ struct pollfd *a = NULL; ++ size_t asize = 0; ++ ++ pthread_mutex_lock(&pq->lock); ++ do { ++ unsigned int i; ++ unsigned int n = 0; ++ struct polltask *pt; ++ struct polltask *pt_next; ++ uint64_t now = pollqueue_now(0); ++ int timeout = -1; ++ int rv; ++ ++ for (pt = pq->head; pt; pt = pt_next) { ++ int64_t t; ++ ++ pt_next = pt->next; ++ ++ if (pt->state == POLLTASK_Q_KILL) { ++ pollqueue_rem_task(pq, pt); ++ sem_post(&pt->kill_sem); ++ continue; ++ } ++ ++ if (n >= asize) { ++ asize = asize ? asize * 2 : 4; ++ a = realloc(a, asize * sizeof(*a)); ++ if (!a) { ++ request_log("Failed to realloc poll array to %zd\n", asize); ++ goto fail_locked; ++ } ++ } ++ ++ a[n++] = (struct pollfd){ ++ .fd = pt->fd, ++ .events = pt->events ++ }; ++ ++ t = (int64_t)(pt->timeout - now); ++ if (pt->timeout && t < INT_MAX && ++ (timeout < 0 || (int)t < timeout)) ++ timeout = (t < 0) ? 0 : (int)t; ++ } ++ pthread_mutex_unlock(&pq->lock); ++ ++ if ((rv = poll(a, n, timeout)) == -1) { ++ if (errno != EINTR) { ++ request_log("Poll error: %s\n", strerror(errno)); ++ goto fail_unlocked; ++ } ++ } ++ ++ pthread_mutex_lock(&pq->lock); ++ now = pollqueue_now(0); ++ ++ /* Prodding in this loop is pointless and might lead to ++ * infinite looping ++ */ ++ pq->no_prod = true; ++ for (i = 0, pt = pq->head; i < n; ++i, pt = pt_next) { ++ pt_next = pt->next; ++ ++ /* Pending? 
*/ ++ if (a[i].revents || ++ (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) { ++ pollqueue_rem_task(pq, pt); ++ if (pt->state == POLLTASK_QUEUED) ++ pt->state = POLLTASK_RUNNING; ++ if (pt->state == POLLTASK_Q_KILL) ++ pt->state = POLLTASK_RUN_KILL; ++ pthread_mutex_unlock(&pq->lock); ++ ++ /* This can add new entries to the Q but as ++ * those are added to the tail our existing ++ * chain remains intact ++ */ ++ pt->fn(pt->v, a[i].revents); ++ ++ pthread_mutex_lock(&pq->lock); ++ if (pt->state == POLLTASK_RUNNING) ++ pt->state = POLLTASK_UNQUEUED; ++ if (pt->state == POLLTASK_RUN_KILL) ++ sem_post(&pt->kill_sem); ++ } ++ } ++ pq->no_prod = false; ++ ++ } while (!pq->kill); ++ ++fail_locked: ++ pthread_mutex_unlock(&pq->lock); ++fail_unlocked: ++ free(a); ++ return NULL; ++} ++ ++static void prod_fn(void *v, short revents) ++{ ++ struct pollqueue *const pq = v; ++ char buf[8]; ++ if (revents) ++ read(pq->prod_fd, buf, 8); ++ if (!pq->kill) ++ pollqueue_add_task(pq->prod_pt, -1); ++} ++ ++struct pollqueue * pollqueue_new(void) ++{ ++ struct pollqueue *pq = malloc(sizeof(*pq)); ++ if (!pq) ++ return NULL; ++ *pq = (struct pollqueue){ ++ .ref_count = ATOMIC_VAR_INIT(0), ++ .lock = PTHREAD_MUTEX_INITIALIZER, ++ .head = NULL, ++ .tail = NULL, ++ .kill = false, ++ .prod_fd = -1 ++ }; ++ ++ pq->prod_fd = eventfd(0, EFD_NONBLOCK); ++ if (pq->prod_fd == 1) ++ goto fail1; ++ pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq); ++ if (!pq->prod_pt) ++ goto fail2; ++ pollqueue_add_task(pq->prod_pt, -1); ++ if (pthread_create(&pq->worker, NULL, poll_thread, pq)) ++ goto fail3; ++ // Reset ref count which will have been inced by the add_task ++ atomic_store(&pq->ref_count, 0); ++ return pq; ++ ++fail3: ++ polltask_free(pq->prod_pt); ++fail2: ++ close(pq->prod_fd); ++fail1: ++ free(pq); ++ return NULL; ++} ++ ++static void pollqueue_free(struct pollqueue *const pq) ++{ ++ void *rv; ++ ++ pthread_mutex_lock(&pq->lock); ++ pq->kill = true; ++ pollqueue_prod(pq); 
++ pthread_mutex_unlock(&pq->lock); ++ ++ pthread_join(pq->worker, &rv); ++ polltask_free(pq->prod_pt); ++ pthread_mutex_destroy(&pq->lock); ++ close(pq->prod_fd); ++ free(pq); ++} ++ ++struct pollqueue * pollqueue_ref(struct pollqueue *const pq) ++{ ++ atomic_fetch_add(&pq->ref_count, 1); ++ return pq; ++} ++ ++void pollqueue_unref(struct pollqueue **const ppq) ++{ ++ struct pollqueue * const pq = *ppq; ++ ++ if (!pq) ++ return; ++ *ppq = NULL; ++ ++ if (atomic_fetch_sub(&pq->ref_count, 1) != 0) ++ return; ++ ++ pollqueue_free(pq); ++} ++ ++ ++ +diff --git a/libavcodec/v4l2_req_pollqueue.h b/libavcodec/v4l2_req_pollqueue.h +new file mode 100644 +index 000000000000..9634f33d48fd +--- /dev/null ++++ b/libavcodec/v4l2_req_pollqueue.h +@@ -0,0 +1,42 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef AVCODEC_V4L2_REQ_POLLQUEUE_H ++#define AVCODEC_V4L2_REQ_POLLQUEUE_H ++ ++struct polltask; ++struct pollqueue; ++ ++struct polltask *polltask_new(struct pollqueue *const pq, ++ const int fd, const short events, ++ void (*const fn)(void *v, short revents), ++ void *const v); ++void polltask_delete(struct polltask **const ppt); ++ ++void pollqueue_add_task(struct polltask *const pt, const int timeout); ++struct pollqueue * pollqueue_new(void); ++void pollqueue_unref(struct pollqueue **const ppq); ++struct pollqueue * pollqueue_ref(struct pollqueue *const pq); ++ ++#endif /* AVCODEC_V4L2_REQ_POLLQUEUE_H_ */ +diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h +new file mode 100644 +index 000000000000..a6160c5e1c3b +--- /dev/null ++++ b/libavcodec/v4l2_req_utils.h +@@ -0,0 +1,51 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef AVCODEC_V4L2_REQ_UTILS_H ++#define AVCODEC_V4L2_REQ_UTILS_H ++ ++#include ++#include "libavutil/log.h" ++ ++#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) ++ ++#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__) ++#define request_warn(_ctx, ...) av_log(_ctx, AV_LOG_WARNING, __VA_ARGS__) ++#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__) ++#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__) ++ ++static inline char safechar(char c) { ++ return c > 0x20 && c < 0x7f ? c : '.'; ++} ++ ++static inline const char * strfourcc(char tbuf[5], uint32_t fcc) { ++ tbuf[0] = safechar((fcc >> 0) & 0xff); ++ tbuf[1] = safechar((fcc >> 8) & 0xff); ++ tbuf[2] = safechar((fcc >> 16) & 0xff); ++ tbuf[3] = safechar((fcc >> 24) & 0xff); ++ tbuf[4] = '\0'; ++ return tbuf; ++} ++ ++#endif diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index cd79aad5631a..5cf17dd5e3fb 100644 ---- a/libavcodec/v4l2_request_hevc.c +new file mode 100644 +index 000000000000..94c647380364 +--- /dev/null +++ b/libavcodec/v4l2_request_hevc.c -@@ -144,6 +144,8 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - const struct decdev * decdev; - const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 1).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes - size_t src_size; +@@ -0,0 +1,410 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ ++#include "config.h" ++#include "decode.h" ++#include "hevc/hevcdec.h" ++#include "hwaccel_internal.h" ++#include "hwconfig.h" ++#include "internal.h" ++ ++#include "v4l2_request_hevc.h" ++ ++#include "libavutil/hwcontext_drm.h" ++#include "libavutil/mem.h" ++#include "libavutil/pixdesc.h" ++ ++#include "v4l2_req_devscan.h" ++#include "v4l2_req_dmabufs.h" ++#include "v4l2_req_pollqueue.h" ++#include "v4l2_req_media.h" ++#include "v4l2_req_utils.h" ++ ++static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8) ++{ ++ const size_t wxh = w * h; ++ size_t bits_alloc; ++ ++ /* Annex A gives a min compression of 2 @ lvl 3.1 ++ * (wxh <= 983040) and min 4 thereafter but avoid ++ * the odity of 983041 having a lower limit than ++ * 983040. ++ * Multiply by 3/2 for 4:2:0 ++ */ ++ bits_alloc = wxh < 983040 ? wxh * 3 / 4 : ++ wxh < 983040 * 2 ? 
983040 * 3 / 4 : ++ wxh * 3 / 8; ++ /* Allow for bit depth */ ++ bits_alloc += (bits_alloc * bits_minus8) / 8; ++ /* Add a few bytes (16k) for overhead */ ++ bits_alloc += 0x4000; ++ return bits_alloc; ++} ++ ++static int v4l2_req_hevc_start_frame(AVCodecContext *avctx, ++ av_unused const uint8_t *buffer, ++ av_unused uint32_t size) ++{ ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ V4L2RequestContextHEVC *const ctx = priv->cctx; ++ return ctx->fns->start_frame(avctx, ctx, buffer, size); ++} ++ ++static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) ++{ ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ V4L2RequestContextHEVC *const ctx = priv->cctx; ++ return ctx->fns->decode_slice(avctx, ctx, buffer, size); ++} ++ ++static int v4l2_req_hevc_end_frame(AVCodecContext *avctx) ++{ ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ V4L2RequestContextHEVC *const ctx = priv->cctx; ++ return ctx->fns->end_frame(avctx, ctx); ++} ++ ++static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx) ++{ ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ V4L2RequestContextHEVC *const ctx = priv->cctx; ++ ctx->fns->abort_frame(avctx, ctx); ++} ++ ++static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) ++{ ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ V4L2RequestContextHEVC *const ctx = priv->cctx; ++ return ctx->fns->frame_params(avctx, ctx, hw_frames_ctx); ++} ++ ++static int v4l2_req_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame) ++{ ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ V4L2RequestContextHEVC *const ctx = priv->cctx; ++ return ctx->fns->alloc_frame(avctx, ctx, frame); ++} ++ ++ ++static void ++cctx_free(void * v, uint8_t * data) ++{ ++ V4L2RequestContextHEVC *const ctx = (V4L2RequestContextHEVC *)data; ++ 
++ mediabufs_ctl_unref(&ctx->mbufs); ++ media_pool_delete(&ctx->mpool); ++ pollqueue_unref(&ctx->pq); ++ dmabufs_ctl_unref(&ctx->dbufs); ++ devscan_delete(&ctx->devscan); ++ ++ decode_q_uninit(&ctx->decode_q); ++ ++ av_free(ctx); ++} ++ ++static int v4l2_request_hevc_uninit(AVCodecContext *avctx) ++{ ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ ++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++// decode_q_wait(&ctx->decode_q, NULL); // Wait for all other threads to be out of decode ++ ++ priv->cctx = NULL; ++ av_buffer_unref(&priv->cctx_buf); ++ ++// if (avctx->hw_frames_ctx) { ++// AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data; ++// av_buffer_pool_flush(hwfc->pool); ++// } ++ return 0; ++} ++ ++static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc) ++{ ++ AVCodecContext *const avctx = v; ++ const HEVCContext *const h = avctx->priv_data; ++ const HEVCPPS * const pps = h->pps; ++ const HEVCSPS * const sps = pps->sps; ++ ++ if (sps->bit_depth == 8) { ++ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 || ++ fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) { ++ return 1; ++ } ++ } ++ else if (sps->bit_depth == 10) { ++ if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) { ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++static int v4l2_request_hevc_init(AVCodecContext *avctx) ++{ ++ const HEVCContext *h = avctx->priv_data; ++ V4L2RequestPrivHEVC * const priv = avctx->internal->hwaccel_priv_data; ++ V4L2RequestContextHEVC * ctx; ++ const HEVCPPS * const pps = h->pps; ++ const HEVCSPS * const sps = pps->sps; ++ int ret; ++ const struct decdev * decdev; ++ const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes ++ size_t src_size; + enum mediabufs_memory src_memtype; + enum mediabufs_memory dst_memtype; - - av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); - -@@ -174,8 +176,14 @@ static int 
v4l2_request_hevc_init(AVCodecContext *avctx) - decdev_media_path(decdev), decdev_video_path(decdev)); - - if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) { -- av_log(avctx, AV_LOG_ERROR, "Unable to open dmabufs\n"); -- goto fail0; ++ ++ av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ // Give up immediately if this is something that we have no code to deal with ++ if (sps->chroma_format_idc != 1) { ++ av_log(avctx, AV_LOG_WARNING, "chroma_format_idc(%d) != 1: Not implemented\n", sps->chroma_format_idc); ++ return AVERROR_PATCHWELCOME; ++ } ++ if (!(sps->bit_depth == 10 || sps->bit_depth == 8) || ++ sps->bit_depth != sps->bit_depth_chroma) { ++ av_log(avctx, AV_LOG_WARNING, "Bit depth Y:%d C:%d: Not implemented\n", sps->bit_depth, sps->bit_depth_chroma); ++ return AVERROR_PATCHWELCOME; ++ } ++ ++ if ((ctx = av_mallocz(sizeof(*ctx))) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to allocate context"); ++ return AVERROR(ENOMEM); ++ } ++ if ((priv->cctx_buf = av_buffer_create((uint8_t*)ctx, sizeof(*ctx), cctx_free, NULL, 0)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to allocate context buffer"); ++ av_free(ctx); ++ return AVERROR(ENOMEM); ++ } ++ priv->cctx = ctx; ++ ++ if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n"); ++ ret = AVERROR(-ret); ++ goto fail0; ++ } ++ ret = AVERROR(ENOMEM); // Assume mem fail by default for these ++ ++ if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL) ++ { ++ av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n"); ++ ret = AVERROR(ENODEV); ++ goto fail0; ++ } ++ av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n", ++ decdev_media_path(decdev), decdev_video_path(decdev)); ++ ++ if ((ctx->pq = pollqueue_new()) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n"); ++ goto fail1; ++ } ++ ++ if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) { ++ av_log(avctx, 
AV_LOG_ERROR, "Unable to create media pool\n"); ++ goto fail2; ++ } ++ ++ if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) { ++ av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n"); ++ goto fail3; ++ } ++ ++ // Version test for functional Pi5 HEVC iommu. ++ // rpivid kernel patch was merged in 6.1.57 ++ // *** Remove when it is unlikely that there are any broken kernels left ++ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57)) ++ ctx->dbufs = dmabufs_ctl_new_vidbuf_cached(); ++ else ++ ctx->dbufs = dmabufs_ctl_new(); ++ ++ if (ctx->dbufs == NULL) { + av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n"); + src_memtype = MEDIABUFS_MEMORY_MMAP; + dst_memtype = MEDIABUFS_MEMORY_MMAP; @@ -25886,24 +12745,26 @@ index cd79aad5631a..5cf17dd5e3fb 100644 + av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n"); + src_memtype = MEDIABUFS_MEMORY_DMABUF; + dst_memtype = MEDIABUFS_MEMORY_DMABUF; - } - - if ((ctx->pq = pollqueue_new()) == NULL) { -@@ -196,8 +204,9 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - // Ask for an initial bitbuf size of max size / 4 - // We will realloc if we need more - // Must use sps->h/w as avctx contains cropped size ++ } ++ ++ // Ask for an initial bitbuf size of max size / 4 ++ // We will realloc if we need more ++ // Must use sps->h/w as avctx contains cropped size +retry_src_memtype: - src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8); -- if (mediabufs_src_resizable(ctx->mbufs)) ++ src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8); + if (src_memtype == MEDIABUFS_MEMORY_DMABUF && mediabufs_src_resizable(ctx->mbufs)) - src_size /= 4; - // Kludge for conformance tests which break Annex A limits - else if (src_size < 0x40000) -@@ -210,6 +219,15 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - goto fail4; - } - ++ src_size /= 4; ++ // Kludge for conformance tests 
which break Annex A limits ++ else if (src_size < 0x40000) ++ src_size = 0x40000; ++ ++ if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt, ++ sps->width, sps->height, src_size)) { ++ char tbuf1[5]; ++ av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); ++ goto fail4; ++ } ++ + if (mediabufs_src_chk_memtype(ctx->mbufs, src_memtype)) { + if (src_memtype == MEDIABUFS_MEMORY_DMABUF) { + src_memtype = MEDIABUFS_MEMORY_MMAP; @@ -25913,22 +12774,43 @@ index cd79aad5631a..5cf17dd5e3fb 100644 + goto fail4; + } + - if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) { - av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); - ctx->fns = &V2(ff_v4l2_req_hevc, 4); -@@ -238,7 +256,7 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - goto fail4; - } - -- if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6)) { ++ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 4); ++#if CONFIG_V4L2_REQ_HEVC_VX ++ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 3); ++ else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 2); ++ else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) ++ ctx->fns = &V2(ff_v4l2_req_hevc, 1); ++#endif ++ else { ++ av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n"); ++ ret = AVERROR(EINVAL); ++ goto fail4; ++ } ++ ++ av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n", ++ ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs)); ++ ++ if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) { ++ char tbuf1[5]; ++ av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); ++ goto fail4; ++ } ++ + if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6, src_memtype)) { - 
av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n"); - goto fail4; - } -@@ -250,8 +268,17 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering, - avctx->thread_count, avctx->extra_hw_frames); - ++ av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n"); ++ goto fail4; ++ } ++ ++ { ++ unsigned int dst_slots = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering + ++ avctx->thread_count + (avctx->extra_hw_frames > 0 ? avctx->extra_hw_frames : 6); ++ av_log(avctx, AV_LOG_DEBUG, "Slots=%d: Reordering=%d, threads=%d, hw+=%d\n", dst_slots, ++ sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering, ++ avctx->thread_count, avctx->extra_hw_frames); ++ + if (mediabufs_dst_chk_memtype(ctx->mbufs, dst_memtype)) { + if (dst_memtype != MEDIABUFS_MEMORY_DMABUF) { + av_log(avctx, AV_LOG_ERROR, "Failed to get dst memory type\n"); @@ -25938,488 +12820,2295 @@ index cd79aad5631a..5cf17dd5e3fb 100644 + dst_memtype = MEDIABUFS_MEMORY_MMAP; + } + - // extra_hw_frames is -1 if unset -- if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0))) { ++ // extra_hw_frames is -1 if unset + if (mediabufs_dst_slots_create(ctx->mbufs, dst_slots, (avctx->extra_hw_frames > 0), dst_memtype)) { - av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n"); - goto fail4; - } -@@ -277,9 +304,10 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - // Set our s/w format - avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format; - -- av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s\n", -+ av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s\n", - ctx->fns->name, -- decdev_media_path(decdev), decdev_video_path(decdev)); -+ decdev_media_path(decdev), decdev_video_path(decdev), -+ mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype)); - - return 0; - - -From 
3087f58ab5abfac2a3d50359db08431a89d446df Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 22 Aug 2022 12:35:40 +0000 -Subject: [PATCH 064/186] Set buffer lengths on DQ - ---- - libavcodec/v4l2_req_media.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c -index 910ac77bb6f9..1a9944774a48 100644 ---- a/libavcodec/v4l2_req_media.c -+++ b/libavcodec/v4l2_req_media.c -@@ -733,6 +733,14 @@ static struct qent_base * qe_dequeue(struct buf_pool *const bp, - return NULL; - } - -+ if (mp) { -+ unsigned int i; -+ for (i = 0; i != buffer.length; ++i) -+ dmabuf_len_set(be->dh[i], V4L2_TYPE_IS_CAPTURE(f->type) ? planes[i].bytesused : 0); -+ } -+ else -+ dmabuf_len_set(be->dh[0], V4L2_TYPE_IS_CAPTURE(f->type) ? buffer.length : 0); -+ - be->timestamp = buffer.timestamp; - be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE; - return be; - -From d761ce983b4738df798b02636433bfc342e387c1 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 22 Aug 2022 17:11:24 +0000 -Subject: [PATCH 065/186] Fix compile if videodev2.h defines V4L2 HEVC request - API - -If videodev2.h does define the HEVC request API it is really hard to -set old variations of the controls so if it does then we only compile -against the system includes and remove the back compatability. 
---- - configure | 9 +++++++++ - libavcodec/Makefile | 4 ++-- - libavcodec/hevc-ctrls-v4.h | 2 ++ - libavcodec/v4l2_req_hevc_vx.c | 5 ----- - libavcodec/v4l2_request_hevc.c | 6 ++++-- - 5 files changed, 17 insertions(+), 9 deletions(-) - -diff --git a/configure b/configure -index f3991452e4a5..055944934476 100755 ---- a/configure -+++ b/configure -@@ -1946,6 +1946,7 @@ FEATURE_LIST=" - swscale_alpha - vout_drm - vout_egl -+ v4l2_req_hevc_vx - " - - # this list should be kept in linking order -@@ -6912,6 +6913,14 @@ fi - - check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns - check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;" -+disable v4l2_req_hevc_vx -+if enabled hevc_v4l2request_hwaccel; then -+ enable v4l2_req_hevc_vx -+fi -+if enabled hevc_v4l2_request; then -+ disable v4l2_req_hevc_vx -+fi -+ - check_headers sys/videoio.h - test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index d433a712366f..11f183c9b9ba 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -999,8 +999,8 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL) += dxva2_hevc.o - OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL) += dxva2_hevc.o - OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL) += nvdec_hevc.o - OBJS-$(CONFIG_HEVC_QSV_HWACCEL) += qsvdec.o --OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o\ -- v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o v4l2_req_hevc_v4.o -+OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o v4l2_req_hevc_v4.o -+OBJS-$(CONFIG_V4L2_REQ_HEVC_VX) += v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o - OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o - OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o - OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o -diff --git 
a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h -index 7e05f6e7c39b..7829d8208435 100644 ---- a/libavcodec/hevc-ctrls-v4.h -+++ b/libavcodec/hevc-ctrls-v4.h -@@ -53,6 +53,8 @@ - #include - #include - -+#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ -+ - #define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) - #define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) - #define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) -diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c -index 5d083016f89a..e1bd5c6a1f09 100644 ---- a/libavcodec/v4l2_req_hevc_vx.c -+++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -40,11 +40,6 @@ - #define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B - #endif - --// Should be in videodev2 but we might not have a good enough one --#ifndef V4L2_PIX_FMT_HEVC_SLICE --#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ --#endif -- - #include "v4l2_request_hevc.h" - - #include "libavutil/hwcontext_drm.h" -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index 5cf17dd5e3fb..614a1b4d99e4 100644 ---- a/libavcodec/v4l2_request_hevc.c -+++ b/libavcodec/v4l2_request_hevc.c -@@ -17,7 +17,7 @@ - */ - - -- -+#include "config.h" - #include "decode.h" - #include "hevcdec.h" - #include "hwconfig.h" -@@ -142,7 +142,7 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - const HEVCSPS * const sps = h->ps.sps; - int ret; - const struct decdev * decdev; -- const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 1).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes -+ const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 4).src_pix_fmt_v4l2; // Assuming constant for all APIs but avoiding V4L2 includes - size_t src_size; - enum mediabufs_memory src_memtype; - enum mediabufs_memory dst_memtype; -@@ -232,6 +232,7 @@ 
retry_src_memtype: - av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); - ctx->fns = &V2(ff_v4l2_req_hevc, 4); - } -+#if CONFIG_V4L2_REQ_HEVC_VX - else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { - av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); - ctx->fns = &V2(ff_v4l2_req_hevc, 3); -@@ -244,6 +245,7 @@ retry_src_memtype: - av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n"); - ctx->fns = &V2(ff_v4l2_req_hevc, 1); - } -+#endif - else { - av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n"); - ret = AVERROR(EINVAL); - -From 8fff782ad6a053a67e3621ffaa06dfa6d6b6bba6 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 12 Sep 2022 17:59:22 +0100 -Subject: [PATCH 066/186] v4l2_m2m_enc: Send headers in in pkt side_data - -If GLOBAL_HEADERS are requested then we can't provide them at init time -so send as NEW_EXTRADATA side data in a similar way to some AV1 -encoders. ---- - libavcodec/v4l2_m2m_enc.c | 33 +++++++++++++++++++++++---------- - 1 file changed, 23 insertions(+), 10 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index 05ff6ba72655..099ad23928d3 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -544,14 +544,12 @@ dequeue: - av_freep(&avctx->extradata); - avctx->extradata_size = 0; - -- if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) != NULL) -- memcpy(data, avpkt->data, len); -+ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) -+ goto fail_no_mem; - -+ memcpy(data, avpkt->data, len); - av_packet_unref(avpkt); - -- if (data == NULL) -- return AVERROR(ENOMEM); -- - // We need to copy the header, but keep local if not global - if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { - avctx->extradata = data; -@@ -567,18 +565,28 @@ dequeue: - } - - // First frame must be key so mark as such even if encoder forgot -- if (capture->first_buf == 2) -+ if (capture->first_buf == 2) { - 
avpkt->flags |= AV_PKT_FLAG_KEY; - -+ // Add any extradata to the 1st packet we emit as we cannot create it at init -+ if (avctx->extradata_size > 0 && avctx->extradata) { -+ void * const side = av_packet_new_side_data(avpkt, -+ AV_PKT_DATA_NEW_EXTRADATA, -+ avctx->extradata_size); -+ if (!side) -+ goto fail_no_mem; -+ -+ memcpy(side, avctx->extradata, avctx->extradata_size); ++ av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n"); ++ goto fail4; + } + } + - // Add SPS/PPS to the start of every key frame if non-global headers - if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { - const size_t newlen = s->extdata_size + avpkt->size; - AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); - -- if (buf == NULL) { -- av_packet_unref(avpkt); -- return AVERROR(ENOMEM); -- } -+ if (buf == NULL) -+ goto fail_no_mem; - - memcpy(buf->data, s->extdata_data, s->extdata_size); - memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); -@@ -592,6 +600,11 @@ dequeue: - // av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret); - capture->first_buf = 0; - return 0; ++ if (mediabufs_stream_on(ctx->mbufs)) { ++ av_log(avctx, AV_LOG_ERROR, "Failed stream on\n"); ++ goto fail4; ++ } + -+fail_no_mem: -+ ret = AVERROR(ENOMEM); -+ av_packet_unref(avpkt); ++ if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n"); ++ goto fail4; ++ } ++ ++ if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed set controls\n"); ++ goto fail5; ++ } ++ ++ decode_q_init(&ctx->decode_q); ++ ++ // Set our s/w format ++ avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format; ++ ++ av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n", ++ ctx->fns->name, ++ decdev_media_path(decdev), 
decdev_video_path(decdev), ++ mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype), ++ av_get_pix_fmt_name(avctx->sw_pix_fmt)); ++ ++ return 0; ++ ++fail5: ++ av_buffer_unref(&avctx->hw_frames_ctx); ++fail4: ++fail3: ++fail2: ++fail1: ++fail0: ++ priv->cctx = NULL; ++ av_buffer_unref(&priv->cctx_buf); + return ret; - } - - static av_cold int v4l2_encode_init(AVCodecContext *avctx) - -From 9d4bafaf9c0f149c2ad4b4b26d5c55a4c2deaaa0 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 14 Sep 2022 15:44:10 +0000 -Subject: [PATCH 067/186] matroskaenc: Allow H264 SPS/PPS headers in packet - sidedata - ---- - libavformat/matroskaenc.c | 26 ++++++++++++++++++++++---- - 1 file changed, 22 insertions(+), 4 deletions(-) - -diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c -index 113541bd9a20..61e4c976ef76 100644 ---- a/libavformat/matroskaenc.c -+++ b/libavformat/matroskaenc.c -@@ -77,6 +77,10 @@ - - #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? \ - ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER) ++} + -+/* Reserved size for H264 headers if not extant at init time */ -+#define MAX_H264_HEADER_SIZE 1024 ++static int ++v4l2_request_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) ++{ ++ V4L2RequestPrivHEVC * const spriv = src->internal->hwaccel_priv_data; ++ V4L2RequestPrivHEVC * const dpriv = dst->internal->hwaccel_priv_data; ++ int rv; + - #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ - !(mkv)->is_live) ++ av_log(dst, AV_LOG_DEBUG, "<<< %s (%s)\n", __func__, dpriv->cctx_buf ? 
"old" : "new"); ++ ++ if ((rv = av_buffer_replace(&dpriv->cctx_buf, spriv->cctx_buf)) != 0) ++ return rv; ++ ++ dpriv->cctx = spriv->cctx; ++ return 0; ++} ++ ++static void ++v4l2_request_free_frame_priv(FFRefStructOpaque hwctx, void *data) ++{ ++ fprintf(stderr, "%s\n", __func__); ++} ++ ++const FFHWAccel ff_hevc_v4l2request_hwaccel = { ++ .p = { ++ .name = "hevc_v4l2request", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .id = AV_CODEC_ID_HEVC, ++ .pix_fmt = AV_PIX_FMT_DRM_PRIME, ++ }, ++ .alloc_frame = v4l2_req_hevc_alloc_frame, ++ .start_frame = v4l2_req_hevc_start_frame, ++ .decode_slice = v4l2_req_hevc_decode_slice, ++ .end_frame = v4l2_req_hevc_end_frame, ++ .abort_frame = v4l2_req_hevc_abort_frame, ++ .init = v4l2_request_hevc_init, ++ .uninit = v4l2_request_hevc_uninit, ++ .free_frame_priv = v4l2_request_free_frame_priv, ++ .frame_priv_data_size = 128, ++ .update_thread_context = v4l2_request_update_thread_context, ++ .priv_data_size = sizeof(V4L2RequestPrivHEVC), ++ .frame_params = v4l2_req_hevc_frame_params, ++ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE, ++}; +diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h +new file mode 100644 +index 000000000000..9b41cbe9ceb3 +--- /dev/null ++++ b/libavcodec/v4l2_request_hevc.h +@@ -0,0 +1,131 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. 
++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef AVCODEC_V4L2_REQUEST_HEVC_H ++#define AVCODEC_V4L2_REQUEST_HEVC_H ++ ++#include ++#include ++#include "v4l2_req_decode_q.h" ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++// P030 should be defined in drm_fourcc.h and hopefully will be sometime ++// in the future but until then... ++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') ++#endif ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++#include ++#ifndef V4L2_CID_CODEC_BASE ++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in drm_fourcc.h hopefully will be sometime in the future but until then... 
++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ ++#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY ++#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 ++#endif ++ ++#define VCAT(name, version) name##_v##version ++#define V2(n,v) VCAT(n, v) ++#define V(n) V2(n, HEVC_CTRLS_VERSION) ++ ++#define S2(x) #x ++#define STR(x) S2(x) ++ ++// 1 per decoder ++struct v4l2_req_decode_fns; ++ ++typedef struct V4L2RequestContextHEVC { ++// V4L2RequestContext base; ++ const struct v4l2_req_decode_fns * fns; ++ ++ unsigned int timestamp; // ?? maybe uint64_t ++ ++ int decode_mode; ++ int start_code; ++ unsigned int max_slices; // 0 => not wanted (frame mode) ++ unsigned int max_offsets; // 0 => not wanted ++ ++ req_decode_q decode_q; ++ ++ struct devscan *devscan; ++ struct dmabufs_ctl *dbufs; ++ struct pollqueue *pq; ++ struct media_pool * mpool; ++ struct mediabufs_ctl *mbufs; ++} V4L2RequestContextHEVC; ++ ++typedef struct V4L2RequestPrivHEVC { ++ V4L2RequestContextHEVC * cctx; // Common context ++ AVBufferRef * cctx_buf; // Buf for cctx ++} V4L2RequestPrivHEVC; ++ ++typedef struct v4l2_req_decode_fns { ++ int src_pix_fmt_v4l2; ++ const char * name; ++ ++ // Init setup ++ int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); ++ int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx); ++ ++ // Passthrough of hwaccel fns ++ int (*start_frame)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, const uint8_t *buf, uint32_t buf_size); ++ int (*decode_slice)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, const uint8_t *buf, uint32_t buf_size); ++ int (*end_frame)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx); ++ void (*abort_frame)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx); ++ 
int (*frame_params)(AVCodecContext *avctx, V4L2RequestContextHEVC *const ctx, AVBufferRef *hw_frames_ctx); ++ int (*alloc_frame)(AVCodecContext * avctx, V4L2RequestContextHEVC *const ctx, AVFrame *frame); ++} v4l2_req_decode_fns; ++ ++ ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); ++ ++#endif +diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c +new file mode 100644 +index 000000000000..71f6cc356720 +--- /dev/null ++++ b/libavcodec/weak_link.c +@@ -0,0 +1,127 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include ++#include ++#include ++#include "weak_link.h" ++ ++struct ff_weak_link_master { ++ atomic_int ref_count; /* 0 is single ref for easier atomics */ ++ pthread_rwlock_t lock; ++ void * ptr; ++}; ++ ++static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c) ++{ ++ return (struct ff_weak_link_master *)c; ++} ++ ++struct ff_weak_link_master * ff_weak_link_new(void * p) ++{ ++ struct ff_weak_link_master * w = malloc(sizeof(*w)); ++ if (!w) ++ return NULL; ++ atomic_init(&w->ref_count, 0); ++ w->ptr = p; ++ if (pthread_rwlock_init(&w->lock, NULL)) { ++ free(w); ++ return NULL; ++ } ++ return w; ++} ++ ++static void weak_link_do_unref(struct ff_weak_link_master * const w) ++{ ++ int n = atomic_fetch_sub(&w->ref_count, 1); ++ if (n) ++ return; ++ ++ pthread_rwlock_destroy(&w->lock); ++ free(w); ++} ++ ++// Unref & break link ++void ff_weak_link_break(struct ff_weak_link_master ** ppLink) ++{ ++ struct ff_weak_link_master * const w = *ppLink; ++ if (!w) ++ return; ++ ++ *ppLink = NULL; ++ pthread_rwlock_wrlock(&w->lock); ++ w->ptr = NULL; ++ pthread_rwlock_unlock(&w->lock); ++ ++ weak_link_do_unref(w); ++} ++ ++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w) ++{ ++ if (!w) ++ return NULL; ++ atomic_fetch_add(&w->ref_count, 1); ++ return (struct ff_weak_link_client*)w; ++} ++ ++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink) ++{ ++ struct ff_weak_link_master * const w = weak_link_x(*ppLink); ++ if (!w) ++ return; ++ ++ *ppLink = NULL; ++ weak_link_do_unref(w); ++} ++ ++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink) ++{ ++ struct ff_weak_link_master * const w = weak_link_x(*ppLink); ++ ++ if (!w) ++ return NULL; ++ ++ if (pthread_rwlock_rdlock(&w->lock)) ++ goto broken; ++ ++ if (w->ptr) ++ return w->ptr; ++ ++ pthread_rwlock_unlock(&w->lock); ++ ++broken: ++ *ppLink = NULL; ++ weak_link_do_unref(w); ++ return NULL; ++} ++ ++// Ignores a NULL c (so can be on the return 
path of both broken & live links) ++void ff_weak_link_unlock(struct ff_weak_link_client * c) ++{ ++ struct ff_weak_link_master * const w = weak_link_x(c); ++ if (w) ++ pthread_rwlock_unlock(&w->lock); ++} ++ ++ +diff --git a/libavcodec/weak_link.h b/libavcodec/weak_link.h +new file mode 100644 +index 000000000000..5c66b29f9b61 +--- /dev/null ++++ b/libavcodec/weak_link.h +@@ -0,0 +1,46 @@ ++/* ++ Copyright (C) 2024 John Cox john.cox@raspberrypi.com ++ ++ Permission is hereby granted, free of charge, to any person ++ obtaining a copy of this software and associated documentation ++ files (the "Software"), to deal in the Software without ++ restriction, including without limitation the rights to use, copy, ++ modify, merge, publish, distribute, sublicense, and/or sell copies ++ of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef AVCODEC_WEAK_LINK_H ++#define AVCODEC_WEAK_LINK_H ++ ++struct ff_weak_link_master; ++struct ff_weak_link_client; ++ ++struct ff_weak_link_master * ff_weak_link_new(void * p); ++void ff_weak_link_break(struct ff_weak_link_master ** ppLink); ++ ++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w); ++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink); ++ ++// Returns NULL if link broken - in this case it will also zap ++// *ppLink and unref the weak_link. ++// Returns NULL if *ppLink is NULL (so a link once broken stays broken) ++// ++// The above does mean that there is a race if this is called simultainiously ++// by two threads using the same weak_link_client (so don't do that) ++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink); ++void ff_weak_link_unlock(struct ff_weak_link_client * c); ++ ++#endif +diff --git a/libavdevice/Makefile b/libavdevice/Makefile +index c30449201d47..d39dadb1dee1 100644 +--- a/libavdevice/Makefile ++++ b/libavdevice/Makefile +@@ -48,6 +48,8 @@ OBJS-$(CONFIG_SNDIO_OUTDEV) += sndio_enc.o sndio.o + OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o v4l2-common.o timefilter.o + OBJS-$(CONFIG_V4L2_OUTDEV) += v4l2enc.o v4l2-common.o + OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o ++OBJS-$(CONFIG_VOUT_DRM_OUTDEV) += drm_vout.o ++OBJS-$(CONFIG_VOUT_EGL_OUTDEV) += egl_vout.o + OBJS-$(CONFIG_XCBGRAB_INDEV) += xcbgrab.o + OBJS-$(CONFIG_XV_OUTDEV) += xv.o -@@ -1121,8 +1125,12 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn - case AV_CODEC_ID_WAVPACK: - return put_wv_codecpriv(dyn_cp, extradata, extradata_size); - case AV_CODEC_ID_H264: -- return ff_isom_write_avcc(dyn_cp, extradata, -- extradata_size); -+ if (par->extradata_size) -+ return ff_isom_write_avcc(dyn_cp, extradata, -+ extradata_size); -+ else -+ *size_to_reserve = MAX_H264_HEADER_SIZE; -+ break; - case AV_CODEC_ID_HEVC: - return ff_isom_write_hvcc(dyn_cp, extradata, - extradata_size, 0); -@@ -2731,8 
+2739,8 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) - } - break; - #endif -- // FIXME: Remove the following once libaom starts propagating proper extradata during init() -- // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208 -+ // FIXME: Remove the following once libaom starts propagating extradata during init() -+ // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 - case AV_CODEC_ID_AV1: - if (side_data_size && mkv->track.bc && !par->extradata_size) { - // If the reserved space doesn't suffice, only write -@@ -2744,6 +2752,16 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) - } else if (!par->extradata_size) - return AVERROR_INVALIDDATA; - break; -+ // H264 V4L2 has a similar issue -+ case AV_CODEC_ID_H264: -+ if (side_data_size && mkv->track.bc && !par->extradata_size) { -+ ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size, -+ par, mkv->track.bc, track, 0); -+ if (ret < 0) -+ return ret; -+ } else if (!par->extradata_size) -+ return AVERROR_INVALIDDATA; -+ break; - default: - if (side_data_size) - av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream %d.\n", pkt->stream_index); - -From 969917342459c78f480f327ea682d8880357a2df Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 14 Sep 2022 15:55:15 +0000 -Subject: [PATCH 068/186] movenc: Allow H264 SPS/PPS headers in packet sidedata - ---- - libavformat/movenc.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/libavformat/movenc.c b/libavformat/movenc.c -index c4fcb5f8b1b3..891adbf7b26c 100644 ---- a/libavformat/movenc.c -+++ b/libavformat/movenc.c -@@ -6343,6 +6343,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) - if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || - trk->par->codec_id == AV_CODEC_ID_AAC || - trk->par->codec_id == AV_CODEC_ID_AV1 || -+ trk->par->codec_id == AV_CODEC_ID_H264 || - trk->par->codec_id == AV_CODEC_ID_FLAC) { - size_t side_size; - 
uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); - -From a78c7c1a9afc53f0ef71d251cb06789763babb26 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 26 Sep 2022 12:45:05 +0100 -Subject: [PATCH 069/186] Allow ffmpeg to select codec internal hwfmts if - no_cvt_hw - -This allows the selection of DRM_PRIME from v4l2m2m without forcing it -in the decoder. - -Not utterly sure this is the right method for 5.1 but it does work ---- - fftools/ffmpeg.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c -index 04bea4ef4fe9..0de534618323 100644 ---- a/fftools/ffmpeg.c -+++ b/fftools/ffmpeg.c -@@ -2766,12 +2766,15 @@ static enum AVPixelFormat get_format(AVCodecContext *s, const enum AVPixelFormat - break; +diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c +index 9b9a9146c7d7..1b68c16e4f19 100644 +--- a/libavdevice/alldevices.c ++++ b/libavdevice/alldevices.c +@@ -56,6 +56,8 @@ extern const FFOutputFormat ff_sndio_muxer; + extern const FFInputFormat ff_v4l2_demuxer; + extern const FFOutputFormat ff_v4l2_muxer; + extern const FFInputFormat ff_vfwcap_demuxer; ++extern const FFOutputFormat ff_vout_drm_muxer; ++extern const FFOutputFormat ff_vout_egl_muxer; + extern const FFInputFormat ff_xcbgrab_demuxer; + extern const FFOutputFormat ff_xv_muxer; - if (ist->hwaccel_id == HWACCEL_GENERIC || -- ist->hwaccel_id == HWACCEL_AUTO) { -+ ist->hwaccel_id == HWACCEL_AUTO || -+ no_cvt_hw) { - for (i = 0;; i++) { - config = avcodec_get_hw_config(s->codec, i); - if (!config) - break; -- if (!(config->methods & -+ if (no_cvt_hw && (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL)) -+ av_log(s, AV_LOG_DEBUG, "no_cvt_hw so trying pix_fmt %d with codec internal hwaccel\n", *p); -+ else if (!(config->methods & - AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) - continue; - if (config->pix_fmt == *p) - -From 72c4c2e860365d46301c688d8586b2f4f023ac8d Mon Sep 17 00:00:00 2001 -From: John 
Cox -Date: Thu, 1 Sep 2022 11:42:41 +0000 -Subject: [PATCH 070/186] vf_deinterlace_v4l2m2m: Add a v4l2m2m scaler - -The logic for running an isp based scaler is pretty much identical to -that for the deinterlacer so add to the deinterlacer. This requires -some rework of the setup code to avoid assumptions that are true for -deinterlace but not scale but the reworked code requires few switches -based on operation. ---- - libavfilter/allfilters.c | 1 + - libavfilter/vf_deinterlace_v4l2m2m.c | 1123 ++++++++++++++++++++------ - 2 files changed, 877 insertions(+), 247 deletions(-) - +diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c +new file mode 100644 +index 000000000000..6d11e98d7521 +--- /dev/null ++++ b/libavdevice/drm_vout.c +@@ -0,0 +1,684 @@ ++/* ++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ ++// *** This module is a work in progress and its utility is strictly ++// limited to testing. 
++ ++#include "libavutil/opt.h" ++#include "libavutil/frame.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavformat/mux.h" ++#include "avdevice.h" ++ ++#include "pthread.h" ++#include ++#include ++ ++#include ++#include ++#include ++ ++#define TRACE_ALL 0 ++ ++#define DRM_MODULE "vc4" ++ ++#define ERRSTR strerror(errno) ++ ++struct drm_setup { ++ int conId; ++ uint32_t crtcId; ++ int crtcIdx; ++ uint32_t planeId; ++ unsigned int out_fourcc; ++ struct { ++ int x, y, width, height; ++ } compose; ++}; ++ ++typedef struct drm_aux_s { ++ unsigned int fb_handle; ++ uint32_t bo_handles[AV_DRM_MAX_PLANES]; ++ AVFrame * frame; ++} drm_aux_t; ++ ++// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS ++// we get initial flicker probably due to dodgy drm timing ++#define AUX_SIZE 3 ++typedef struct drm_display_env_s ++{ ++ AVClass *class; ++ ++ int drm_fd; ++ uint32_t con_id; ++ struct drm_setup setup; ++ enum AVPixelFormat avfmt; ++ ++ int show_all; ++ const char * drm_module; ++ ++ unsigned int ano; ++ drm_aux_t aux[AUX_SIZE]; ++ ++ pthread_t q_thread; ++ sem_t q_sem_in; ++ sem_t q_sem_out; ++ int q_terminate; ++ AVFrame * q_next; ++ ++} drm_display_env_t; ++ ++ ++static int drm_vout_write_trailer(AVFormatContext *s) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); ++#endif ++ ++ return 0; ++} ++ ++static int drm_vout_write_header(AVFormatContext *s) ++{ ++ const AVCodecParameters * const par = s->streams[0]->codecpar; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); ++#endif ++ if ( s->nb_streams > 1 ++ || par->codec_type != AVMEDIA_TYPE_VIDEO ++ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { ++ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++static int find_plane(struct AVFormatContext * const avctx, ++ const int drmfd, const int crtcidx, const uint32_t 
format, ++ uint32_t * const pplane_id) ++{ ++ drmModePlaneResPtr planes; ++ drmModePlanePtr plane; ++ drmModeObjectPropertiesPtr props = NULL; ++ drmModePropertyPtr prop = NULL; ++ unsigned int i; ++ unsigned int j; ++ int ret = -1; ++ ++ planes = drmModeGetPlaneResources(drmfd); ++ if (!planes) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR); ++ return -1; ++ } ++ ++ for (i = 0; i < planes->count_planes; ++i) { ++ plane = drmModeGetPlane(drmfd, planes->planes[i]); ++ if (!plane) /* test the plane just fetched, not the (already checked) resource list */ ++ { ++ av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR); ++ goto fail; /* ret is still -1; a plain break would fall through with *pplane_id unset */ ++ } ++ ++ if (!(plane->possible_crtcs & (1 << crtcidx))) { ++ drmModeFreePlane(plane); ++ continue; ++ } ++ ++ for (j = 0; j < plane->count_formats; ++j) { ++ if (plane->formats[j] == format) ++ break; ++ } ++ ++ if (j == plane->count_formats) { ++ drmModeFreePlane(plane); ++ continue; ++ } ++ ++ *pplane_id = plane->plane_id; ++ drmModeFreePlane(plane); ++ break; ++ } ++ ++ if (i == planes->count_planes) { ++ ret = -1; ++ goto fail; ++ } ++ ++ props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE); ++ if (!props) ++ goto fail; ++ for (i = 0; i != props->count_props; ++i) { ++ if (prop) ++ drmModeFreeProperty(prop); ++ prop = drmModeGetProperty(drmfd, props->props[i]); ++ if (!prop) ++ goto fail; ++ if (strcmp("zpos", prop->name) == 0) { ++ if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0) ++ av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]); ++ else ++ av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n"); ++ break; ++ } ++ } ++ ++ ret = 0; ++fail: ++ if (props) ++ drmModeFreeObjectProperties(props); ++ if (prop) ++ drmModeFreeProperty(prop); ++ drmModeFreePlaneResources(planes); ++ return ret; ++} ++ ++static void da_uninit(drm_display_env_t * const de, drm_aux_t * da) ++{ ++ if (da->fb_handle != 0) { ++ drmModeRmFB(de->drm_fd, da->fb_handle); ++
da->fb_handle = 0; ++ } ++ ++ for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) { ++ if (da->bo_handles[i]) { ++ struct drm_gem_close gem_close = {.handle = da->bo_handles[i]}; ++ drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close); ++ da->bo_handles[i] = 0; ++ } ++ } ++ av_frame_free(&da->frame); ++} ++ ++static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame) ++{ ++ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; ++ drm_aux_t * da = de->aux + de->ano; ++ const uint32_t format = desc->layers[0].format; ++ int ret = 0; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd); ++#endif ++ ++ if (de->setup.out_fourcc != format) { ++ if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) { ++ av_frame_free(&frame); ++ av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format); ++ return -1; ++ } ++ de->setup.out_fourcc = format; ++ } ++ ++ { ++ drmVBlank vbl = { ++ .request = { ++ .type = DRM_VBLANK_RELATIVE, ++ .sequence = 0 ++ } ++ }; ++ ++ while (drmWaitVBlank(de->drm_fd, &vbl)) { ++ if (errno != EINTR) { ++// av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR); ++ break; ++ } ++ } ++ } ++ ++ da_uninit(de, da); ++ ++ { ++ uint32_t pitches[4] = {0}; ++ uint32_t offsets[4] = {0}; ++ uint64_t modifiers[4] = {0}; ++ uint32_t bo_handles[4] = {0}; ++ int has_mods = 0; ++ int i, j, n; ++ ++ da->frame = frame; ++ ++ for (i = 0; i < desc->nb_objects; ++i) { ++ if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) { ++ av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR); ++ return -1; ++ } ++ if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR && ++ desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID) ++ has_mods = 1; ++ } ++ ++ n = 0; ++ for (i = 0; i < desc->nb_layers; ++i) { ++ for (j = 0; j < desc->layers[i].nb_planes; ++j) { ++ 
const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; ++ const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; ++ pitches[n] = p->pitch; ++ offsets[n] = p->offset; ++ modifiers[n] = obj->format_modifier; ++ bo_handles[n] = da->bo_handles[p->object_index]; ++ ++n; ++ } ++ } ++ ++#if 1 && TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," ++ " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", ++ av_frame_cropped_width(frame), ++ av_frame_cropped_height(frame), ++ desc->layers[0].format, ++ bo_handles[0], ++ bo_handles[1], ++ bo_handles[2], ++ bo_handles[3], ++ pitches[0], ++ pitches[1], ++ pitches[2], ++ pitches[3], ++ offsets[0], ++ offsets[1], ++ offsets[2], ++ offsets[3], ++ (long long)modifiers[0], ++ (long long)modifiers[1], ++ (long long)modifiers[2], ++ (long long)modifiers[3] ++ ); ++#endif ++ ++ if (drmModeAddFB2WithModifiers(de->drm_fd, ++ av_frame_cropped_width(frame), ++ av_frame_cropped_height(frame), ++ desc->layers[0].format, bo_handles, ++ pitches, offsets, ++ has_mods ? modifiers : NULL, ++ &da->fb_handle, ++ has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) { ++ av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR); ++ return -1; ++ } ++ } ++ ++ ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId, ++ da->fb_handle, 0, ++ de->setup.compose.x, de->setup.compose.y, ++ de->setup.compose.width, ++ de->setup.compose.height, ++ 0, 0, ++ av_frame_cropped_width(frame) << 16, ++ av_frame_cropped_height(frame) << 16); ++ ++ if (ret != 0) { ++ av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR); ++ } ++ ++ de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1; ++ ++ return ret; ++} ++ ++static int do_sem_wait(sem_t * const sem, const int nowait) ++{ ++ while (nowait ? 
sem_trywait(sem) : sem_wait(sem)) { ++ if (errno != EINTR) ++ return -errno; ++ } ++ return 0; ++} ++ ++static void * display_thread(void * v) ++{ ++ AVFormatContext * const s = v; ++ drm_display_env_t * const de = s->priv_data; ++ int i; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++#endif ++ ++ sem_post(&de->q_sem_out); ++ ++ for (;;) { ++ AVFrame * frame; ++ ++ do_sem_wait(&de->q_sem_in, 0); ++ ++ if (de->q_terminate) ++ break; ++ ++ frame = de->q_next; ++ de->q_next = NULL; ++ sem_post(&de->q_sem_out); ++ ++ do_display(s, de, frame); ++ } ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++#endif ++ ++ for (i = 0; i != AUX_SIZE; ++i) ++ da_uninit(de, de->aux + i); ++ ++ av_frame_free(&de->q_next); ++ ++ return NULL; ++} ++ ++static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt) ++{ ++ const AVFrame * const src_frame = (AVFrame *)pkt->data; ++ AVFrame * frame; ++ drm_display_env_t * const de = s->priv_data; ++ int ret; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s\n", __func__); ++#endif ++ ++ if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) { ++ av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts); ++ return 0; ++ } ++ ++ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { ++ frame = av_frame_alloc(); ++ av_frame_ref(frame, src_frame); ++ } ++ else if (src_frame->format == AV_PIX_FMT_VAAPI) { ++ frame = av_frame_alloc(); ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ if (av_hwframe_map(frame, src_frame, 0) != 0) ++ { ++ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); ++ av_frame_free(&frame); ++ return AVERROR(EINVAL); ++ } ++ } ++ else { ++ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); ++ return AVERROR(EINVAL); ++ } ++ ++ ret = do_sem_wait(&de->q_sem_out, !de->show_all); ++ if (ret) { ++ av_frame_free(&frame); ++ } ++ else { ++ de->q_next = frame; ++ 
sem_post(&de->q_sem_in); ++ } ++ ++ return 0; ++} ++ ++static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, ++ unsigned flags) ++{ ++ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); ++ return AVERROR_PATCHWELCOME; ++} ++ ++static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type); ++#endif ++ switch(type) { ++ case AV_APP_TO_DEV_WINDOW_REPAINT: ++ return 0; ++ default: ++ break; ++ } ++ return AVERROR(ENOSYS); ++} ++ ++static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId) ++{ ++ int ret = -1; ++ int i; ++ drmModeRes *res = drmModeGetResources(drmfd); ++ drmModeConnector *c; ++ ++ if(!res) ++ { ++ printf( "drmModeGetResources failed: %s\n", ERRSTR); ++ return -1; ++ } ++ ++ if (res->count_crtcs <= 0) ++ { ++ printf( "drm: no crts\n"); ++ goto fail_res; ++ } ++ ++ if (!s->conId) { ++ fprintf(stderr, ++ "No connector ID specified. Choosing default from list:\n"); ++ ++ for (i = 0; i < res->count_connectors; i++) { ++ drmModeConnector *con = ++ drmModeGetConnector(drmfd, res->connectors[i]); ++ drmModeEncoder *enc = NULL; ++ drmModeCrtc *crtc = NULL; ++ ++ if (con->encoder_id) { ++ enc = drmModeGetEncoder(drmfd, con->encoder_id); ++ if (enc->crtc_id) { ++ crtc = drmModeGetCrtc(drmfd, enc->crtc_id); ++ } ++ } ++ ++ if (!s->conId && crtc) { ++ s->conId = con->connector_id; ++ s->crtcId = crtc->crtc_id; ++ } ++ ++ av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n", ++ con->connector_id, ++ crtc ? crtc->crtc_id : 0, ++ con->connector_type, ++ crtc ? crtc->width : 0, ++ crtc ? crtc->height : 0, ++ (s->conId == (int)con->connector_id ? 
++ " (chosen)" : "")); ++ ++ if (crtc) ++ drmModeFreeCrtc(crtc); ++ if (enc) ++ drmModeFreeEncoder(enc); ++ if (con) ++ drmModeFreeConnector(con); ++ } ++ ++ if (!s->conId) { ++ av_log(avctx, AV_LOG_ERROR, ++ "No suitable enabled connector found.\n"); ++ goto fail_res; /* ret is still -1; release res instead of leaking it */ ++ } ++ } ++ ++ s->crtcIdx = -1; ++ ++ for (i = 0; i < res->count_crtcs; ++i) { ++ if (s->crtcId == res->crtcs[i]) { ++ s->crtcIdx = i; ++ break; ++ } ++ } ++ ++ if (s->crtcIdx == -1) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId); ++ goto fail_res; ++ } ++ ++ if (res->count_connectors <= 0) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n"); ++ goto fail_res; ++ } ++ ++ c = drmModeGetConnector(drmfd, s->conId); ++ if (!c) ++ { ++ av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR); ++ goto fail_res; ++ } ++ ++ if (!c->count_modes) ++ { ++ av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n"); ++ goto fail_conn; ++ } ++ ++ { ++ drmModeCrtc *crtc = drmModeGetCrtc(drmfd, s->crtcId); ++ s->compose.x = crtc->x; ++ s->compose.y = crtc->y; ++ s->compose.width = crtc->width; ++ s->compose.height = crtc->height; ++ drmModeFreeCrtc(crtc); ++ } ++ ++ if (pConId) ++ *pConId = c->connector_id; ++ ret = 0; ++ ++fail_conn: ++ drmModeFreeConnector(c); ++ ++fail_res: ++ drmModeFreeResources(res); ++ ++ return ret; ++} ++ ++// deinit is called if init fails so no need to clean up explicitly here ++static int drm_vout_init(struct AVFormatContext * s) ++{ ++ drm_display_env_t * const de = s->priv_data; ++ int rv; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->drm_fd = -1; ++ de->con_id = 0; ++ de->setup = (struct drm_setup){0}; ++ de->q_terminate = 0; ++ ++ if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0) ++ { ++ rv = AVERROR(errno); ++ av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv)); ++ return rv; ++ } ++ ++ if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0) ++ { ++ av_log(s,
AV_LOG_ERROR, "failed to find valid mode\n"); ++ rv = AVERROR(EINVAL); ++ goto fail_close; ++ } ++ ++ sem_init(&de->q_sem_in, 0, 0); ++ sem_init(&de->q_sem_out, 0, 0); ++ if (pthread_create(&de->q_thread, NULL, display_thread, s)) { ++ rv = AVERROR(errno); ++ av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv)); ++ goto fail_close; ++ } ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++ ++ return 0; ++ ++fail_close: ++ close(de->drm_fd); ++ de->drm_fd = -1; ++ av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__); ++ ++ return rv; ++} ++ ++static void drm_vout_deinit(struct AVFormatContext * s) ++{ ++ drm_display_env_t * const de = s->priv_data; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->q_terminate = 1; ++ sem_post(&de->q_sem_in); ++ pthread_join(de->q_thread, NULL); ++ sem_destroy(&de->q_sem_in); ++ sem_destroy(&de->q_sem_out); ++ ++ for (unsigned int i = 0; i != AUX_SIZE; ++i) ++ da_uninit(de, de->aux + i); ++ ++ av_frame_free(&de->q_next); ++ ++ if (de->drm_fd >= 0) { ++ close(de->drm_fd); ++ de->drm_fd = -1; ++ } ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++} ++ ++ ++#define OFFSET(x) offsetof(drm_display_env_t, x) ++static const AVOption options[] = { ++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, ++ { NULL } ++}; ++ ++static const AVClass drm_vout_class = { ++ .class_name = "drm vid outdev", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = LIBAVUTIL_VERSION_INT, ++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, ++}; ++ ++FFOutputFormat ff_vout_drm_muxer = { ++ .p = { ++ .name = "vout_drm", ++ .long_name = NULL_IF_CONFIG_SMALL("Drm video output device"), ++ .audio_codec = AV_CODEC_ID_NONE, ++ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, 
++ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, ++ .priv_class = &drm_vout_class, ++ }, ++ .priv_data_size = sizeof(drm_display_env_t), ++ .write_header = drm_vout_write_header, ++ .write_packet = drm_vout_write_packet, ++ .write_uncoded_frame = drm_vout_write_frame, ++ .write_trailer = drm_vout_write_trailer, ++ .control_message = drm_vout_control_message, ++ .init = drm_vout_init, ++ .deinit = drm_vout_deinit, ++}; ++ +diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c +new file mode 100644 +index 000000000000..0c8c629852fb +--- /dev/null ++++ b/libavdevice/egl_vout.c +@@ -0,0 +1,784 @@ ++/* ++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++ ++// *** This module is a work in progress and its utility is strictly ++// limited to testing. ++// Amongst other issues it doesn't wait for the pic to be displayed before ++// returning the buffer so flikering does occur. 
++ ++#include ++#include ++ ++#include "libavutil/opt.h" ++#include "libavutil/avassert.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/imgutils.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavformat/mux.h" ++#include "avdevice.h" ++ ++#include "pthread.h" ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "libavutil/rpi_sand_fns.h" ++ ++#define TRACE_ALL 0 ++ ++struct egl_setup { ++ int conId; ++ ++ Display *dpy; ++ EGLDisplay egl_dpy; ++ EGLContext ctx; ++ EGLSurface surf; ++ Window win; ++ ++ uint32_t crtcId; ++ int crtcIdx; ++ uint32_t planeId; ++ struct { ++ int x, y, width, height; ++ } compose; ++}; ++ ++typedef struct egl_aux_s { ++ int fd; ++ GLuint texture; ++ ++} egl_aux_t; ++ ++typedef struct egl_display_env_s { ++ AVClass *class; ++ ++ struct egl_setup setup; ++ enum AVPixelFormat avfmt; ++ ++ int show_all; ++ int window_width, window_height; ++ int window_x, window_y; ++ int fullscreen; ++ ++ egl_aux_t aux[32]; ++ ++ pthread_t q_thread; ++ pthread_mutex_t q_lock; ++ sem_t display_start_sem; ++ sem_t q_sem; ++ int q_terminate; ++ AVFrame *q_this; ++ AVFrame *q_next; ++ ++} egl_display_env_t; ++ ++ ++/** ++ * Remove window border/decorations. ++ */ ++static void ++no_border(Display *dpy, Window w) ++{ ++ static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); ++ static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; ++ ++ typedef struct { ++ unsigned long flags; ++ unsigned long functions; ++ unsigned long decorations; ++ long inputMode; ++ unsigned long status; ++ } PropMotifWmHints; ++ ++ PropMotifWmHints motif_hints; ++ Atom prop, proptype; ++ unsigned long flags = 0; ++ ++ /* setup the property */ ++ motif_hints.flags = MWM_HINTS_DECORATIONS; ++ motif_hints.decorations = flags; ++ ++ /* get the atom for the property */ ++ prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True); ++ if (!prop) { ++ /* something went wrong! 
*/ ++ return; ++ } ++ ++ /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ ++ proptype = prop; ++ ++ XChangeProperty(dpy, w, /* display, window */ ++ prop, proptype, /* property, type */ ++ 32, /* format: 32-bit datums */ ++ PropModeReplace, /* mode */ ++ (unsigned char *)&motif_hints, /* data */ ++ PROP_MOTIF_WM_HINTS_ELEMENTS /* nelements */ ++ ); ++} ++ ++ ++/* ++ * Create an RGB, double-buffered window. ++ * Return the window and context handles. ++ */ ++static int ++make_window(struct AVFormatContext *const s, ++ egl_display_env_t *const de, ++ Display *dpy, EGLDisplay egl_dpy, const char *name, ++ Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet) ++{ ++ int scrnum = DefaultScreen(dpy); ++ XSetWindowAttributes attr; ++ unsigned long mask; ++ Window root = RootWindow(dpy, scrnum); ++ Window win; ++ EGLContext ctx; ++ const int fullscreen = de->fullscreen; ++ EGLConfig config; ++ int x = de->window_x; ++ int y = de->window_y; ++ int width = de->window_width ? de->window_width : 1280; ++ int height = de->window_height ? 
de->window_height : 720; ++ ++ ++ if (fullscreen) { ++ int scrnum = DefaultScreen(dpy); ++ ++ x = 0; y = 0; ++ width = DisplayWidth(dpy, scrnum); ++ height = DisplayHeight(dpy, scrnum); ++ } ++ ++ { ++ EGLint num_configs; ++ static const EGLint attribs[] = { ++ EGL_RED_SIZE, 1, ++ EGL_GREEN_SIZE, 1, ++ EGL_BLUE_SIZE, 1, ++ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, ++ EGL_NONE ++ }; ++ ++ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { ++ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); ++ return -1; ++ } ++ } ++ ++ { ++ EGLint vid; ++ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); ++ return -1; ++ } ++ ++ { ++ XVisualInfo visTemplate = { ++ .visualid = vid, ++ }; ++ int num_visuals; ++ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, ++ &visTemplate, &num_visuals); ++ ++ /* window attributes */ ++ attr.background_pixel = 0; ++ attr.border_pixel = 0; ++ attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone); ++ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; ++ /* XXX this is a bad way to get a borderless window! 
*/ ++ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; ++ ++ win = XCreateWindow(dpy, root, x, y, width, height, ++ 0, visinfo->depth, InputOutput, ++ visinfo->visual, mask, &attr); ++ XFree(visinfo); ++ } ++ } ++ ++ if (fullscreen) ++ no_border(dpy, win); ++ ++ /* set hints and properties */ ++ { ++ XSizeHints sizehints; ++ sizehints.x = x; ++ sizehints.y = y; ++ sizehints.width = width; ++ sizehints.height = height; ++ sizehints.flags = USSize | USPosition; ++ XSetNormalHints(dpy, win, &sizehints); ++ XSetStandardProperties(dpy, win, name, name, ++ None, (char **)NULL, 0, &sizehints); ++ } ++ ++ eglBindAPI(EGL_OPENGL_ES_API); ++ ++ { ++ static const EGLint ctx_attribs[] = { ++ EGL_CONTEXT_CLIENT_VERSION, 2, ++ EGL_NONE ++ }; ++ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs); ++ if (!ctx) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); ++ return -1; ++ } ++ } ++ ++ ++ XMapWindow(dpy, win); ++ ++ { ++ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); ++ if (!surf) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); ++ return -1; ++ } ++ ++ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); ++ return -1; ++ } ++ ++ *winRet = win; ++ *ctxRet = ctx; ++ *surfRet = surf; ++ } ++ ++ return 0; ++} ++ ++static GLint ++compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source) ++{ ++ GLuint s = glCreateShader(target); ++ ++ if (s == 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); ++ return 0; ++ } ++ ++ glShaderSource(s, 1, (const GLchar **)&source, NULL); ++ glCompileShader(s); ++ ++ { ++ GLint ok; ++ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); ++ ++ if (!ok) { ++ GLchar *info; ++ GLint size; ++ ++ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); ++ info = malloc(size); ++ ++ glGetShaderInfoLog(s, size, NULL, info); ++ av_log(avctx, AV_LOG_ERROR, "Failed to 
compile shader: %ssource:\n%s\n", info, source); ++ ++ return 0; ++ } ++ } ++ ++ return s; ++} ++ ++static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs) ++{ ++ GLuint prog = glCreateProgram(); ++ ++ if (prog == 0) { ++ av_log(s, AV_LOG_ERROR, "Failed to create program\n"); ++ return 0; ++ } ++ ++ glAttachShader(prog, vs); ++ glAttachShader(prog, fs); ++ glLinkProgram(prog); ++ ++ { ++ GLint ok; ++ glGetProgramiv(prog, GL_LINK_STATUS, &ok); ++ if (!ok) { ++ /* Some drivers return a size of 1 for an empty log. This is the size ++ * of a log that contains only a terminating NUL character. ++ */ ++ GLint size; ++ GLchar *info = NULL; ++ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); ++ if (size > 1) { ++ info = malloc(size); ++ glGetProgramInfoLog(prog, size, NULL, info); ++ } ++ ++ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", ++ (info != NULL) ? info : ""); ++ return 0; ++ } ++ } ++ ++ return prog; ++} ++ ++static int ++gl_setup(struct AVFormatContext *const s) ++{ ++ const char *vs = ++ "attribute vec4 pos;\n" ++ "varying vec2 texcoord;\n" ++ "\n" ++ "void main() {\n" ++ " gl_Position = pos;\n" ++ " texcoord.x = (pos.x + 1.0) / 2.0;\n" ++ " texcoord.y = (-pos.y + 1.0) / 2.0;\n" ++ "}\n"; ++ const char *fs = ++ "#extension GL_OES_EGL_image_external : enable\n" ++ "precision mediump float;\n" ++ "uniform samplerExternalOES s;\n" ++ "varying vec2 texcoord;\n" ++ "void main() {\n" ++ " gl_FragColor = texture2D(s, texcoord);\n" ++ "}\n"; ++ ++ GLuint vs_s; ++ GLuint fs_s; ++ GLuint prog; ++ ++ if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || ++ !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || ++ !(prog = link_program(s, vs_s, fs_s))) ++ return -1; ++ ++ glUseProgram(prog); ++ ++ { ++ static const float verts[] = { ++ -1, -1, ++ 1, -1, ++ 1, 1, ++ -1, 1, ++ }; ++ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); ++ } ++ ++ glEnableVertexAttribArray(0); ++ return 0; ++} ++ ++static int 
egl_vout_write_trailer(AVFormatContext *s) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s\n", __func__); ++#endif ++ ++ return 0; ++} ++ ++static int egl_vout_write_header(AVFormatContext *s) ++{ ++ const AVCodecParameters *const par = s->streams[0]->codecpar; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s\n", __func__); ++#endif ++ if (s->nb_streams > 1 ++ || par->codec_type != AVMEDIA_TYPE_VIDEO ++ || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { ++ av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ return 0; ++} ++ ++ ++static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame) ++{ ++ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0]; ++ egl_aux_t *da = NULL; ++ unsigned int i; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); ++#endif ++ ++ for (i = 0; i != 32; ++i) { ++ if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) { ++ da = de->aux + i; ++ break; ++ } ++ } ++ ++ if (da == NULL) { ++ av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__); ++ return AVERROR(EINVAL); ++ } ++ ++ if (da->texture == 0) { ++ EGLint attribs[50]; ++ EGLint *a = attribs; ++ int i, j; ++ static const EGLint anames[] = { ++ EGL_DMA_BUF_PLANE0_FD_EXT, ++ EGL_DMA_BUF_PLANE0_OFFSET_EXT, ++ EGL_DMA_BUF_PLANE0_PITCH_EXT, ++ EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, ++ EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, ++ EGL_DMA_BUF_PLANE1_FD_EXT, ++ EGL_DMA_BUF_PLANE1_OFFSET_EXT, ++ EGL_DMA_BUF_PLANE1_PITCH_EXT, ++ EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, ++ EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, ++ EGL_DMA_BUF_PLANE2_FD_EXT, ++ EGL_DMA_BUF_PLANE2_OFFSET_EXT, ++ EGL_DMA_BUF_PLANE2_PITCH_EXT, ++ EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, ++ EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, ++ }; ++ const EGLint *b = anames; ++ ++ *a++ = EGL_WIDTH; ++ *a++ = av_frame_cropped_width(frame); ++ *a++ = EGL_HEIGHT; ++ *a++ = av_frame_cropped_height(frame); ++ *a++ = 
EGL_LINUX_DRM_FOURCC_EXT; ++ *a++ = desc->layers[0].format; ++ ++ for (i = 0; i < desc->nb_layers; ++i) { ++ for (j = 0; j < desc->layers[i].nb_planes; ++j) { ++ const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j; ++ const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index; ++ *a++ = *b++; ++ *a++ = obj->fd; ++ *a++ = *b++; ++ *a++ = p->offset; ++ *a++ = *b++; ++ *a++ = p->pitch; ++ if (obj->format_modifier == 0) { ++ b += 2; ++ } ++ else { ++ *a++ = *b++; ++ *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); ++ *a++ = *b++; ++ *a++ = (EGLint)(obj->format_modifier >> 32); ++ } ++ } ++ } ++ ++ *a = EGL_NONE; ++ ++#if TRACE_ALL ++ for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { ++ av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); ++ } ++#endif ++ { ++ const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, ++ EGL_NO_CONTEXT, ++ EGL_LINUX_DMA_BUF_EXT, ++ NULL, attribs); ++ if (!image) { ++ av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); ++ return -1; ++ } ++ ++ glGenTextures(1, &da->texture); ++ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); ++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); ++ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); ++ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); ++ ++ eglDestroyImageKHR(de->setup.egl_dpy, image); ++ } ++ ++ da->fd = desc->objects[0].fd; ++ } ++ ++ glClearColor(0.5, 0.5, 0.5, 0.5); ++ glClear(GL_COLOR_BUFFER_BIT); ++ ++ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); ++ glDrawArrays(GL_TRIANGLE_FAN, 0, 4); ++ eglSwapBuffers(de->setup.egl_dpy, de->setup.surf); ++ ++ glDeleteTextures(1, &da->texture); ++ da->texture = 0; ++ da->fd = -1; ++ ++ return 0; ++} ++ ++static void* display_thread(void *v) ++{ ++ AVFormatContext *const s = v; ++ egl_display_env_t *const de = s->priv_data; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); ++#endif ++ { ++ EGLint 
egl_major, egl_minor; ++ ++ de->setup.dpy = XOpenDisplay(NULL); ++ if (!de->setup.dpy) { ++ av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); ++ goto fail; ++ } ++ ++ de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); ++ if (!de->setup.egl_dpy) { ++ av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); ++ goto fail; ++ } ++ ++ if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { ++ av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); ++ goto fail; ++ } ++ ++ av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); ++ ++ if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { ++ av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); ++ goto fail; ++ } ++ } ++ ++ if (!de->window_width || !de->window_height) { ++ de->window_width = 1280; ++ de->window_height = 720; ++ } ++ if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout", ++ &de->setup.win, &de->setup.ctx, &de->setup.surf)) { ++ av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); ++ goto fail; ++ } ++ ++ if (gl_setup(s)) { ++ av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); ++ goto fail; ++ } ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__); ++#endif ++ sem_post(&de->display_start_sem); ++ ++ for (;;) { ++ AVFrame *frame; ++ ++ while (sem_wait(&de->q_sem) != 0) { ++ av_assert0(errno == EINTR); ++ } ++ ++ if (de->q_terminate) ++ break; ++ ++ pthread_mutex_lock(&de->q_lock); ++ frame = de->q_next; ++ de->q_next = NULL; ++ pthread_mutex_unlock(&de->q_lock); ++ ++ do_display(s, de, frame); ++ ++ av_frame_free(&de->q_this); ++ de->q_this = frame; ++ } ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, ">>> %s\n", __func__); ++#endif ++ ++ return NULL; ++ ++fail: ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__); ++#endif ++ de->q_terminate = 1; ++ sem_post(&de->display_start_sem); ++ ++ return NULL; ++} ++ ++static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) ++{ 
++ const AVFrame *const src_frame = (AVFrame *)pkt->data; ++ AVFrame *frame; ++ egl_display_env_t *const de = s->priv_data; ++ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s\n", __func__); ++#endif ++ ++ if (src_frame->format == AV_PIX_FMT_DRM_PRIME) { ++ frame = av_frame_alloc(); ++ av_frame_ref(frame, src_frame); ++ } ++ else if (src_frame->format == AV_PIX_FMT_VAAPI) { ++ frame = av_frame_alloc(); ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ if (av_hwframe_map(frame, src_frame, 0) != 0) { ++ av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); ++ av_frame_free(&frame); ++ return AVERROR(EINVAL); ++ } ++ } ++ else { ++ av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format); ++ return AVERROR(EINVAL); ++ } ++ ++ // Really hacky sync ++ while (de->show_all && de->q_next) { ++ usleep(3000); ++ } ++ ++ pthread_mutex_lock(&de->q_lock); ++ { ++ AVFrame *const t = de->q_next; ++ de->q_next = frame; ++ frame = t; ++ } ++ pthread_mutex_unlock(&de->q_lock); ++ ++ if (frame == NULL) ++ sem_post(&de->q_sem); ++ else ++ av_frame_free(&frame); ++ ++ return 0; ++} ++ ++static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, ++ unsigned flags) ++{ ++ av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); ++ return AVERROR_PATCHWELCOME; ++} ++ ++static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size) ++{ ++#if TRACE_ALL ++ av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type); ++#endif ++ switch (type) { ++ case AV_APP_TO_DEV_WINDOW_REPAINT: ++ return 0; ++ default: ++ break; ++ } ++ return AVERROR(ENOSYS); ++} ++ ++// deinit is called if init fails so no need to clean up explicity here ++static int egl_vout_init(struct AVFormatContext *s) ++{ ++ egl_display_env_t *const de = s->priv_data; ++ unsigned int i; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->setup = (struct egl_setup) { 0 }; ++ ++ 
for (i = 0; i != 32; ++i) { ++ de->aux[i].fd = -1; ++ } ++ ++ de->q_terminate = 0; ++ pthread_mutex_init(&de->q_lock, NULL); ++ sem_init(&de->q_sem, 0, 0); ++ sem_init(&de->display_start_sem, 0, 0); ++ av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0); ++ ++ sem_wait(&de->display_start_sem); ++ if (de->q_terminate) { ++ av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); ++ return -1; ++ } ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++ ++ return 0; ++} ++ ++static void egl_vout_deinit(struct AVFormatContext *s) ++{ ++ egl_display_env_t *const de = s->priv_data; ++ ++ av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); ++ ++ de->q_terminate = 1; ++ sem_post(&de->q_sem); ++ pthread_join(de->q_thread, NULL); ++ sem_destroy(&de->q_sem); ++ pthread_mutex_destroy(&de->q_lock); ++ ++ av_frame_free(&de->q_next); ++ av_frame_free(&de->q_this); ++ ++ av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); ++} ++ ++#define OFFSET(x) offsetof(egl_display_env_t, x) ++static const AVOption options[] = { ++ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, ++ { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, ++ { NULL } ++ ++}; ++ ++static const AVClass egl_vout_class = { ++ .class_name = "egl vid outdev", ++ .item_name = av_default_item_name, ++ .option = options, ++ .version = LIBAVUTIL_VERSION_INT, ++ .category = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT, ++}; ++ ++FFOutputFormat 
ff_vout_egl_muxer = { ++ .p = { ++ .name = "vout_egl", ++ .long_name = NULL_IF_CONFIG_SMALL("Egl video output device"), ++ .audio_codec = AV_CODEC_ID_NONE, ++ .video_codec = AV_CODEC_ID_WRAPPED_AVFRAME, ++ .flags = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS, ++ .priv_class = &egl_vout_class, ++ }, ++ .priv_data_size = sizeof(egl_display_env_t), ++ .write_header = egl_vout_write_header, ++ .write_packet = egl_vout_write_packet, ++ .write_uncoded_frame = egl_vout_write_frame, ++ .write_trailer = egl_vout_write_trailer, ++ .control_message = egl_vout_control_message, ++ .init = egl_vout_init, ++ .deinit = egl_vout_deinit, ++}; ++ +diff --git a/libavfilter/Makefile b/libavfilter/Makefile +index 91487afb2185..6dd422a9358c 100644 +--- a/libavfilter/Makefile ++++ b/libavfilter/Makefile +@@ -272,6 +272,7 @@ OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o + OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o + OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_vpp_qsv.o + OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o ++OBJS-$(CONFIG_DEINTERLACE_V4L2M2M_FILTER) += vf_deinterlace_v4l2m2m.o + OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o + OBJS-$(CONFIG_DELOGO_FILTER) += vf_delogo.o + OBJS-$(CONFIG_DENOISE_VAAPI_FILTER) += vf_misc_vaapi.o vaapi_vpp.o +@@ -536,6 +537,7 @@ OBJS-$(CONFIG_TRANSPOSE_VT_FILTER) += vf_transpose_vt.o + OBJS-$(CONFIG_TRANSPOSE_VULKAN_FILTER) += vf_transpose_vulkan.o vulkan.o vulkan_filter.o + OBJS-$(CONFIG_TRIM_FILTER) += trim.o + OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o ++OBJS-$(CONFIG_UNSAND_FILTER) += vf_unsand.o + OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o + OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ + opencl/unsharp.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c -index 357ff61ca803..d504fa1bc8de 100644 +index 9819f0f95b7f..a4ef3091157d 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c -@@ -421,6 +421,7 @@ extern 
const AVFilter ff_vf_scale; +@@ -256,6 +256,7 @@ extern const AVFilter ff_vf_derain; + extern const AVFilter ff_vf_deshake; + extern const AVFilter ff_vf_deshake_opencl; + extern const AVFilter ff_vf_despill; ++extern const AVFilter ff_vf_deinterlace_v4l2m2m; + extern const AVFilter ff_vf_detelecine; + extern const AVFilter ff_vf_dilation; + extern const AVFilter ff_vf_dilation_opencl; +@@ -434,6 +435,7 @@ extern const AVFilter ff_vf_scale; extern const AVFilter ff_vf_scale_cuda; extern const AVFilter ff_vf_scale_npp; extern const AVFilter ff_vf_scale_qsv; +extern const AVFilter ff_vf_scale_v4l2m2m; extern const AVFilter ff_vf_scale_vaapi; + extern const AVFilter ff_vf_scale_vt; extern const AVFilter ff_vf_scale_vulkan; - extern const AVFilter ff_vf_scale2ref; -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index 1a3bef5bcba6..2df39ec0f19f 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -52,31 +52,36 @@ - #include "avfilter.h" - #include "formats.h" - #include "internal.h" -+#include "scale_eval.h" - #include "video.h" +@@ -507,6 +509,7 @@ extern const AVFilter ff_vf_trim; + extern const AVFilter ff_vf_unpremultiply; + extern const AVFilter ff_vf_unsharp; + extern const AVFilter ff_vf_unsharp_opencl; ++extern const AVFilter ff_vf_unsand; + extern const AVFilter ff_vf_untile; + extern const AVFilter ff_vf_uspp; + extern const AVFilter ff_vf_v360; +diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c +index 5811720c61ff..13e1f3446585 100644 +--- a/libavfilter/buffersink.c ++++ b/libavfilter/buffersink.c +@@ -60,6 +60,11 @@ typedef struct BufferSinkContext { + int sample_rates_size; + AVFrame *peeked_frame; ++ ++ union { ++ av_buffersink_alloc_video_frame * video; ++ } alloc_cb; ++ void * alloc_v; + } BufferSinkContext; + + #define NB_ITEMS(list) (list ## _size / sizeof(*list)) +@@ -129,6 +134,22 @@ int attribute_align_arg av_buffersink_get_samples(AVFilterContext 
*ctx, + return get_frame_internal(ctx, frame, 0, nb_samples); + } + ++static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h) ++{ ++ AVFilterContext * const ctx = link->dst; ++ BufferSinkContext * const bs = ctx->priv; ++ return bs->alloc_cb.video ? bs->alloc_cb.video(ctx, bs->alloc_v, w, h) : ++ ff_default_get_video_buffer(link, w, h); ++} ++ ++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v) ++{ ++ BufferSinkContext * const bs = ctx->priv; ++ bs->alloc_cb.video = cb; ++ bs->alloc_v = v; ++ return 0; ++} ++ + static av_cold int common_init(AVFilterContext *ctx) + { + BufferSinkContext *buf = ctx->priv; +@@ -355,6 +376,14 @@ static const AVOption abuffersink_options[] = { + AVFILTER_DEFINE_CLASS(buffersink); + AVFILTER_DEFINE_CLASS(abuffersink); + ++static const AVFilterPad avfilter_vsink_buffer_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .get_buffer = {.video = alloc_video_buffer}, ++ }, ++}; ++ + const AVFilter ff_vsink_buffer = { + .name = "buffersink", + .description = NULL_IF_CONFIG_SMALL("Buffer video frames, and make them available to the end of the filter graph."), +@@ -363,7 +392,7 @@ const AVFilter ff_vsink_buffer = { + .init = common_init, + .uninit = uninit, + .activate = activate, +- FILTER_INPUTS(ff_video_default_filterpad), ++ FILTER_INPUTS(avfilter_vsink_buffer_inputs), + .outputs = NULL, + FILTER_QUERY_FUNC(vsink_query_formats), + }; +diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h +index 361d60367933..47a296cf5edb 100644 +--- a/libavfilter/buffersink.h ++++ b/libavfilter/buffersink.h +@@ -166,6 +166,9 @@ int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame); + */ + int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples); + ++typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h); ++int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, 
av_buffersink_alloc_video_frame * cb, void * v); ++ + /** + * @} + */ +diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c +index b5682006f05c..e42ff26d5182 100644 +--- a/libavfilter/buffersrc.c ++++ b/libavfilter/buffersrc.c +@@ -210,7 +210,7 @@ int attribute_align_arg av_buffersrc_add_frame_flags(AVFilterContext *ctx, AVFra + + switch (ctx->outputs[0]->type) { + case AVMEDIA_TYPE_VIDEO: +- CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height, ++ CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame), + frame->format, frame->colorspace, + frame->color_range, frame->pts); + break; +diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c +index d41a25f8ea99..233b17a21bde 100644 +--- a/libavfilter/vf_bwdif.c ++++ b/libavfilter/vf_bwdif.c +@@ -115,19 +115,28 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic, + YADIFContext *yadif = &bwdif->yadif; + ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff }; + int i; ++ int last_plane = -1; + + for (i = 0; i < yadif->csp->nb_components; i++) { + int w = dstpic->width; + int h = dstpic->height; ++ const AVComponentDescriptor * const comp = yadif->csp->comp + i; ++ ++ // If the last plane was the same as this plane assume we've dealt ++ // with all the pels already ++ if (last_plane == comp->plane) ++ continue; ++ last_plane = comp->plane; + + if (i == 1 || i == 2) { + w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w); + h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h); + } + +- td.w = w; +- td.h = h; +- td.plane = i; ++ // comp step is in bytes but td.w is in pels ++ td.w = w * comp->step / ((comp->depth + 7) / 8); ++ td.h = h; ++ td.plane = comp->plane; + + ff_filter_execute(ctx, filter_slice, &td, NULL, + FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx))); +@@ -151,6 +160,7 @@ static const enum AVPixelFormat pix_fmts[] = { + AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, + AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, 
AV_PIX_FMT_YUVA444P10, + AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, ++ AV_PIX_FMT_NV12, + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, +diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c +new file mode 100644 +index 000000000000..a5f3a776f824 +--- /dev/null ++++ b/libavfilter/vf_deinterlace_v4l2m2m.c +@@ -0,0 +1,2120 @@ ++/* ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file ++ * deinterlace video filter - V4L2 M2M ++ */ ++ ++#include ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "config.h" ++ ++#include "libavutil/avassert.h" ++#include "libavutil/avstring.h" ++#include "libavutil/common.h" ++#include "libavutil/hwcontext.h" ++#include "libavutil/hwcontext_drm.h" ++#include "libavutil/internal.h" ++#include "libavutil/mathematics.h" ++#include "libavutil/mem.h" ++#include "libavutil/opt.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/time.h" ++ ++#define FF_INTERNAL_FIELDS 1 ++#include "framequeue.h" ++#include "filters.h" ++#include "avfilter.h" ++#include "formats.h" ++#include "scale_eval.h" ++#include "video.h" ++ +#ifndef DRM_FORMAT_P030 +#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ +#endif + - typedef struct V4L2Queue V4L2Queue; - typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; - --typedef struct V4L2PlaneInfo { -- int bytesperline; -- size_t length; --} V4L2PlaneInfo; ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in drm_fourcc.h hopefully will be sometime in the future but until then... 
++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ ++typedef struct V4L2Queue V4L2Queue; ++typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; ++ +typedef enum filter_type_v4l2_e +{ + FILTER_V4L2_DEINTERLACE = 1, + FILTER_V4L2_SCALE, +} filter_type_v4l2_t; - - typedef struct V4L2Buffer { - int enqueued; - int reenqueue; -- int fd; - struct v4l2_buffer buffer; - AVFrame frame; - struct v4l2_plane planes[VIDEO_MAX_PLANES]; - int num_planes; -- V4L2PlaneInfo plane_info[VIDEO_MAX_PLANES]; - AVDRMFrameDescriptor drm_frame; - V4L2Queue *q; - } V4L2Buffer; - - typedef struct V4L2Queue { - struct v4l2_format format; ++ ++typedef struct V4L2Buffer { ++ int enqueued; ++ int reenqueue; ++ struct v4l2_buffer buffer; ++ AVFrame frame; ++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; ++ int num_planes; ++ AVDRMFrameDescriptor drm_frame; ++ V4L2Queue *q; ++} V4L2Buffer; ++ ++typedef struct V4L2Queue { ++ struct v4l2_format format; + struct v4l2_selection sel; - int num_buffers; - V4L2Buffer *buffers; - DeintV4L2M2MContextShared *ctx; -@@ -111,11 +116,18 @@ typedef struct pts_track_s - - typedef struct DeintV4L2M2MContextShared { - void * logctx; // For logging - will be NULL when done ++ int eos; ++ int num_buffers; ++ V4L2Buffer *buffers; ++ const char * name; ++ DeintV4L2M2MContextShared *ctx; ++} V4L2Queue; ++ ++typedef struct pts_stats_s ++{ ++ void * logctx; ++ const char * name; // For debug ++ unsigned int last_count; ++ unsigned int last_interval; ++ int64_t last_pts; ++} pts_stats_t; ++ ++#define PTS_TRACK_SIZE 32 ++typedef struct pts_track_el_s ++{ ++ uint32_t n; ++ unsigned int interval; ++ AVFrame * props; ++} pts_track_el_t; ++ ++typedef struct pts_track_s ++{ ++ uint32_t n; ++ uint32_t last_n; ++ int got_2; ++ void * logctx; ++ 
pts_stats_t stats; ++ pts_track_el_t a[PTS_TRACK_SIZE]; ++} pts_track_t; ++ ++typedef enum drain_state_e ++{ ++ DRAIN_NONE = 0, // Not draining ++ DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame ++ DRAIN_LAST, // Drain with long timeout last_frame in received on output expected ++ DRAIN_EOS, // Drain with long timeout EOS expected ++ DRAIN_DONE // Drained ++} drain_state_t; ++ ++typedef struct DeintV4L2M2MContextShared { ++ void * logctx; // For logging - will be NULL when done + filter_type_v4l2_t filter_type; - - int fd; - int done; - int width; - int height; ++ ++ int fd; ++ int done; // fd closed - awating all refs dropped ++ int width; ++ int height; ++ ++ int drain; // EOS received (inlink status) ++ drain_state_t drain_state; ++ int64_t drain_pts; // PTS associated with inline status ++ ++ unsigned int frames_rx; ++ unsigned int frames_tx; + + // from options + int output_width; + int output_height; + enum AVPixelFormat output_format; + - int orig_width; - int orig_height; - atomic_uint refcount; -@@ -134,8 +146,60 @@ typedef struct DeintV4L2M2MContext { - const AVClass *class; - - DeintV4L2M2MContextShared *shared; ++ int has_enc_stop; ++ // We expect to get exactly the same number of frames out as we put in ++ // We can drain by matching input to output ++ int one_to_one; ++ ++ int orig_width; ++ int orig_height; ++ atomic_uint refcount; ++ ++ AVBufferRef *hw_frames_ctx; ++ ++ unsigned int field_order; ++ ++ pts_track_t track; ++ ++ V4L2Queue output; ++ V4L2Queue capture; ++} DeintV4L2M2MContextShared; ++ ++typedef struct DeintV4L2M2MContext { ++ const AVClass *class; ++ ++ DeintV4L2M2MContextShared *shared; + + char * w_expr; + char * h_expr; @@ -26438,8 +15127,47 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + enum AVColorTransferCharacteristic colour_transfer; + enum AVColorSpace colour_matrix; + enum AVChromaLocation chroma_location; - } DeintV4L2M2MContext; - ++} DeintV4L2M2MContext; ++ ++ ++static inline void 
frame_set_progressive(AVFrame* frame) ++{ ++#if FF_API_INTERLACED_FRAME ++FF_DISABLE_DEPRECATION_WARNINGS ++ frame->interlaced_frame = 0; ++ frame->top_field_first = 0; ++FF_ENABLE_DEPRECATION_WARNINGS ++#endif ++ frame->flags &= ~(AV_FRAME_FLAG_TOP_FIELD_FIRST | AV_FRAME_FLAG_INTERLACED); ++} ++ ++static inline int frame_is_interlaced(const AVFrame* const frame) ++{ ++#if FF_API_INTERLACED_FRAME ++FF_DISABLE_DEPRECATION_WARNINGS ++ return frame->interlaced_frame || (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0; ++FF_ENABLE_DEPRECATION_WARNINGS ++#else ++ return (frame->flags & AV_FRAME_FLAG_INTERLACED) != 0; ++#endif ++} ++ ++static inline int frame_is_tff(const AVFrame* const frame) ++{ ++#if FF_API_INTERLACED_FRAME ++FF_DISABLE_DEPRECATION_WARNINGS ++ return frame->top_field_first || (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0; ++FF_ENABLE_DEPRECATION_WARNINGS ++#else ++ return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) != 0; ++#endif ++} ++ ++static inline int drain_frame_expected(const drain_state_t d) ++{ ++ return d == DRAIN_EOS || d == DRAIN_LAST; ++} ++ +// These just list the ones we know we can cope with +static uint32_t +fmt_av_to_v4l2(const enum AVPixelFormat avfmt) @@ -26449,9 +15177,11 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + return V4L2_PIX_FMT_YUV420; + case AV_PIX_FMT_NV12: + return V4L2_PIX_FMT_NV12; ++#if CONFIG_SAND + case AV_PIX_FMT_RPI4_8: + case AV_PIX_FMT_SAND128: + return V4L2_PIX_FMT_NV12_COL128; ++#endif + default: + break; + } @@ -26466,21 +15196,188 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + return AV_PIX_FMT_YUV420P; + case V4L2_PIX_FMT_NV12: + return AV_PIX_FMT_NV12; ++#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + return AV_PIX_FMT_RPI4_8; ++#endif + default: + break; + } + return AV_PIX_FMT_NONE; +} + - static unsigned int pts_stats_interval(const pts_stats_t * const stats) - { - return stats->last_interval; -@@ -301,6 +365,39 @@ static int pts_track_init(pts_track_t * const trk, void *logctx) - return 0; - } 
- ++static unsigned int pts_stats_interval(const pts_stats_t * const stats) ++{ ++ return stats->last_interval; ++} ++ ++// Pick 64 for max last count - that is >1sec at 60fps ++#define STATS_LAST_COUNT_MAX 64 ++#define STATS_INTERVAL_MAX (1 << 30) ++static void pts_stats_add(pts_stats_t * const stats, int64_t pts) ++{ ++ if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { ++ if (stats->last_count < STATS_LAST_COUNT_MAX) ++ ++stats->last_count; ++ return; ++ } ++ ++ if (stats->last_pts != AV_NOPTS_VALUE) { ++ const int64_t interval = pts - stats->last_pts; ++ ++ if (interval < 0 || interval >= STATS_INTERVAL_MAX || ++ stats->last_count >= STATS_LAST_COUNT_MAX) { ++ if (stats->last_interval != 0) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: Bad interval: %" PRId64 "/%d\n", ++ __func__, stats->name, interval, stats->last_count); ++ stats->last_interval = 0; ++ } ++ else { ++ const int64_t frame_time = interval / (int64_t)stats->last_count; ++ ++ if (frame_time != stats->last_interval) ++ av_log(stats->logctx, AV_LOG_DEBUG, "%s: %s: New interval: %u->%" PRId64 "/%d=%" PRId64 "\n", ++ __func__, stats->name, stats->last_interval, interval, stats->last_count, frame_time); ++ stats->last_interval = frame_time; ++ } ++ } ++ ++ stats->last_pts = pts; ++ stats->last_count = 1; ++} ++ ++static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char * name) ++{ ++ *stats = (pts_stats_t){ ++ .logctx = logctx, ++ .name = name, ++ .last_count = 1, ++ .last_interval = 0, ++ .last_pts = AV_NOPTS_VALUE ++ }; ++} ++ ++static inline uint32_t pts_track_next_n(pts_track_t * const trk) ++{ ++ if (++trk->n == 0) ++ trk->n = 1; ++ return trk->n; ++} ++ ++static int pts_track_get_frame(pts_track_t * const trk, const struct timeval tv, AVFrame * const dst) ++{ ++ uint32_t n = (uint32_t)(tv.tv_usec / 2 + tv.tv_sec * 500000); ++ pts_track_el_t * t; ++ ++ // As a first guess assume that n==0 means last frame ++ if (n == 0) { ++ n = trk->last_n; ++ if (n == 0) ++ goto 
fail; ++ } ++ ++ t = trk->a + (n & (PTS_TRACK_SIZE - 1)); ++ ++ if (t->n != n) { ++ av_log(trk->logctx, AV_LOG_ERROR, "%s: track failure: got %u, expected %u\n", __func__, n, trk->n); ++ goto fail; ++ } ++ ++ // 1st frame is simple - just believe it ++ if (n != trk->last_n) { ++ trk->last_n = n; ++ trk->got_2 = 0; ++ return av_frame_copy_props(dst, t->props); ++ } ++ ++ // Only believe in a single interpolated frame ++ if (trk->got_2) ++ goto fail; ++ trk->got_2 = 1; ++ ++ av_frame_copy_props(dst, t->props); ++ ++ ++ // If we can't guess - don't ++ if (t->interval == 0) { ++ dst->best_effort_timestamp = AV_NOPTS_VALUE; ++ dst->pts = AV_NOPTS_VALUE; ++ dst->pkt_dts = AV_NOPTS_VALUE; ++ } ++ else { ++ if (dst->best_effort_timestamp != AV_NOPTS_VALUE) ++ dst->best_effort_timestamp += t->interval / 2; ++ if (dst->pts != AV_NOPTS_VALUE) ++ dst->pts += t->interval / 2; ++ if (dst->pkt_dts != AV_NOPTS_VALUE) ++ dst->pkt_dts += t->interval / 2; ++ } ++ ++ return 0; ++ ++fail: ++ trk->last_n = 0; ++ trk->got_2 = 0; ++ dst->pts = AV_NOPTS_VALUE; ++ dst->pkt_dts = AV_NOPTS_VALUE; ++ return 0; ++} ++ ++// We are only ever expecting in-order frames so nothing more clever is required ++static unsigned int ++pts_track_count(const pts_track_t * const trk) ++{ ++ return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1); ++} ++ ++static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) ++{ ++ const uint32_t n = pts_track_next_n(trk); ++ pts_track_el_t * const t = trk->a + (n & (PTS_TRACK_SIZE - 1)); ++ ++ pts_stats_add(&trk->stats, src->pts); ++ ++ t->n = n; ++ t->interval = pts_stats_interval(&trk->stats); // guess that next interval is the same as the last ++ av_frame_unref(t->props); ++ av_frame_copy_props(t->props, src); ++ ++ // We now know what the previous interval was, rather than having to guess, ++ // so set it. There is a better than decent chance that this is before ++ // we use it. 
++ if (t->interval != 0) { ++ pts_track_el_t * const prev_t = trk->a + ((n - 1) & (PTS_TRACK_SIZE - 1)); ++ prev_t->interval = t->interval; ++ } ++ ++ // In case deinterlace interpolates frames use every other usec ++ return (struct timeval){.tv_sec = n / 500000, .tv_usec = (n % 500000) * 2}; ++} ++ ++static void pts_track_uninit(pts_track_t * const trk) ++{ ++ unsigned int i; ++ for (i = 0; i != PTS_TRACK_SIZE; ++i) { ++ trk->a[i].n = 0; ++ av_frame_free(&trk->a[i].props); ++ } ++} ++ ++static int pts_track_init(pts_track_t * const trk, void *logctx) ++{ ++ unsigned int i; ++ trk->n = 1; ++ pts_stats_init(&trk->stats, logctx, "track"); ++ for (i = 0; i != PTS_TRACK_SIZE; ++i) { ++ trk->a[i].n = 0; ++ if ((trk->a[i].props = av_frame_alloc()) == NULL) { ++ pts_track_uninit(trk); ++ return AVERROR(ENOMEM); ++ } ++ } ++ return 0; ++} ++ +static inline uint32_t +fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n) +{ @@ -26505,6 +15402,12 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; +} + ++static inline uint32_t ++buf_bytesused0(const struct v4l2_buffer * const buf) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? 
buf->m.planes[0].bytesused : buf->bytesused; ++} ++ +static void +init_format(V4L2Queue * const q, const uint32_t format_type) +{ @@ -26514,36 +15417,29 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + q->sel.type = format_type; +} + - static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) - { - struct v4l2_capability cap; -@@ -311,80 +408,99 @@ static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) - if (ret < 0) - return ret; - -- if (!(cap.capabilities & V4L2_CAP_STREAMING)) ++static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) ++{ ++ struct v4l2_capability cap; ++ int ret; ++ ++ memset(&cap, 0, sizeof(cap)); ++ ret = ioctl(ctx->fd, VIDIOC_QUERYCAP, &cap); ++ if (ret < 0) ++ return ret; ++ + if (ctx->filter_type == FILTER_V4L2_SCALE && + strcmp("bcm2835-codec-isp", cap.card) != 0) + { + av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n"); - return AVERROR(EINVAL); ++ return AVERROR(EINVAL); + } - -- if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { -- ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -- ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -- -- return 0; ++ + if (!(cap.capabilities & V4L2_CAP_STREAMING)) { + av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n"); + return AVERROR(EINVAL); - } - - if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { -- ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; -- ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; -- -- return 0; ++ } ++ ++ if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { + init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); + init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + } @@ -26554,85 +15450,57 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + else { + av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n"); + return AVERROR(EINVAL); - } - -- return AVERROR(EINVAL); ++ } ++ + return 0; - } - --static int deint_v4l2m2m_try_format(V4L2Queue *queue) ++} ++ +// Just use for probe - doesn't modify q format 
+static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt) - { -- struct v4l2_format *fmt = &queue->format; ++{ + struct v4l2_format fmt = {.type = queue->format.type}; - DeintV4L2M2MContextShared *ctx = queue->ctx; - int ret, field; ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ int ret, field; + // Pick YUV to test with if not otherwise specified + uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt); + enum AVPixelFormat r_avfmt; + - -- ret = ioctl(ctx->fd, VIDIOC_G_FMT, fmt); ++ + ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt); - if (ret) - av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); - -- if (V4L2_TYPE_IS_OUTPUT(fmt->type)) ++ if (ret) ++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); ++ + if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type)) - field = V4L2_FIELD_INTERLACED_TB; - else - field = V4L2_FIELD_NONE; - -- if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -- fmt->fmt.pix_mp.pixelformat = V4L2_PIX_FMT_YUV420; -- fmt->fmt.pix_mp.field = field; -- fmt->fmt.pix_mp.width = ctx->width; -- fmt->fmt.pix_mp.height = ctx->height; ++ field = V4L2_FIELD_INTERLACED_TB; ++ else ++ field = V4L2_FIELD_NONE; ++ + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + fmt.fmt.pix_mp.pixelformat = pixelformat; + fmt.fmt.pix_mp.field = field; + fmt.fmt.pix_mp.width = width; + fmt.fmt.pix_mp.height = height; - } else { -- fmt->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420; -- fmt->fmt.pix.field = field; -- fmt->fmt.pix.width = ctx->width; -- fmt->fmt.pix.height = ctx->height; ++ } else { + fmt.fmt.pix.pixelformat = pixelformat; + fmt.fmt.pix.field = field; + fmt.fmt.pix.width = width; + fmt.fmt.pix.height = height; - } - -- av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, -- fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height, -- 
fmt->fmt.pix_mp.pixelformat, -- fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline); ++ } ++ + av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, + fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, + fmt.fmt.pix_mp.pixelformat, + fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); - -- ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, fmt); ++ + ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt); - if (ret) - return AVERROR(EINVAL); - -- av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, -- fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height, -- fmt->fmt.pix_mp.pixelformat, -- fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline); ++ if (ret) ++ return AVERROR(EINVAL); ++ + av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, + fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, + fmt.fmt.pix_mp.pixelformat, + fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); - -- if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -- if ((fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 && -- fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_NV12) || -- fmt->fmt.pix_mp.field != field) { -- av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); ++ + r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt)); + if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? 
"dest" : "src"); @@ -26646,95 +15514,51 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { + if (fmt.fmt.pix_mp.field != field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); - - return AVERROR(EINVAL); - } - } else { -- if ((fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 && -- fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_NV12) || -- fmt->fmt.pix.field != field) { -- av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); ++ ++ return AVERROR(EINVAL); ++ } ++ } else { + if (fmt.fmt.pix.field != field) { + av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); - - return AVERROR(EINVAL); - } -@@ -393,68 +509,410 @@ static int deint_v4l2m2m_try_format(V4L2Queue *queue) - return 0; - } - --static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height, int pitch, int ysize) ++ ++ return AVERROR(EINVAL); ++ } ++ } ++ ++ return 0; ++} ++ +static int +do_s_fmt(V4L2Queue * const q) - { -- struct v4l2_format *fmt = &queue->format; -- DeintV4L2M2MContextShared *ctx = queue->ctx; ++{ + DeintV4L2M2MContextShared * const ctx = q->ctx; + const uint32_t pixelformat = fmt_pixelformat(&q->format); - int ret; - -- struct v4l2_selection sel = { -- .type = fmt->type, -- .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? 
V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS, -- }; -- -- // This works for most single object 4:2:0 types -- if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -- fmt->fmt.pix_mp.pixelformat = pixelformat; -- fmt->fmt.pix_mp.field = field; -- fmt->fmt.pix_mp.width = width; -- fmt->fmt.pix_mp.height = ysize / pitch; -- fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch; -- fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1); -- } else { -- fmt->fmt.pix.pixelformat = pixelformat; -- fmt->fmt.pix.field = field; -- fmt->fmt.pix.width = width; -- fmt->fmt.pix.height = height; -- fmt->fmt.pix.sizeimage = 0; -- fmt->fmt.pix.bytesperline = 0; -- } -- -- ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt); ++ int ret; ++ + ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format); - if (ret) { - ret = AVERROR(errno); -- av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret); ++ if (ret) { ++ ret = AVERROR(errno); + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret)); - return ret; - } - -- if (pixelformat != fmt->fmt.pix.pixelformat) { -- av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt->fmt.pix.pixelformat)); ++ return ret; ++ } ++ + if (pixelformat != fmt_pixelformat(&q->format)) { + av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format))); - return AVERROR(EINVAL); - } - -- ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel); ++ return AVERROR(EINVAL); ++ } ++ + q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, + q->sel.flags = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? 
V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE; + + ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel); - if (ret) { - ret = AVERROR(errno); -- av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION failed: %d\n", ret); ++ if (ret) { ++ ret = AVERROR(errno); + av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret)); - } - -- sel.r.width = width; -- sel.r.height = height; -- sel.r.left = 0; -- sel.r.top = 0; -- sel.target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, -- sel.flags = V4L2_SEL_FLAG_LE; ++ } ++ + return 0; +} - -- ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel); -- if (ret) { -- ret = AVERROR(errno); -- av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %d\n", ret); ++ +static void +set_fmt_color(struct v4l2_format *const fmt, + const enum AVColorPrimaries avcp, @@ -27017,6 +15841,7 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + h = src->layers[0].planes[1].offset / bpl; + w = bpl; + } ++#if CONFIG_SAND + else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; @@ -27025,9 +15850,11 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } ++#endif + break; + + case DRM_FORMAT_P030: ++#if CONFIG_SAND + if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { + if (src->layers[0].nb_planes != 2) + break; @@ -27036,6 +15863,7 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + h = src->layers[0].planes[1].offset / 128; + bpl = fourcc_mod_broadcom_param(mod); + } ++#endif + break; + + default: @@ -27061,8 +15889,8 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + pix->height = h; + pix->pixelformat = pix_fmt; + pix->bytesperline = bpl; - } - ++ } ++ + set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc); + set_fmt_color_range(format, frame->color_range); + @@ -27071,9 +15899,9 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + 
q->sel.r.left = frame->crop_left; + q->sel.r.top = frame->crop_top; + - return 0; - } - ++ return 0; ++} ++ + +static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height) +{ @@ -27108,42 +15936,91 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + return do_s_fmt(queue); +} + - static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) - { - int ret; -@@ -464,16 +922,22 @@ static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node - return AVERROR(errno); - - ret = deint_v4l2m2m_prepare_context(ctx); -- if (ret) ++static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) ++{ ++ int ret; ++ ++ ctx->fd = open(node, O_RDWR | O_NONBLOCK, 0); ++ if (ctx->fd < 0) ++ return AVERROR(errno); ++ ++ ret = deint_v4l2m2m_prepare_context(ctx); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n"); - goto fail; ++ goto fail; + } - -- ret = deint_v4l2m2m_try_format(&ctx->capture); -- if (ret) ++ + ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n"); - goto fail; ++ goto fail; + } - -- ret = deint_v4l2m2m_try_format(&ctx->output); -- if (ret) ++ + ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE); + if (ret) { + av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n"); - goto fail; ++ goto fail; + } - - return 0; - -@@ -534,26 +998,118 @@ static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) - return 0; - } - --static int v4l2_buffer_export_drm(V4L2Buffer* avbuf, const uint32_t pixelformat) ++ ++ return 0; ++ ++fail: ++ close(ctx->fd); ++ ctx->fd = -1; ++ ++ return ret; ++} ++ ++static int deint_v4l2m2m_find_device(DeintV4L2M2MContextShared *ctx) ++{ ++ int ret = AVERROR(EINVAL); ++ struct dirent *entry; ++ char node[PATH_MAX]; ++ DIR 
*dirp; ++ ++ dirp = opendir("/dev"); ++ if (!dirp) ++ return AVERROR(errno); ++ ++ for (entry = readdir(dirp); entry; entry = readdir(dirp)) { ++ ++ if (strncmp(entry->d_name, "video", 5)) ++ continue; ++ ++ snprintf(node, sizeof(node), "/dev/%s", entry->d_name); ++ av_log(ctx->logctx, AV_LOG_DEBUG, "probing device %s\n", node); ++ ret = deint_v4l2m2m_probe_device(ctx, node); ++ if (!ret) ++ break; ++ } ++ ++ closedir(dirp); ++ ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "Could not find a valid device\n"); ++ ctx->fd = -1; ++ ++ return ret; ++ } ++ ++ av_log(ctx->logctx, AV_LOG_INFO, "Using device %s\n", node); ++ ++ return 0; ++} ++ ++static int deint_v4l2m2m_enqueue_buffer(V4L2Buffer *buf) ++{ ++ int ret; ++ ++ ret = ioctl(buf->q->ctx->fd, VIDIOC_QBUF, &buf->buffer); ++ if (ret < 0) ++ return AVERROR(errno); ++ ++ buf->enqueued = 1; ++ ++ return 0; ++} ++ +static void +drm_frame_init(AVDRMFrameDescriptor * const d) +{ @@ -27184,26 +16061,15 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 +} + +static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) - { - struct v4l2_exportbuffer expbuf; - int i, ret; - uint64_t mod = DRM_FORMAT_MOD_LINEAR; -- uint32_t fmt = 0; - -- switch (pixelformat) { -- case V4L2_PIX_FMT_NV12: -- fmt = DRM_FORMAT_NV12; -- break; -- case V4L2_PIX_FMT_YUV420: -- fmt = DRM_FORMAT_YUV420; -- break; -- default: -- return AVERROR(EINVAL); ++{ ++ struct v4l2_exportbuffer expbuf; ++ int i, ret; ++ uint64_t mod = DRM_FORMAT_MOD_LINEAR; ++ + AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame; + AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; + const struct v4l2_format *const fmt = &q->format; + const uint32_t height = fmt_height(fmt); -+ const uint32_t width = fmt_width(fmt); + ptrdiff_t bpl0; + + /* fill the DRM frame descriptor */ @@ -27214,11 +16080,11 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + layer->planes[i].object_index = i; + layer->planes[i].offset = 0; + layer->planes[i].pitch = fmt_bpl(fmt, i); 
- } ++ } + bpl0 = layer->planes[0].pitch; + + switch (fmt_pixelformat(fmt)) { -+ ++#if CONFIG_SAND + case V4L2_PIX_FMT_NV12_COL128: + mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); + layer->format = V4L2_PIX_FMT_NV12; @@ -27229,14 +16095,14 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + layer->nb_planes = 2; + layer->planes[1].object_index = 0; + layer->planes[1].offset = height * 128; -+ layer->planes[0].pitch = width; -+ layer->planes[1].pitch = width; ++ layer->planes[0].pitch = fmt_width(fmt); ++ layer->planes[1].pitch = layer->planes[0].pitch; + break; - -- avbuf->drm_frame.layers[0].format = fmt; ++#endif ++ + case DRM_FORMAT_NV12: + layer->format = V4L2_PIX_FMT_NV12; - ++ + if (avbuf->num_planes > 1) + break; + @@ -27267,241 +16133,379 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + } + + drm_desc->nb_objects = 0; - for (i = 0; i < avbuf->num_planes; i++) { - memset(&expbuf, 0, sizeof(expbuf)); - -@@ -565,19 +1121,11 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf, const uint32_t pixelformat) - if (ret < 0) - return AVERROR(errno); - -- avbuf->fd = expbuf.fd; -- -- if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type)) { -- /* drm frame */ -- avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length; -- avbuf->drm_frame.objects[i].fd = expbuf.fd; -- avbuf->drm_frame.objects[i].format_modifier = mod; -- } else { -- /* drm frame */ -- avbuf->drm_frame.objects[0].size = avbuf->buffer.length; -- avbuf->drm_frame.objects[0].fd = expbuf.fd; -- avbuf->drm_frame.objects[0].format_modifier = mod; -- } ++ for (i = 0; i < avbuf->num_planes; i++) { ++ memset(&expbuf, 0, sizeof(expbuf)); ++ ++ expbuf.index = avbuf->buffer.index; ++ expbuf.type = avbuf->buffer.type; ++ expbuf.plane = i; ++ ++ ret = ioctl(avbuf->q->ctx->fd, VIDIOC_EXPBUF, &expbuf); ++ if (ret < 0) ++ return AVERROR(errno); ++ + drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ? 
+ avbuf->buffer.m.planes[i].length : avbuf->buffer.length; + drm_desc->objects[i].fd = expbuf.fd; + drm_desc->objects[i].format_modifier = mod; + drm_desc->nb_objects = i + 1; - } - - return 0; -@@ -588,7 +1136,7 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) - struct v4l2_format *fmt = &queue->format; - DeintV4L2M2MContextShared *ctx = queue->ctx; - struct v4l2_requestbuffers req; -- int ret, i, j, multiplanar; ++ } ++ ++ return 0; ++} ++ ++static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) ++{ ++ struct v4l2_format *fmt = &queue->format; ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ struct v4l2_requestbuffers req; + int ret, i, multiplanar; - uint32_t memory; - - memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? -@@ -617,10 +1165,9 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) - } - - for (i = 0; i < queue->num_buffers; i++) { -- V4L2Buffer *buf = &queue->buffers[i]; ++ uint32_t memory; ++ ++ memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? ++ V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; ++ ++ multiplanar = V4L2_TYPE_IS_MULTIPLANAR(fmt->type); ++ ++ memset(&req, 0, sizeof(req)); ++ req.count = queue->num_buffers; ++ req.memory = memory; ++ req.type = fmt->type; ++ ++ ret = ioctl(ctx->fd, VIDIOC_REQBUFS, &req); ++ if (ret < 0) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_REQBUFS failed: %s\n", strerror(errno)); ++ ++ return AVERROR(errno); ++ } ++ ++ queue->num_buffers = req.count; ++ queue->buffers = av_mallocz(queue->num_buffers * sizeof(V4L2Buffer)); ++ if (!queue->buffers) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "malloc enomem\n"); ++ ++ return AVERROR(ENOMEM); ++ } ++ ++ for (i = 0; i < queue->num_buffers; i++) { + V4L2Buffer * const buf = &queue->buffers[i]; - - buf->enqueued = 0; -- buf->fd = -1; - buf->q = queue; - - buf->buffer.type = fmt->type; -@@ -632,6 +1179,12 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) - buf->buffer.m.planes = buf->planes; - } - ++ ++ buf->enqueued = 0; ++ buf->q = queue; ++ ++ 
buf->buffer.type = fmt->type; ++ buf->buffer.memory = memory; ++ buf->buffer.index = i; ++ ++ if (multiplanar) { ++ buf->buffer.length = VIDEO_MAX_PLANES; ++ buf->buffer.m.planes = buf->planes; ++ } ++ + drm_frame_init(&buf->drm_frame); + } + + for (i = 0; i < queue->num_buffers; i++) { + V4L2Buffer * const buf = &queue->buffers[i]; + - ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); - if (ret < 0) { - ret = AVERROR(errno); -@@ -639,29 +1192,14 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) - goto fail; - } - -- if (multiplanar) -- buf->num_planes = buf->buffer.length; -- else -- buf->num_planes = 1; -- -- for (j = 0; j < buf->num_planes; j++) { -- V4L2PlaneInfo *info = &buf->plane_info[j]; -- -- if (multiplanar) { -- info->bytesperline = fmt->fmt.pix_mp.plane_fmt[j].bytesperline; -- info->length = buf->buffer.m.planes[j].length; -- } else { -- info->bytesperline = fmt->fmt.pix.bytesperline; -- info->length = buf->buffer.length; -- } -- } ++ ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); ++ if (ret < 0) { ++ ret = AVERROR(errno); ++ ++ goto fail; ++ } ++ + buf->num_planes = multiplanar ? buf->buffer.length : 1; - - if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { - ret = deint_v4l2m2m_enqueue_buffer(buf); - if (ret) - goto fail; - -- ret = v4l2_buffer_export_drm(buf, multiplanar ? 
fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat); ++ ++ if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { ++ ret = deint_v4l2m2m_enqueue_buffer(buf); ++ if (ret) ++ goto fail; ++ + ret = v4l2_buffer_export_drm(queue, buf); - if (ret) - goto fail; - } -@@ -670,12 +1208,8 @@ static int deint_v4l2m2m_allocate_buffers(V4L2Queue *queue) - return 0; - - fail: -- for (i = 0; i < queue->num_buffers; i++) -- if (queue->buffers[i].fd >= 0) -- close(queue->buffers[i].fd); -- av_free(queue->buffers); -- queue->buffers = NULL; -- ++ if (ret) ++ goto fail; ++ } ++ } ++ ++ return 0; ++ ++fail: + avbufs_delete(&queue->buffers, queue->num_buffers); + queue->num_buffers = 0; - return ret; - } - -@@ -862,7 +1396,6 @@ static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) - if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { - V4L2Queue *capture = &ctx->capture; - V4L2Queue *output = &ctx->output; -- int i; - - av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); - -@@ -871,12 +1404,7 @@ static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) - deint_v4l2m2m_streamoff(output); - } - -- if (capture->buffers) -- for (i = 0; i < capture->num_buffers; i++) { -- capture->buffers[i].q = NULL; -- if (capture->buffers[i].fd >= 0) -- close(capture->buffers[i].fd); -- } ++ return ret; ++} ++ ++static int deint_v4l2m2m_streamon(V4L2Queue *queue) ++{ ++ DeintV4L2M2MContextShared * const ctx = queue->ctx; ++ int type = queue->format.type; ++ int ret; ++ ++ ret = ioctl(ctx->fd, VIDIOC_STREAMON, &type); ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", __func__, type, ret, AVERROR(errno)); ++ if (ret < 0) ++ return AVERROR(errno); ++ ++ return 0; ++} ++ ++static int deint_v4l2m2m_streamoff(V4L2Queue *queue) ++{ ++ DeintV4L2M2MContextShared * const ctx = queue->ctx; ++ int type = queue->format.type; ++ int ret; ++ ++ ret = ioctl(ctx->fd, VIDIOC_STREAMOFF, &type); ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: type:%d ret:%d errno:%d\n", 
__func__, type, ret, AVERROR(errno)); ++ if (ret < 0) ++ return AVERROR(errno); ++ ++ return 0; ++} ++ ++// timeout in ms ++static V4L2Buffer* deint_v4l2m2m_dequeue_buffer(V4L2Queue *queue, int timeout) ++{ ++ struct v4l2_plane planes[VIDEO_MAX_PLANES]; ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ struct v4l2_buffer buf = { 0 }; ++ V4L2Buffer* avbuf = NULL; ++ struct pollfd pfd; ++ short events; ++ int ret; ++ ++ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) ++ events = POLLOUT | POLLWRNORM; ++ else ++ events = POLLIN | POLLRDNORM; ++ ++ pfd.events = events; ++ pfd.fd = ctx->fd; ++ ++ for (;;) { ++ ret = poll(&pfd, 1, timeout); ++ if (ret > 0) ++ break; ++ if (errno == EINTR) ++ continue; ++ return NULL; ++ } ++ ++ if (pfd.revents & POLLERR) ++ return NULL; ++ ++ if (pfd.revents & events) { ++ memset(&buf, 0, sizeof(buf)); ++ buf.memory = V4L2_MEMORY_MMAP; ++ buf.type = queue->format.type; ++ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { ++ memset(planes, 0, sizeof(planes)); ++ buf.length = VIDEO_MAX_PLANES; ++ buf.m.planes = planes; ++ } ++ ++ ret = ioctl(ctx->fd, VIDIOC_DQBUF, &buf); ++ if (ret) { ++ if (errno != EAGAIN) ++ av_log(ctx->logctx, AV_LOG_DEBUG, "VIDIOC_DQBUF, errno (%s)\n", ++ av_err2str(AVERROR(errno))); ++ return NULL; ++ } ++ ++ avbuf = &queue->buffers[buf.index]; ++ avbuf->enqueued = 0; ++ avbuf->buffer = buf; ++ if (V4L2_TYPE_IS_MULTIPLANAR(queue->format.type)) { ++ memcpy(avbuf->planes, planes, sizeof(planes)); ++ avbuf->buffer.m.planes = avbuf->planes; ++ } ++ return avbuf; ++ } ++ ++ return NULL; ++} ++ ++static V4L2Buffer *deint_v4l2m2m_find_free_buf(V4L2Queue *queue) ++{ ++ int i; ++ V4L2Buffer *buf = NULL; ++ ++ for (i = 0; i < queue->num_buffers; i++) ++ if (!queue->buffers[i].enqueued) { ++ buf = &queue->buffers[i]; ++ break; ++ } ++ return buf; ++} ++ ++static void deint_v4l2m2m_unref_queued(V4L2Queue *queue) ++{ ++ int i; ++ V4L2Buffer *buf = NULL; ++ ++ if (!queue || !queue->buffers) ++ return; ++ for (i = 0; i < 
queue->num_buffers; i++) { ++ buf = &queue->buffers[i]; ++ if (queue->buffers[i].enqueued) ++ av_frame_unref(&buf->frame); ++ } ++} ++ ++static void recycle_q(V4L2Queue * const queue) ++{ ++ V4L2Buffer* avbuf; ++ while (avbuf = deint_v4l2m2m_dequeue_buffer(queue, 0), avbuf) { ++ av_frame_unref(&avbuf->frame); ++ } ++} ++ ++static int count_enqueued(V4L2Queue *queue) ++{ ++ int i; ++ int n = 0; ++ ++ if (queue->buffers == NULL) ++ return 0; ++ ++ for (i = 0; i < queue->num_buffers; i++) ++ if (queue->buffers[i].enqueued) ++ ++n; ++ return n; ++} ++ ++static int deint_v4l2m2m_enqueue_frame(V4L2Queue * const queue, AVFrame * const frame) ++{ ++ DeintV4L2M2MContextShared *const ctx = queue->ctx; ++ AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)frame->data[0]; ++ V4L2Buffer *buf; ++ int i; ++ ++ if (V4L2_TYPE_IS_OUTPUT(queue->format.type)) ++ recycle_q(queue); ++ ++ buf = deint_v4l2m2m_find_free_buf(queue); ++ if (!buf) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d finding free buf\n", __func__, 0); ++ return AVERROR(EAGAIN); ++ } ++ if (V4L2_TYPE_IS_MULTIPLANAR(buf->buffer.type)) ++ for (i = 0; i < drm_desc->nb_objects; i++) ++ buf->buffer.m.planes[i].m.fd = drm_desc->objects[i].fd; ++ else ++ buf->buffer.m.fd = drm_desc->objects[0].fd; ++ ++ buf->buffer.field = !frame_is_interlaced(frame) ? V4L2_FIELD_NONE : ++ frame_is_tff(frame) ? 
V4L2_FIELD_INTERLACED_TB : ++ V4L2_FIELD_INTERLACED_BT; ++ ++ if (ctx->field_order != buf->buffer.field) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Field changed: %d->%d\n", __func__, ctx->field_order, buf->buffer.field); ++ ctx->field_order = buf->buffer.field; ++ } ++ ++ buf->buffer.timestamp = pts_track_add_frame(&ctx->track, frame); ++ ++ buf->drm_frame.objects[0].fd = drm_desc->objects[0].fd; ++ ++ av_frame_move_ref(&buf->frame, frame); ++ ++ return deint_v4l2m2m_enqueue_buffer(buf); ++} ++ ++static void deint_v4l2m2m_destroy_context(DeintV4L2M2MContextShared *ctx) ++{ ++ if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { ++ V4L2Queue *capture = &ctx->capture; ++ V4L2Queue *output = &ctx->output; ++ ++ av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); ++ ++ if (ctx->fd >= 0) { ++ deint_v4l2m2m_streamoff(capture); ++ deint_v4l2m2m_streamoff(output); ++ } ++ + avbufs_delete(&capture->buffers, capture->num_buffers); - - deint_v4l2m2m_unref_queued(output); - -@@ -908,73 +1436,15 @@ static void v4l2_free_buffer(void *opaque, uint8_t *unused) - deint_v4l2m2m_destroy_context(ctx); - } - --static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height) --{ -- AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; -- AVDRMLayerDescriptor *layer; -- -- /* fill the DRM frame descriptor */ -- drm_desc->nb_objects = avbuf->num_planes; -- drm_desc->nb_layers = 1; -- -- layer = &drm_desc->layers[0]; -- layer->nb_planes = avbuf->num_planes; -- -- for (int i = 0; i < avbuf->num_planes; i++) { -- layer->planes[i].object_index = i; -- layer->planes[i].offset = 0; -- layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; -- } -- -- switch (layer->format) { -- case DRM_FORMAT_YUYV: -- layer->nb_planes = 1; -- break; -- -- case DRM_FORMAT_NV12: -- case DRM_FORMAT_NV21: -- if (avbuf->num_planes > 1) -- break; -- -- layer->nb_planes = 2; -- -- layer->planes[1].object_index = 0; -- layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -- height; -- 
layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; -- break; -- -- case DRM_FORMAT_YUV420: -- if (avbuf->num_planes > 1) -- break; -- -- layer->nb_planes = 3; -- -- layer->planes[1].object_index = 0; -- layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -- height; -- layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; -- -- layer->planes[2].object_index = 0; -- layer->planes[2].offset = layer->planes[1].offset + -- ((avbuf->plane_info[0].bytesperline * -- height) >> 2); -- layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; -- break; -- -- default: -- drm_desc->nb_layers = 0; -- break; -- } -- -- return (uint8_t *) drm_desc; --} -- - // timeout in ms - static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) - { - DeintV4L2M2MContextShared *ctx = queue->ctx; - V4L2Buffer* avbuf; ++ ++ deint_v4l2m2m_unref_queued(output); ++ ++ av_buffer_unref(&ctx->hw_frames_ctx); ++ ++ if (capture->buffers) ++ av_free(capture->buffers); ++ ++ if (output->buffers) ++ av_free(output->buffers); ++ ++ if (ctx->fd >= 0) { ++ close(ctx->fd); ++ ctx->fd = -1; ++ } ++ ++ av_free(ctx); ++ } ++} ++ ++static void v4l2_free_buffer(void *opaque, uint8_t *unused) ++{ ++ V4L2Buffer *buf = opaque; ++ DeintV4L2M2MContextShared *ctx = buf->q->ctx; ++ ++ if (!ctx->done) ++ deint_v4l2m2m_enqueue_buffer(buf); ++ ++ deint_v4l2m2m_destroy_context(ctx); ++} ++ ++// timeout in ms ++static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) ++{ ++ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ V4L2Buffer* avbuf; + enum AVColorPrimaries color_primaries; + enum AVColorSpace colorspace; + enum AVColorTransferCharacteristic color_trc; + enum AVColorRange color_range; - - av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); - -@@ -985,8 +1455,6 @@ static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int tim - } - - // Fill in PTS and anciliary info from src frame -- // we will want to 
overwrite some fields as only the pts/dts -- // fields are updated with new timing in this fn - pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); - - frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, -@@ -999,18 +1467,36 @@ static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int tim - - atomic_fetch_add(&ctx->refcount, 1); - -- frame->data[0] = (uint8_t *)v4l2_get_drm_frame(avbuf, ctx->orig_height); ++ ++ av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ++ if (queue->eos) { ++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__); ++ return AVERROR_EOF; ++ } ++ ++ avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); ++ if (!avbuf) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); ++ return AVERROR(EAGAIN); ++ } ++ ++ if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) { ++ if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0) ++ queue->eos = 1; ++ if (buf_bytesused0(&avbuf->buffer) == 0) ++ return queue->eos ? 
AVERROR_EOF : AVERROR(EINVAL); ++ } ++ ++ // Fill in PTS and anciliary info from src frame ++ pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); ++ ++ frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, ++ sizeof(avbuf->drm_frame), v4l2_free_buffer, ++ avbuf, AV_BUFFER_FLAG_READONLY); ++ if (!frame->buf[0]) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "%s: error %d creating buffer\n", __func__, 0); ++ return AVERROR(ENOMEM); ++ } ++ ++ atomic_fetch_add(&ctx->refcount, 1); ++ + frame->data[0] = (uint8_t *)&avbuf->drm_frame; - frame->format = AV_PIX_FMT_DRM_PRIME; - if (ctx->hw_frames_ctx) - frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); -- frame->height = ctx->height; -- frame->width = ctx->width; -- -- // Not interlaced now -- frame->interlaced_frame = 0; -- frame->top_field_first = 0; -- // Pkt duration halved -- frame->pkt_duration /= 2; ++ frame->format = AV_PIX_FMT_DRM_PRIME; ++ if (ctx->hw_frames_ctx) ++ frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); + frame->height = ctx->output_height; + frame->width = ctx->output_width; + @@ -27523,19 +16527,31 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + + if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) { + // Not interlaced now -+ frame->interlaced_frame = 0; // *** Fill in from dst buffer? 
-+ frame->top_field_first = 0; -+ // Pkt duration halved -+ frame->pkt_duration /= 2; ++ frame_set_progressive(frame); ++ // Duration halved ++ frame->duration /= 2; + } - - if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { - av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); -@@ -1032,15 +1518,34 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink) - ctx->height = avctx->inputs[0]->h; - ctx->width = avctx->inputs[0]->w; - -- av_log(priv, AV_LOG_DEBUG, "%s: %dx%d\n", __func__, ctx->width, ctx->height); ++ ++ if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); ++ frame->decode_error_flags |= FF_DECODE_ERROR_INVALID_BITSTREAM; ++ } ++ ++ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: PTS=%"PRId64"\n", __func__, frame->pts); ++ return 0; ++} ++ ++static int deint_v4l2m2m_config_props(AVFilterLink *outlink) ++{ ++ AVFilterLink *inlink = outlink->src->inputs[0]; ++ AVFilterContext *avctx = outlink->src; ++ DeintV4L2M2MContext *priv = avctx->priv; ++ DeintV4L2M2MContextShared *ctx = priv->shared; ++ int ret; ++ ++ ctx->height = avctx->inputs[0]->h; ++ ctx->width = avctx->inputs[0]->w; ++ + if (ctx->filter_type == FILTER_V4L2_SCALE) { + if ((ret = ff_scale_eval_dimensions(priv, + priv->w_expr, priv->h_expr, @@ -27551,68 +16567,75 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + ctx->output_height = ctx->height; + } + -+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d\n", __func__, ctx->width, ctx->height, ctx->output_width, ctx->output_height); - - outlink->time_base = inlink->time_base; -- outlink->w = inlink->w; -- outlink->h = inlink->h; -- outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; ++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d\n", __func__, ++ ctx->width, ctx->height, ctx->output_width, ctx->output_height); ++ ++ outlink->time_base = inlink->time_base; + outlink->w = ctx->output_width; + outlink->h = ctx->output_height; - outlink->format = inlink->format; - outlink->frame_rate = 
(AVRational) {1, 0}; // Deny knowledge of frame rate - ++ outlink->format = inlink->format; ++ + if (inlink->sample_aspect_ratio.num) + outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); + else + outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + - ret = deint_v4l2m2m_find_device(ctx); - if (ret) - return ret; -@@ -1055,18 +1560,19 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink) - - static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) - { -- const int is_linear = (drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR || -- drm_desc->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID); ++ return deint_v4l2m2m_find_device(ctx); ++} ++ ++static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) ++{ + const uint64_t mod = drm_desc->objects[0].format_modifier; + const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); + + // Only currently support single object things + if (drm_desc->nb_objects != 1) + return 0; - - switch (drm_desc->layers[0].format) { - case DRM_FORMAT_YUV420: -- if (is_linear) -- return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_YUV420 : 0; -- break; ++ ++ switch (drm_desc->layers[0].format) { ++ case DRM_FORMAT_YUV420: + return is_linear ? V4L2_PIX_FMT_YUV420 : 0; - case DRM_FORMAT_NV12: -- if (is_linear) -- return drm_desc->nb_objects == 1 ? V4L2_PIX_FMT_NV12 : 0; -- break; ++ case DRM_FORMAT_NV12: + return is_linear ? V4L2_PIX_FMT_NV12 : -+ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? 
V4L2_PIX_FMT_NV12_COL128 : 0; - default: - break; - } -@@ -1089,7 +1595,7 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) - - if (ctx->field_order == V4L2_FIELD_ANY) { - const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; -- const uint32_t pixelformat = desc_pixelformat(drm_desc); ++#if CONFIG_SAND ++ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : ++#endif ++ 0; ++ default: ++ break; ++ } ++ return 0; ++} ++ ++static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterContext *avctx = link->dst; ++ DeintV4L2M2MContext *priv = avctx->priv; ++ DeintV4L2M2MContextShared *ctx = priv->shared; ++ V4L2Queue *capture = &ctx->capture; ++ V4L2Queue *output = &ctx->output; ++ int ret; ++ ++ av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n", ++ __func__, in->pts, in->pkt_dts, frame_is_tff(in), frame_is_interlaced(in), in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); ++ ++ if (ctx->field_order == V4L2_FIELD_ANY) { ++ const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; + uint32_t pixelformat = desc_pixelformat(drm_desc); - - if (pixelformat == 0) { - av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", -@@ -1104,29 +1610,49 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) - av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, - drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); - -- ret = deint_v4l2m2m_set_format(output, pixelformat, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -- if (ret) ++ ++ if (pixelformat == 0) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", ++ av_fourcc2str(drm_desc->layers[0].format), ++ 
drm_desc->nb_objects, drm_desc->objects[0].format_modifier); ++ return AVERROR(EINVAL); ++ } ++ ++ ctx->orig_width = drm_desc->layers[0].planes[0].pitch; ++ ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; ++ ++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, ++ drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); ++ + if ((ret = set_src_fmt(output, in)) != 0) { + av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n", + av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier); @@ -27622,70 +16645,245 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + ret = do_s_fmt(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n"); - return ret; ++ return ret; + } - -- ret = deint_v4l2m2m_set_format(capture, pixelformat, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -- if (ret) ++ + if (ctx->output_format != AV_PIX_FMT_NONE) + pixelformat = fmt_av_to_v4l2(ctx->output_format); + ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n"); - return ret; ++ return ret; + } - - ret = deint_v4l2m2m_allocate_buffers(capture); -- if (ret) ++ ++ ret = deint_v4l2m2m_allocate_buffers(capture); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n"); - return ret; ++ return ret; + } - - ret = deint_v4l2m2m_streamon(capture); -- if (ret) ++ ++ ret = deint_v4l2m2m_streamon(capture); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret)); - return ret; ++ return ret; + } - - ret = deint_v4l2m2m_allocate_buffers(output); -- if (ret) ++ ++ ret = deint_v4l2m2m_allocate_buffers(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n"); - return 
ret; ++ return ret; + } - - ret = deint_v4l2m2m_streamon(output); -- if (ret) ++ ++ ret = deint_v4l2m2m_streamon(output); + if (ret) { + av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret)); - return ret; ++ return ret; + } - - if (in->top_field_first) - ctx->field_order = V4L2_FIELD_INTERLACED_TB; -@@ -1251,7 +1777,7 @@ again: - return did_something ? 0 : FFERROR_NOT_READY; - } - --static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) ++ ++ if (frame_is_tff(in)) ++ ctx->field_order = V4L2_FIELD_INTERLACED_TB; ++ else ++ ctx->field_order = V4L2_FIELD_INTERLACED_BT; ++ ++ { ++ struct v4l2_encoder_cmd ecmd = { ++ .cmd = V4L2_ENC_CMD_STOP ++ }; ++ ctx->has_enc_stop = 0; ++ if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n"); ++ ctx->has_enc_stop = 1; ++ } ++ else { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno))); ++ } ++ ++ } ++ } ++ ++ ret = deint_v4l2m2m_enqueue_frame(output, in); ++ ++ av_log(priv, AV_LOG_TRACE, ">>> %s: %s\n", __func__, av_err2str(ret)); ++ return ret; ++} ++ ++static int ++ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s, ++ AVFilterLink * const inlink) ++{ ++ int instatus; ++ int64_t inpts; ++ ++ if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0) ++ return 0; ++ ++ s->drain = instatus; ++ s->drain_pts = inpts; ++ s->drain_state = DRAIN_TIMEOUT; ++ ++ if (s->field_order == V4L2_FIELD_ANY) { // Not yet started ++ s->drain_state = DRAIN_DONE; ++ } ++ else if (s->one_to_one) { ++ s->drain_state = DRAIN_LAST; ++ } ++ else if (s->has_enc_stop) { ++ struct v4l2_encoder_cmd ecmd = { ++ .cmd = V4L2_ENC_CMD_STOP ++ }; ++ if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) { ++ av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n"); ++ s->drain_state = DRAIN_EOS; ++ } ++ else { ++ av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", 
av_err2str(AVERROR(errno))); ++ } ++ } ++ return 1; ++} ++ ++static int deint_v4l2m2m_activate(AVFilterContext *avctx) ++{ ++ DeintV4L2M2MContext * const priv = avctx->priv; ++ DeintV4L2M2MContextShared *const s = priv->shared; ++ AVFilterLink * const outlink = avctx->outputs[0]; ++ AVFilterLink * const inlink = avctx->inputs[0]; ++ int n = 0; ++ int cn = 99; ++ int did_something = 0; ++ ++ av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); ++ ++ FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); ++ ++ ack_inlink(avctx, s, inlink); ++ ++ if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! ++ { ++ AVFrame * frame = av_frame_alloc(); ++ int rv; ++ ++ recycle_q(&s->output); ++ n = count_enqueued(&s->output); ++ ++ if (frame == NULL) { ++ av_log(priv, AV_LOG_ERROR, "%s: error allocating frame\n", __func__); ++ return AVERROR(ENOMEM); ++ } ++ ++ rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, ++ drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0); ++ if (rv != 0) { ++ av_frame_free(&frame); ++ if (rv == AVERROR_EOF) { ++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__); ++ s->drain_state = DRAIN_DONE; ++ } ++ else if (rv == AVERROR(EAGAIN)) { ++ if (s->drain_state != DRAIN_NONE) { ++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__); ++ s->drain_state = DRAIN_DONE; ++ } ++ } ++ else { ++ av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); ++ return rv; ++ } ++ } ++ else { ++ frame_set_progressive(frame); ++ // frame is always consumed by filter_frame - even on error despite ++ // a somewhat confusing comment in the header ++ rv = ff_filter_frame(outlink, frame); ++ ++s->frames_tx; ++ ++ av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); ++ did_something = 1; ++ ++ if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) { ++ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ last - drain done\n", __func__); ++ s->drain_state = DRAIN_DONE; ++ } ++ } ++ ++ cn = 
count_enqueued(&s->capture); ++ } ++ ++ if (s->drain_state == DRAIN_DONE) { ++ ff_outlink_set_status(outlink, s->drain, s->drain_pts); ++ av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain)); ++ return 0; ++ } ++ ++ recycle_q(&s->output); ++ n = count_enqueued(&s->output); ++ ++ while (n < 6 && !s->drain) { ++ AVFrame * frame; ++ int rv; ++ ++ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { ++ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); ++ return rv; ++ } ++ ++ if (frame == NULL) { ++ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); ++ if (!ack_inlink(avctx, s, inlink)) { ++ ff_inlink_request_frame(inlink); ++ av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); ++ } ++ break; ++ } ++ ++s->frames_rx; ++ ++ rv = deint_v4l2m2m_filter_frame(inlink, frame); ++ av_frame_free(&frame); ++ ++ if (rv != 0) ++ return rv; ++ ++ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); ++ did_something = 1; ++ ++n; ++ } ++ ++ if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) { ++ ff_filter_set_ready(avctx, 1); ++ did_something = 1; ++ av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); ++ } ++ ++ av_log(priv, AV_LOG_TRACE, ">>> %s: OK (n=%d, cn=%d)\n", __func__, n, cn); ++ return did_something ? 
0 : FFERROR_NOT_READY; ++} ++ +static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type) - { - DeintV4L2M2MContext * const priv = avctx->priv; - DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); -@@ -1262,6 +1788,7 @@ static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) - } - priv->shared = ctx; - ctx->logctx = priv; ++{ ++ DeintV4L2M2MContext * const priv = avctx->priv; ++ DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); ++ ++ if (!ctx) { ++ av_log(priv, AV_LOG_ERROR, "%s: error %d allocating context\n", __func__, 0); ++ return AVERROR(ENOMEM); ++ } ++ priv->shared = ctx; ++ ctx->logctx = priv; + ctx->filter_type = filter_type; - ctx->fd = -1; - ctx->output.ctx = ctx; - ctx->output.num_buffers = 8; -@@ -1274,9 +1801,52 @@ static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) - - atomic_init(&ctx->refcount, 1); - ++ ctx->fd = -1; ++ ctx->output.ctx = ctx; ++ ctx->output.num_buffers = 8; ++ ctx->output.name = "OUTPUT"; ++ ctx->capture.ctx = ctx; ++ ctx->capture.num_buffers = 12; ++ ctx->capture.name = "CAPTURE"; ++ ctx->done = 0; ++ ctx->field_order = V4L2_FIELD_ANY; ++ ++ pts_track_init(&ctx->track, priv); ++ ++ atomic_init(&ctx->refcount, 1); ++ + if (priv->output_format_string) { + ctx->output_format = av_get_pix_fmt(priv->output_format_string); + if (ctx->output_format == AV_PIX_FMT_NONE) { @@ -27719,9 +16917,9 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED); + STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED); + - return 0; - } - ++ return 0; ++} ++ +static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) +{ + return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE); @@ -27729,16 +16927,39 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + +static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) +{ -+ return 
common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE); ++ int rv; ++ DeintV4L2M2MContext * priv; ++ DeintV4L2M2MContextShared * ctx; ++ ++ if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0) ++ return rv; ++ ++ priv = avctx->priv; ++ ctx = priv->shared; ++ ++ ctx->one_to_one = 1; ++ return 0; +} + - static void deint_v4l2m2m_uninit(AVFilterContext *avctx) - { - DeintV4L2M2MContext *priv = avctx->priv; -@@ -1294,6 +1864,51 @@ static const AVOption deinterlace_v4l2m2m_options[] = { - - AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); - ++static void deint_v4l2m2m_uninit(AVFilterContext *avctx) ++{ ++ DeintV4L2M2MContext *priv = avctx->priv; ++ DeintV4L2M2MContextShared *ctx = priv->shared; ++ ++ av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", ++ ctx->frames_rx, ctx->frames_tx); ++ ctx->done = 1; ++ ctx->logctx = NULL; // Log to NULL works, log to missing crashes ++ pts_track_uninit(&ctx->track); ++ deint_v4l2m2m_destroy_context(ctx); ++} ++ ++static const AVOption deinterlace_v4l2m2m_options[] = { ++ { NULL }, ++}; ++ ++AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); ++ +#define OFFSET(x) offsetof(DeintV4L2M2MContext, x) +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) + @@ -27784,13 +17005,33 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + +AVFILTER_DEFINE_CLASS(scale_v4l2m2m); + - static const AVFilterPad deint_v4l2m2m_inputs[] = { - { - .name = "default", -@@ -1321,3 +1936,17 @@ AVFilter ff_vf_deinterlace_v4l2m2m = { - .priv_class = &deinterlace_v4l2m2m_class, - .activate = deint_v4l2m2m_activate, - }; ++static const AVFilterPad deint_v4l2m2m_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ }, ++}; ++ ++static const AVFilterPad deint_v4l2m2m_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .config_props = deint_v4l2m2m_config_props, ++ }, ++}; ++ ++AVFilter ff_vf_deinterlace_v4l2m2m = { ++ .name = "deinterlace_v4l2m2m", ++ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M deinterlacer"), ++ 
.priv_size = sizeof(DeintV4L2M2MContext), ++ .init = &deint_v4l2m2m_init, ++ .uninit = &deint_v4l2m2m_uninit, ++ FILTER_INPUTS(deint_v4l2m2m_inputs), ++ FILTER_OUTPUTS(deint_v4l2m2m_outputs), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), ++ .priv_class = &deinterlace_v4l2m2m_class, ++ .activate = deint_v4l2m2m_activate, ++}; + +AVFilter ff_vf_scale_v4l2m2m = { + .name = "scale_v4l2m2m", @@ -27800,4020 +17041,6165 @@ index 1a3bef5bcba6..2df39ec0f19f 100644 + .uninit = &deint_v4l2m2m_uninit, + FILTER_INPUTS(deint_v4l2m2m_inputs), + FILTER_OUTPUTS(deint_v4l2m2m_outputs), -+ FILTER_SINGLE_SAMPLEFMT(AV_PIX_FMT_DRM_PRIME), ++ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_DRM_PRIME), + .priv_class = &scale_v4l2m2m_class, + .activate = deint_v4l2m2m_activate, +}; + - -From ce9d8c33e7d64ba11d1f1df4ee5340b63ab84c2f Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 22 Sep 2022 14:54:46 +0000 -Subject: [PATCH 071/186] v4l2_m2m: Adjust buffer allocation based on min/max - controls - -Clip requested buffer count to min/max declared by driver. -If 0 buffers requested then set to min+2. -This allows encode to keep its src buffer count down to a plausible -minimum which helps with flow control. 
---- - libavcodec/v4l2_context.c | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 6b97eab41ed7..ba36689ff3a6 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -1187,6 +1187,7 @@ fail_release: - - int ff_v4l2_context_init(V4L2Context* ctx) - { -+ struct v4l2_queryctrl qctrl; - V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - int ret; - -@@ -1228,6 +1229,24 @@ int ff_v4l2_context_init(V4L2Context* ctx) - goto fail_unref_hwframes; - } - -+ memset(&qctrl, 0, sizeof(qctrl)); -+ qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; -+ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { -+ ret = AVERROR(errno); -+ if (ret != AVERROR(EINVAL)) { -+ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret)); -+ goto fail_unref_hwframes; -+ } -+ // Control unsupported - set default if wanted -+ if (ctx->num_buffers < 2) -+ ctx->num_buffers = 4; -+ } -+ else { -+ if (ctx->num_buffers < 2) -+ ctx->num_buffers = qctrl.minimum + 2; -+ ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum); -+ } +diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c +new file mode 100644 +index 000000000000..67750e4f12b8 +--- /dev/null ++++ b/libavfilter/vf_unsand.c +@@ -0,0 +1,227 @@ ++/* ++ * Copyright (c) 2007 Bobby Bingham ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ + - ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); - if (ret < 0) - goto fail_unref_hwframes; - -From d67aed711e1ad85b3d4dfc3d363c0bdd3ac40001 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 22 Sep 2022 15:00:12 +0000 -Subject: [PATCH 072/186] v4l2_m2m_dec: If src Q is full then wait indefinitely - for buffer - -If it is not possible to add another buffer to the src Q then alawys -wait indefinitely for either an output frame or the Q to have space. - -This has issues if the reason that the Q is stalled is due to dst buffer -exhaustion and buffers cannot be returned async by another thread but -the current scheme confuses ffmpegs pipeline scheduling. ---- - libavcodec/v4l2_m2m_dec.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 485a96f4b487..bb183097f6f5 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -456,9 +456,9 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - if (dst_rv != 0 && TRY_DQ(src_rv)) { - // Pick a timeout depending on state - const int t = -+ src_rv == NQ_Q_FULL ? -1 : - src_rv == NQ_DRAINING ? 300 : -- prefer_dq ? 5 : -- src_rv == NQ_Q_FULL ? -1 : 0; -+ prefer_dq ? 
5 : 0; - - // Dequeue frame will unref any previous contents of frame - // if it returns success so we don't need an explicit unref - -From 04ed865af7885364c4ae7d5e790a887c2c500275 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 22 Sep 2022 15:12:27 +0000 -Subject: [PATCH 073/186] vf_deinterlace_v4l2m2m: Add Q name to structure for - debug - ---- - libavfilter/vf_deinterlace_v4l2m2m.c | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index 2df39ec0f19f..4edecc02bff5 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -84,6 +84,7 @@ typedef struct V4L2Queue { - struct v4l2_selection sel; - int num_buffers; - V4L2Buffer *buffers; -+ const char * name; - DeintV4L2M2MContextShared *ctx; - } V4L2Queue; - -@@ -1792,8 +1793,10 @@ static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filt - ctx->fd = -1; - ctx->output.ctx = ctx; - ctx->output.num_buffers = 8; -+ ctx->output.name = "OUTPUT"; - ctx->capture.ctx = ctx; - ctx->capture.num_buffers = 12; -+ ctx->capture.name = "CAPTURE"; - ctx->done = 0; - ctx->field_order = V4L2_FIELD_ANY; - - -From 8194a72e9599b4beacce6676e86b08028a4c979d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 22 Sep 2022 16:08:42 +0000 -Subject: [PATCH 074/186] v4l2_m2m_enc: Set src buffer count to min+2 by - default - -Set output.num_buffers to 0 by default which will then be set to min+2 -by the allocation code. This fixes an issue where the deinterlacer had -fewer dest buffer than the encoder has src buffers and so ran dry -creating deadlock in the ffmpeg filter chain. 
---- - libavcodec/v4l2_m2m_enc.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index 099ad23928d3..b8ba815c379d 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -672,9 +672,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx) - #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM - - #define V4L_M2M_CAPTURE_OPTS \ -- V4L_M2M_DEFAULT_OPTS,\ -+ { "num_output_buffers", "Number of buffers in the output context",\ -+ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ - { "num_capture_buffers", "Number of buffers in the capture context", \ -- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } -+ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } - - static const AVOption mpeg4_options[] = { - V4L_M2M_CAPTURE_OPTS, - -From f949fe93ac776f6a2e9cec6c171a8c47c2b00c44 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 22 Sep 2022 16:13:57 +0000 -Subject: [PATCH 075/186] vf_deinterlace_m2m: For deinterlace set outlink FR to - twice inlink - -We used to set the outlink framerate to unknown but it turns out that -ffmpegs filter pipeline copes with that badly. Otherwise leave at 0,0 -which will copy FR from inlink to outlink. 
---- - libavfilter/vf_deinterlace_v4l2m2m.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index 4edecc02bff5..c52dae1c44a8 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -1534,13 +1534,16 @@ static int deint_v4l2m2m_config_props(AVFilterLink *outlink) - ctx->output_height = ctx->height; - } - -- av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d\n", __func__, ctx->width, ctx->height, ctx->output_width, ctx->output_height); -+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__, -+ ctx->width, ctx->height, ctx->output_width, ctx->output_height, -+ inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den); - - outlink->time_base = inlink->time_base; - outlink->w = ctx->output_width; - outlink->h = ctx->output_height; - outlink->format = inlink->format; -- outlink->frame_rate = (AVRational) {1, 0}; // Deny knowledge of frame rate -+ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0) -+ outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den}; - - if (inlink->sample_aspect_ratio.num) - outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); - -From e207d1dab82d5c1684cb87ed8c957d93b3913a4e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 23 Sep 2022 11:30:56 +0000 -Subject: [PATCH 076/186] v4l2m2m: Add ff_v4l2_dq_all to drain all buffers from - a Q - -Useful for where (encode) we might have drmprime buffers that we want to -return to the source ASAP. 
---- - libavcodec/v4l2_context.c | 17 +++++++++++------ - libavcodec/v4l2_context.h | 2 ++ - 2 files changed, 13 insertions(+), 6 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index ba36689ff3a6..4a359bf45e30 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -707,17 +707,22 @@ clean_v4l2_buffer(V4L2Buffer * const avbuf) - return avbuf; - } - -+void -+ff_v4l2_dq_all(V4L2Context *const ctx) ++/** ++ * @file ++ * format and noformat video filters ++ */ ++ ++#include ++ ++#include "libavutil/internal.h" ++#include "libavutil/mem.h" ++#include "libavutil/pixdesc.h" ++#include "libavutil/opt.h" ++#include "libavutil/rpi_sand_fns.h" ++ ++#include "avfilter.h" ++#include "formats.h" ++#include "video.h" ++ ++typedef struct UnsandContext { ++ const AVClass *class; ++} UnsandContext; ++ ++static av_cold void uninit(AVFilterContext *ctx) +{ -+ V4L2Buffer * avbuf; -+ do { -+ get_qbuf(ctx, &avbuf, 0); -+ } while (avbuf); ++// UnsandContext *s = ctx->priv; +} + - static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) - { - int i; - - /* get back as many output buffers as possible */ -- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { -- V4L2Buffer * avbuf; -- do { -- get_qbuf(ctx, &avbuf, 0); -- } while (avbuf); -- } -+ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) -+ ff_v4l2_dq_all(ctx); - - for (i = 0; i < ctx->num_buffers; i++) { - V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data; -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 21265f1bd77b..523c53e97dc5 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -218,4 +218,6 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const - */ - int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); - -+void ff_v4l2_dq_all(V4L2Context *const ctx); -+ - #endif // AVCODEC_V4L2_CONTEXT_H - -From fb8f90688761ae011e9b4cca65a51e13416a498c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 23 
Sep 2022 11:38:36 +0000 -Subject: [PATCH 077/186] v4l2_m2m_enc: DQ output more frequently - -Ensure that we DQ any released src buffers on every op to avoid deadlock -with source. - -There is a plausible argument that this patch is inelegant and the drain -should be integrated into dq_buf, but that is a further reaching delta. ---- - libavcodec/v4l2_m2m_enc.c | 12 ++++++++++-- - 1 file changed, 10 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index b8ba815c379d..a992a3cccc68 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -421,6 +421,8 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const output = &s->output; - -+ ff_v4l2_dq_all(output); -+ - // Signal EOF if needed - if (!frame) { - return ff_v4l2_context_enqueue_frame(output, frame); -@@ -492,6 +494,8 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) - AVFrame *frame = s->frame; - int ret; - -+ ff_v4l2_dq_all(output); -+ - if (s->draining) - goto dequeue; - -@@ -528,7 +532,9 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) - } - - dequeue: -- if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0) -+ ret = ff_v4l2_context_dequeue_packet(capture, avpkt); -+ ff_v4l2_dq_all(output); -+ if (ret) - return ret; - - if (capture->first_buf == 1) { -@@ -560,7 +566,9 @@ dequeue: - s->extdata_size = len; - } - -- if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0) -+ ret = ff_v4l2_context_dequeue_packet(capture, avpkt); -+ ff_v4l2_dq_all(output); -+ if (ret) - return ret; - } - - -From c90d17e99a8d66762c890bca316b6f52da2e6278 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 26 Sep 2022 18:20:00 +0100 -Subject: [PATCH 078/186] conf_native: Remove --enable-rpi from all builds - ---- - pi-util/conf_native.sh | 5 +++-- - 1 file changed, 3 insertions(+), 2 
deletions(-) - -diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh -index 37cea71756ae..f22d531ca448 100755 ---- a/pi-util/conf_native.sh -+++ b/pi-util/conf_native.sh -@@ -54,9 +54,9 @@ if [ $MMAL ]; then - RPI_LIBDIRS="-L$RPI_OPT_VC/lib" - RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000" - RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group" -- RPIOPTS="--enable-mmal --enable-rpi" -+ RPIOPTS="--enable-mmal" - else -- RPIOPTS="--disable-mmal --enable-sand" -+ RPIOPTS="--disable-mmal" - fi - - C=`lsb_release -sc` -@@ -89,6 +89,7 @@ $FFSRC/configure \ - $MCOPTS\ - --disable-stripping\ - --disable-thumb\ -+ --enable-sand\ - --enable-v4l2-request\ - --enable-libdrm\ - --enable-vout-egl\ - -From 976ada8fe321b41bb2989b83b232b99d3c6720d1 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 29 Sep 2022 19:48:08 +0000 -Subject: [PATCH 079/186] v4l2_m2m_dec: Deal correctly with avcC H264 data in - extradata - -Decoders expect AnnexB style headers, mkv and similar formats have -somewhat oddly wrapped extradata. Convert to annex-b style before use. 
---- - libavcodec/v4l2_m2m.h | 2 +- - libavcodec/v4l2_m2m_dec.c | 177 ++++++++++++++++++++++++++++++++++++-- - 2 files changed, 169 insertions(+), 10 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index ee72beb0522b..babf101d650a 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -118,7 +118,7 @@ typedef struct V4L2m2mContext { - /* Ext data sent */ - int extdata_sent; - /* Ext data sent in packet - overrides ctx */ -- uint8_t * extdata_data; -+ void * extdata_data; - size_t extdata_size; - - #define FF_V4L2_QUIRK_REINIT_ALWAYS 1 -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index bb183097f6f5..6bd9926b3f31 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -46,6 +46,71 @@ - #define STATS_LAST_COUNT_MAX 64 - #define STATS_INTERVAL_MAX (1 << 30) - -+#ifndef FF_API_BUFFER_SIZE_T -+#define FF_API_BUFFER_SIZE_T 1 -+#endif -+ -+#define DUMP_FAILED_EXTRADATA 0 -+ -+#if DUMP_FAILED_EXTRADATA -+static inline char hex1(unsigned int x) ++static av_cold int init(AVFilterContext *ctx) +{ -+ x &= 0xf; -+ return x <= 9 ? '0' + x : 'a' + x - 10; -+} -+ -+static inline char * hex2(char * s, unsigned int x) -+{ -+ *s++ = hex1(x >> 4); -+ *s++ = hex1(x); -+ return s; -+} -+ -+static inline char * hex4(char * s, unsigned int x) -+{ -+ s = hex2(s, x >> 8); -+ s = hex2(s, x); -+ return s; -+} -+ -+static inline char * dash2(char * s) -+{ -+ *s++ = '-'; -+ *s++ = '-'; -+ return s; -+} -+ -+static void -+data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) -+{ -+ size_t i; -+ s = hex4(s, offset); -+ m += offset; -+ for (i = 0; i != 8; ++i) { -+ *s++ = ' '; -+ s = len > i + offset ? hex2(s, *m++) : dash2(s); -+ } -+ *s++ = ' '; -+ *s++ = ':'; -+ for (; i != 16; ++i) { -+ *s++ = ' '; -+ s = len > i + offset ? 
hex2(s, *m++) : dash2(s); -+ } -+ *s++ = 0; -+} -+ -+static void -+log_dump(void * logctx, int lvl, const void * const data, const size_t len) -+{ -+ size_t i; -+ for (i = 0; i < len; i += 16) { -+ char buf[80]; -+ data16(buf, i, data, len); -+ av_log(logctx, lvl, "%s\n", buf); -+ } -+} -+#endif -+ - static int64_t pts_stats_guess(const pts_stats_t * const stats) - { - if (stats->last_pts == AV_NOPTS_VALUE || -@@ -98,6 +163,98 @@ static void pts_stats_init(pts_stats_t * const stats, void * logctx, const char - }; - } - -+// If abdata == NULL then this just counts space required -+// Unpacks avcC if detected -+static int -+h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) -+{ -+ const uint8_t * const xdend = extradata + extrasize; -+ const uint8_t * p = extradata; -+ uint8_t * d = abdata; -+ unsigned int n; -+ unsigned int len; -+ const unsigned int hdrlen = 4; -+ unsigned int need_pps = 1; -+ -+ if (extrasize < 8) -+ return AVERROR(EINVAL); -+ -+ if (p[0] == 0 && p[1] == 0) { -+ // Assume a couple of leading zeros are good enough to indicate NAL -+ if (abdata) -+ memcpy(d, p, extrasize); -+ return extrasize; -+ } -+ -+ // avcC starts with a 1 -+ if (p[0] != 1) -+ return AVERROR(EINVAL); -+ -+ p += 5; -+ n = *p++ & 0x1f; -+ -+doxps: -+ while (n--) { -+ if (xdend - p < 2) -+ return AVERROR(EINVAL); -+ len = (p[0] << 8) | p[1]; -+ p += 2; -+ if (xdend - p < (ptrdiff_t)len) -+ return AVERROR(EINVAL); -+ if (abdata) { -+ d[0] = 0; -+ d[1] = 0; -+ d[2] = 0; -+ d[3] = 1; -+ memcpy(d + 4, p, len); -+ } -+ d += len + hdrlen; -+ p += len; -+ } -+ if (need_pps) { -+ need_pps = 0; -+ if (p >= xdend) -+ return AVERROR(EINVAL); -+ n = *p++; -+ goto doxps; -+ } -+ -+ return d - abdata; -+} -+ -+static int -+copy_extradata(AVCodecContext * const avctx, -+ const void * const src_data, const int src_len, -+ void ** const pdst_data, size_t * const pdst_len) -+{ -+ int len; -+ -+ *pdst_len = 0; -+ av_freep(pdst_data); -+ -+ if (avctx->codec_id == 
AV_CODEC_ID_H264) -+ len = h264_xd_copy(src_data, src_len, NULL); -+ else -+ len = src_len < 0 ? AVERROR(EINVAL) : src_len; -+ -+ // Zero length is OK but we swant to stop - -ve is error val -+ if (len <= 0) -+ return len; -+ -+ if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) -+ return AVERROR(ENOMEM); -+ -+ if (avctx->codec_id == AV_CODEC_ID_H264) -+ h264_xd_copy(src_data, src_len, *pdst_data); -+ else -+ memcpy(*pdst_data, src_data, len); -+ *pdst_len = len; ++// UnsandContext *s = ctx->priv; + + return 0; +} + + ++static int filter_frame(AVFilterLink *link, AVFrame *in) ++{ ++ AVFilterLink * const outlink = link->dst->outputs[0]; ++ AVFrame *out = NULL; ++ int rv = 0; + - static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) - { - int ret; -@@ -277,13 +434,8 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); - if (side_data) { - av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); -- av_freep(&s->extdata_data); -- if ((s->extdata_data = av_malloc(side_size ? 
side_size : 1)) == NULL) { -- av_log(avctx, AV_LOG_ERROR, "Failed to alloc %zd bytes of extra data\n", side_size); -- return AVERROR(ENOMEM); -- } -- memcpy(s->extdata_data, side_data, side_size); -- s->extdata_size = side_size; -+ if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) -+ av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret)); - s->extdata_sent = 0; - } - -@@ -359,8 +511,6 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); - else if (s->extdata_data) - ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); -- else -- ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, avctx->extradata, avctx->extradata_size); - - if (ret == AVERROR(EAGAIN)) { - // Out of input buffers - keep packet -@@ -770,6 +920,15 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - return ret; - } - -+ if (avctx->extradata && -+ (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); -+#if DUMP_FAILED_EXTRADATA -+ log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); -+#endif -+ return ret; ++ if (outlink->format == in->format) { ++ // If nothing to do then do nothing ++ out = in; ++ } ++ else ++ { ++ if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL) ++ { ++ rv = AVERROR(ENOMEM); ++ goto fail; ++ } ++ if (av_rpi_sand_to_planar_frame(out, in) != 0) ++ { ++ rv = -1; ++ goto fail; ++ } ++ ++ av_frame_free(&in); + } + - if ((ret = v4l2_prepare_decoder(s)) < 0) - return ret; - - -From 4c7e2544e1bb6a5517ef45b9520cf1a50a2f04c3 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 30 Sep 2022 14:20:23 +0000 -Subject: 
[PATCH 080/186] v4l2_request_hevc: Fix up - V4L2_CID_CODEC_STATELESS_BASE if missing - ---- - libavcodec/hevc-ctrls-v4.h | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h -index 7829d8208435..c02fdbe5a8e9 100644 ---- a/libavcodec/hevc-ctrls-v4.h -+++ b/libavcodec/hevc-ctrls-v4.h -@@ -53,6 +53,13 @@ - #include - #include - -+#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS -+#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */ -+#endif -+#ifndef V4L2_CID_CODEC_STATELESS_BASE -+#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900) ++ return ff_filter_frame(outlink, out); ++ ++fail: ++ av_frame_free(&out); ++ av_frame_free(&in); ++ return rv; ++} ++ ++#if 0 ++static void dump_fmts(const AVFilterFormats * fmts) ++{ ++ int i; ++ if (fmts== NULL) { ++ printf("NULL\n"); ++ return; ++ } ++ for (i = 0; i < fmts->nb_formats; ++i) { ++ printf(" %d", fmts->formats[i]); ++ } ++ printf("\n"); ++} +#endif + - #define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++static int query_formats(AVFilterContext *ctx) ++{ ++// UnsandContext *s = ctx->priv; ++ int ret; ++ ++ // If we aren't connected at both ends then just do nothing ++ if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL) ++ return 0; ++ ++ // Our output formats depend on our input formats and we can't/don't ++ // want to convert between bit depths so we need to wait for the source ++ // to have an opinion before we do ++ if (ctx->inputs[0]->incfg.formats == NULL) ++ return AVERROR(EAGAIN); ++ ++ // Accept anything ++ if (ctx->inputs[0]->outcfg.formats == NULL && ++ (ret = ff_formats_ref(ctx->inputs[0]->incfg.formats, &ctx->inputs[0]->outcfg.formats)) < 0) ++ return ret; ++ ++ // Filter out sand formats ++ ++ // Generate a container if we don't already have one ++ if (ctx->outputs[0]->incfg.formats == NULL) ++ { ++ // Somewhat rubbish way of ensuring we have a good 
structure ++ const static enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE}; ++ AVFilterFormats *formats = ff_make_format_list(out_fmts); ++ ++ if (formats == NULL) ++ return AVERROR(ENOMEM); ++ if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->incfg.formats)) < 0) ++ return ret; ++ } ++ ++ // Replace old format list with new filtered list derived from what our ++ // input says it can do ++ { ++ const AVFilterFormats * const src_ff = ctx->inputs[0]->outcfg.formats; ++ AVFilterFormats * const dst_ff = ctx->outputs[0]->incfg.formats; ++ enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats); ++ int i; ++ int n = 0; ++ int seen_420p = 0; ++ int seen_420p10 = 0; ++ ++ for (i = 0; i < src_ff->nb_formats; ++i) { ++ const enum AVPixelFormat f = src_ff->formats[i]; ++ ++ switch (f){ ++ case AV_PIX_FMT_YUV420P: ++ case AV_PIX_FMT_SAND128: ++ case AV_PIX_FMT_RPI4_8: ++ if (!seen_420p) { ++ seen_420p = 1; ++ dst_fmts[n++] = AV_PIX_FMT_YUV420P; ++ } ++ break; ++ case AV_PIX_FMT_SAND64_10: ++ case AV_PIX_FMT_YUV420P10: ++ case AV_PIX_FMT_RPI4_10: ++ if (!seen_420p10) { ++ seen_420p10 = 1; ++ dst_fmts[n++] = AV_PIX_FMT_YUV420P10; ++ } ++ break; ++ default: ++ dst_fmts[n++] = f; ++ break; ++ } ++ } ++ ++ av_freep(&dst_ff->formats); ++ dst_ff->formats = dst_fmts; ++ dst_ff->nb_formats = n; ++ } ++ ++// printf("Unsand: %s calc: ", __func__); ++// dump_fmts(ctx->outputs[0]->incfg.formats); ++ ++ return 0; ++} ++ ++ ++#define OFFSET(x) offsetof(UnsandContext, x) ++static const AVOption unsand_options[] = { ++ { NULL } ++}; ++ ++ ++AVFILTER_DEFINE_CLASS(unsand); ++ ++static const AVFilterPad avfilter_vf_unsand_inputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO, ++ .filter_frame = filter_frame, ++ }, ++ { NULL } ++}; ++ ++static const AVFilterPad avfilter_vf_unsand_outputs[] = { ++ { ++ .name = "default", ++ .type = AVMEDIA_TYPE_VIDEO ++ }, ++}; ++ ++AVFilter ff_vf_unsand = { ++ .name = 
"unsand", ++ .description = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"), ++ ++ .init = init, ++ .uninit = uninit, ++ ++ FILTER_QUERY_FUNC(query_formats), ++ ++ .priv_size = sizeof(UnsandContext), ++ .priv_class = &unsand_class, ++ ++ FILTER_INPUTS(avfilter_vf_unsand_inputs), ++ FILTER_OUTPUTS(avfilter_vf_unsand_outputs), ++}; ++ +diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c +index 60c896e964eb..3967be997e86 100644 +--- a/libavformat/matroskaenc.c ++++ b/libavformat/matroskaenc.c +@@ -84,6 +84,10 @@ - #define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) - -From af32492d37a1d898c05422ba405423ced65adbd7 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sat, 1 Oct 2022 13:40:57 +0000 -Subject: [PATCH 081/186] vf_deinterlace_v4l2m2m: Fix compile on m/c without - V4L2 SAND - ---- - libavfilter/vf_deinterlace_v4l2m2m.c | 33 +++++++++++++++++++++++----- - 1 file changed, 28 insertions(+), 5 deletions(-) - -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index c52dae1c44a8..716789f9881c 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -35,6 +35,8 @@ - #include - #include + #define IS_WEBM(mkv) (CONFIG_WEBM_MUXER && CONFIG_MATROSKA_MUXER ? 
\ + ((mkv)->mode == MODE_WEBM) : CONFIG_WEBM_MUXER) ++ ++/* Reserved size for H264 headers if not extant at init time */ ++#define MAX_H264_HEADER_SIZE 1024 ++ + #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ + !(mkv)->is_live) + +@@ -1136,8 +1140,12 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn + case AV_CODEC_ID_WAVPACK: + return put_wv_codecpriv(dyn_cp, extradata, extradata_size); + case AV_CODEC_ID_H264: +- return ff_isom_write_avcc(dyn_cp, extradata, +- extradata_size); ++ if (extradata_size) ++ return ff_isom_write_avcc(dyn_cp, extradata, ++ extradata_size); ++ else ++ *size_to_reserve = MAX_H264_HEADER_SIZE; ++ break; + case AV_CODEC_ID_HEVC: + return ff_isom_write_hvcc(dyn_cp, extradata, + extradata_size, 0); +@@ -2966,8 +2974,8 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) + } + break; + #endif +- // FIXME: Remove the following once libaom starts propagating proper extradata during init() +- // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2208 ++ // FIXME: Remove the following once libaom starts propagating extradata during init() ++ // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 + case AV_CODEC_ID_AV1: + if (side_data_size && mkv->track.bc && !par->extradata_size) { + // If the reserved space doesn't suffice, only write +@@ -2979,6 +2987,16 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) + } else if (!par->extradata_size) + return AVERROR_INVALIDDATA; + break; ++ // H264 V4L2 has a similar issue ++ case AV_CODEC_ID_H264: ++ if (side_data_size && mkv->track.bc && !par->extradata_size) { ++ ret = mkv_update_codecprivate(s, mkv, side_data, side_data_size, ++ par, mkv->track.bc, track, 0); ++ if (ret < 0) ++ return ret; ++ } else if (!par->extradata_size) ++ return AVERROR_INVALIDDATA; ++ break; + default: + if (side_data_size) + av_log(s, AV_LOG_DEBUG, "Ignoring new extradata in a packet for stream 
%d.\n", pkt->stream_index); +@@ -3440,9 +3458,15 @@ static int mkv_init(struct AVFormatContext *s) + track->reformat = mkv_reformat_wavpack; + break; + case AV_CODEC_ID_H264: ++ // Default to reformat if no extradata as the only current ++ // encoder which does this is v4l2m2m which needs reformat ++ if (par->extradata_size == 0 || ++ (par->extradata_size > 3 && ++ (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))) ++ track->reformat = mkv_reformat_h2645; ++ break; + case AV_CODEC_ID_HEVC: +- if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 || +- par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) && ++ if (par->extradata_size > 6 && + (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)) + track->reformat = mkv_reformat_h2645; + break; +diff --git a/libavformat/movenc.c b/libavformat/movenc.c +index d20e45cf8107..079015da9ac4 100644 +--- a/libavformat/movenc.c ++++ b/libavformat/movenc.c +@@ -6902,6 +6902,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) + if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || + trk->par->codec_id == AV_CODEC_ID_AAC || + trk->par->codec_id == AV_CODEC_ID_AV1 || ++ trk->par->codec_id == AV_CODEC_ID_H264 || + trk->par->codec_id == AV_CODEC_ID_FLAC) { + size_t side_size; + uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); +diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c +index 7b4ae37d1376..b026ed5bed4f 100644 +--- a/libavformat/rtpenc.c ++++ b/libavformat/rtpenc.c +@@ -19,6 +19,7 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include "avc.h" + #include "avformat.h" + #include "mpegts.h" + #include "internal.h" +@@ -586,8 +587,25 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) + ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 
1 : 0); + break; + case AV_CODEC_ID_H264: ++ { ++ uint8_t *side_data; ++ size_t side_data_size = 0; ++ ++ side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, ++ &side_data_size); ++ ++ if (side_data_size != 0) { ++ int ps_size = side_data_size; ++ uint8_t * ps_buf = NULL; ++ ++ ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size); ++ av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size); ++ ff_rtp_send_h264_hevc(s1, ps_buf ? ps_buf : side_data, ps_size); ++ av_free(ps_buf); ++ } + ff_rtp_send_h264_hevc(s1, pkt->data, size); + break; ++ } + case AV_CODEC_ID_H261: + ff_rtp_send_h261(s1, pkt->data, size); + break; +diff --git a/libavutil/Makefile b/libavutil/Makefile +index 6e6fa8d800ca..8fa408850998 100644 +--- a/libavutil/Makefile ++++ b/libavutil/Makefile +@@ -76,6 +76,7 @@ HEADERS = adler32.h \ + rational.h \ + replaygain.h \ + ripemd.h \ ++ rpi_sand_fns.h \ + samplefmt.h \ + sha.h \ + sha512.h \ +@@ -201,6 +202,7 @@ OBJS-$(CONFIG_MACOS_KPERF) += macos_kperf.o + OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o + OBJS-$(CONFIG_OPENCL) += hwcontext_opencl.o + OBJS-$(CONFIG_QSV) += hwcontext_qsv.o ++OBJS-$(CONFIG_SAND) += rpi_sand_fns.o + OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o + OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o + OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o +@@ -222,6 +224,7 @@ SKIPHEADERS-$(CONFIG_D3D12VA) += hwcontext_d3d12va.h + SKIPHEADERS-$(CONFIG_DXVA2) += hwcontext_dxva2.h + SKIPHEADERS-$(CONFIG_QSV) += hwcontext_qsv.h + SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h ++SKIPHEADERS-$(CONFIG-RPI) += rpi_sand_fn_pw.h + SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h + SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h + SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h +diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile +index eba01513379a..1b44beab3942 100644 +--- a/libavutil/aarch64/Makefile ++++ b/libavutil/aarch64/Makefile +@@ -4,3 +4,5 @@ OBJS += aarch64/cpu.o \ + + 
NEON-OBJS += aarch64/float_dsp_neon.o \ + aarch64/tx_float_neon.o \ ++ aarch64/rpi_sand_neon.o \ ++ +diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S +new file mode 100644 +index 000000000000..3a6bc3de74e5 +--- /dev/null ++++ b/libavutil/aarch64/rpi_sand_neon.S +@@ -0,0 +1,672 @@ ++/* ++Copyright (c) 2021 Michael Eiler ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++ ++Authors: Michael Eiler ++*/ ++ ++#include "asm.S" ++ ++// void ff_rpi_sand8_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++ ++function ff_rpi_sand8_lines_to_planar_y8, export=1 ++ // w15 contains the number of rows we need to process ++ ldr w15, [sp, #0] ++ ++ // w8 will contain the number of blocks per row ++ // w8 = floor(_w/stride1) ++ // stride1 is assumed to always be 128 ++ mov w8, w1 ++ lsr w8, w8, #7 ++ ++ // in case the width of the image is not a multiple of 128, there will ++ // be an incomplete block at the end of every row ++ // w9 contains the number of pixels stored within this block ++ // w9 = _w - w8 * 128 ++ lsl w9, w8, #7 ++ sub w9, w7, w9 ++ ++ // this is the value we have to add to the src pointer after reading a complete block ++ // it will move the address to the start of the next block ++ // w10 = stride2 * stride1 - stride1 ++ mov w10, w4 ++ lsl w10, w10, #7 ++ sub w10, w10, #128 ++ ++ // w11 is the row offset, meaning the start offset of the first block of every collumn ++ // this will be increased with stride1 within every iteration of the row_loop ++ eor w11, w11, w11 ++ ++ // w12 = 0, processed row count ++ eor w12, w12, w12 ++row_loop: ++ // start of the first block within the current row ++ // x13 = row offset + src ++ mov x13, x2 ++ add x13, x13, x11 ++ ++ // w14 = 0, processed block count ++ eor w14, w14, w14 ++ ++ cmp w8, #0 ++ beq no_main_y8 ++ ++block_loop: ++ // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128 ++ // fortunately these aren't callee saved ones, meaning we don't need to backup them ++ ld1 { v0.16b, v1.16b, v2.16b, v3.16b}, [x13], #64 ++ ld1 { v4.16b, v5.16b, v6.16b, v7.16b}, [x13], #64 ++ ++ // write 
these registers back to the destination vector and increase the dst address by 128 ++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 ++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x0], #64 ++ ++ // move the source register to the beginning of the next block (x13 = src + block offset) ++ add x13, x13, x10 ++ // increase the block counter ++ add w14, w14, #1 ++ ++ // continue with the block_loop if we haven't copied all full blocks yet ++ cmp w8, w14 ++ bgt block_loop ++ ++ // handle the last block at the end of each row ++ // at most 127 byte values copied from src to dst ++no_main_y8: ++ eor w5, w5, w5 // i = 0 ++incomplete_block_loop_y8: ++ cmp w5, w9 ++ bge incomplete_block_loop_end_y8 ++ ++ ldrb w6, [x13] ++ strb w6, [x0] ++ add x13, x13, #1 ++ add x0, x0, #1 ++ ++ add w5, w5, #1 ++ b incomplete_block_loop_y8 ++incomplete_block_loop_end_y8: ++ ++ ++ // increase the row offset by 128 (stride1) ++ add w11, w11, #128 ++ // increment the row counter ++ add w12, w12, #1 ++ ++ // process the next row if we haven't finished yet ++ cmp w15, w12 ++ bgt row_loop ++ ++ ret ++endfunc ++ ++ ++ ++// void ff_rpi_sand8_lines_to_planar_c8( ++// uint8_t * dst_u, : x0 ++// unsigned int dst_stride_u, : w1 == width ++// uint8_t * dst_v, : x2 ++// unsigned int dst_stride_v, : w3 == width ++// const uint8_t * src, : x4 ++// unsigned int stride1, : w5 == 128 ++// unsigned int stride2, : w6 ++// unsigned int _x, : w7 ++// unsigned int y, : [sp, #0] ++// unsigned int _w, : [sp, #8] ++// unsigned int h); : [sp, #16] ++ ++function ff_rpi_sand8_lines_to_planar_c8, export=1 ++ // w7 = width ++ ldr w7, [sp, #8] ++ ++ // w15 contains the number of rows we need to process ++ // counts down ++ ldr w15, [sp, #16] ++ ++ // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6 ++ mov w8, w7 ++ lsr w8, w8, #6 ++ ++ // number of pixels in block at the end of every row ++ // w9 = _w - (w8 * 64) ++ lsl w9, w8, #6 ++ sub w9, w7, w9 ++ ++ // Skip at the end of the line to account for 
stride ++ sub w12, w1, w7 ++ ++ // address delta to the beginning of the next block ++ // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128 ++ lsl w10, w6, #7 ++ sub w10, w10, #128 ++ ++ // w11 = row address start offset = 0 ++ eor w11, w11, w11 ++ ++row_loop_c8: ++ // start of the first block within the current row ++ // x13 = row offset + src ++ mov x13, x4 ++ add x13, x13, x11 ++ ++ // w14 = 0, processed block count ++ eor w14, w14, w14 ++ ++ cmp w8, #0 ++ beq no_main_c8 ++ ++block_loop_c8: ++ // load the full block -> 128 bytes, the block contains 64 interleaved U and V values ++ ld2 { v0.16b, v1.16b }, [x13], #32 ++ ld2 { v2.16b, v3.16b }, [x13], #32 ++ ld2 { v4.16b, v5.16b }, [x13], #32 ++ ld2 { v6.16b, v7.16b }, [x13], #32 ++ ++ // swap registers so that we can write them out with a single instruction ++ mov v16.16b, v1.16b ++ mov v17.16b, v3.16b ++ mov v18.16b, v5.16b ++ mov v1.16b, v2.16b ++ mov v2.16b, v4.16b ++ mov v3.16b, v6.16b ++ mov v4.16b, v16.16b ++ mov v5.16b, v17.16b ++ mov v6.16b, v18.16b ++ ++ st1 { v0.16b, v1.16b, v2.16b, v3.16b }, [x0], #64 ++ st1 { v4.16b, v5.16b, v6.16b, v7.16b }, [x2], #64 ++ ++ // increment block counter and move src to the beginning of the next block ++ add w14, w14, #1 ++ add x13, x13, x10 ++ ++ // jump to block_loop_c8 iff the block count is smaller than the number of full blocks ++ cmp w8, w14 ++ bgt block_loop_c8 ++ ++no_main_c8: ++ // handle incomplete block at the end of every row ++ eor w5, w5, w5 // point counter, i = 0 ++incomplete_block_loop_c8: ++ cmp w5, w9 ++ bge incomplete_block_loop_end_c8 ++ ++ ldrb w1, [x13] ++ strb w1, [x0] ++ add x13, x13, #1 ++ ++ ldrb w1, [x13] ++ strb w1, [x2] ++ add x13, x13, #1 ++ ++ add x0, x0, #1 ++ add x2, x2, #1 ++ ++ add w5, w5, #1 ++ b incomplete_block_loop_c8 ++incomplete_block_loop_end_c8: ++ ++ // increase row_offset by stride1 ++ add w11, w11, #128 ++ add x0, x0, w12, sxtw ++ add x2, x2, w12, sxtw ++ ++ // jump to row_loop_c8 iff the row count is smaller than
the height ++ subs w15, w15, #1 ++ bgt row_loop_c8 ++ ++ ret ++endfunc ++ ++// Unzip chroma ++// ++// On entry: ++// a0 = V0, U2, ... ++// a1 = U0, V1, ... ++// a2 = U1, V2, ... ++// b0 = V8, U10, ... ++// b1 = U8, V9, ... ++// b2 = U9, V10, ... ++// ++// On exit: ++// d0 = U0, U3, ... ++// ... ++// a0 = V0, V3, .. ++// ... ++// ++// Reg order for USAND is a1, a0, a2 (i.e. swap natural order of 1st 2 dest regs) ++ ++.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2 ++ uzp1 \d0\().8h, \a1\().8h, \b1\().8h ++ uzp1 \d1\().8h, \a2\().8h, \b2\().8h ++ uzp2 \d2\().8h, \a0\().8h, \b0\().8h ++ ++ uzp1 \a0\().8h, \a0\().8h, \b0\().8h ++ uzp2 \a1\().8h, \a1\().8h, \b1\().8h ++ uzp2 \a2\().8h, \a2\().8h, \b2\().8h ++.endm ++ ++// SAND30 -> 10bit ++.macro USAND10 d0, d1, d2, a0, a1 ++ shrn \d2\().4h, \a0\().4s, #14 ++ shrn \d1\().4h, \a0\().4s, #10 ++ ++ shrn2 \d2\().8h, \a1\().4s, #14 ++ shrn2 \d1\().8h, \a1\().4s, #10 ++ uzp1 \d0\().8h, \a0\().8h, \a1\().8h ++ ++ ushr \d2\().8h, \d2\().8h, #6 ++ bic \d0\().8h, #0xfc, lsl #8 ++ bic \d1\().8h, #0xfc, lsl #8 ++.endm ++ ++// SAND30 -> 8bit ++.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2 ++ shrn \d1\().4h, \a0\().4s, #12 ++ shrn2 \d1\().8h, \a1\().4s, #12 ++ uzp1 \d0\().8h, \a0\().8h, \a1\().8h ++ uzp2 \d2\().8h, \a0\().8h, \a1\().8h ++ ++ shrn \t1\().4h, \a2\().4s, #12 ++ shrn2 \t1\().8h, \a3\().4s, #12 ++ uzp1 \t0\().8h, \a2\().8h, \a3\().8h ++ uzp2 \t2\().8h, \a2\().8h, \a3\().8h ++ ++ shrn \d0\().8b, \d0\().8h, #2 ++ shrn2 \d0\().16b, \t0\().8h, #2 ++ shrn \d2\().8b, \d2\().8h, #6 ++ shrn2 \d2\().16b, \t2\().8h, #6 ++ uzp1 \d1\().16b, \d1\().16b, \t1\().16b ++.endm ++ ++ ++// void ff_rpi_sand30_lines_to_planar_c16( ++// uint8_t * dst_u, // [x0] ++// unsigned int dst_stride_u, // [w1] ++// uint8_t * dst_v, // [x2] ++// unsigned int dst_stride_v, // [w3] ++// const uint8_t * src, // [x4] ++// unsigned int stride1, // [w5] 128 ++// unsigned int stride2, // [w6] ++// unsigned int _x, // [w7] 0 ++// unsigned int y, // [sp, 
#0] ++// unsigned int _w, // [sp, #8] w9 ++// unsigned int h); // [sp, #16] w10 ++ ++function ff_rpi_sand30_lines_to_planar_c16, export=1 ++ ldr w7, [sp, #0] // y ++ ldr w8, [sp, #8] // _w ++ ldr w10, [sp, #16] // h ++ lsl w6, w6, #7 // Fixup stride2 ++ sub w6, w6, #64 ++ uxtw x6, w6 ++ sub w1, w1, w8, LSL #1 // Fixup chroma strides ++ sub w3, w3, w8, LSL #1 ++ lsl w7, w7, #7 // Add y to src ++ add x4, x4, w7, UXTW ++10: ++ mov w13, #0 ++ mov x5, x4 ++ mov w9, w8 ++1: ++ ld1 {v0.4s-v3.4s}, [x5], #64 ++ ld1 {v4.4s-v7.4s}, [x5], x6 ++ subs w9, w9, #48 ++ ++ USAND10 v17, v16, v18, v0, v1 ++ USAND10 v20, v19, v21, v2, v3 ++ UZPH_C v0, v1, v2, v16, v17, v18, v19, v20, v21 ++ USAND10 v23, v22, v24, v4, v5 ++ USAND10 v26, v25, v27, v6, v7 ++ UZPH_C v4, v5, v6, v22, v23, v24, v25, v26, v27 ++ ++ blt 2f ++ ++ st3 {v0.8h-v2.8h}, [x0], #48 ++ st3 {v4.8h-v6.8h}, [x0], #48 ++ st3 {v16.8h-v18.8h}, [x2], #48 ++ st3 {v22.8h-v24.8h}, [x2], #48 ++ ++ bne 1b ++11: ++ subs w10, w10, #1 ++ add x4, x4, #128 ++ add x0, x0, w1, UXTW ++ add x2, x2, w3, UXTW ++ bne 10b ++99: ++ ret ++ ++// Partial final write ++2: ++ cmp w9, #24-48 ++ blt 1f ++ st3 {v0.8h - v2.8h}, [x0], #48 ++ st3 {v16.8h - v18.8h}, [x2], #48 ++ beq 11b ++ mov v0.16b, v4.16b ++ mov v1.16b, v5.16b ++ sub w9, w9, #24 ++ mov v2.16b, v6.16b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ mov v18.16b, v24.16b ++1: ++ cmp w9, #12-48 ++ blt 1f ++ st3 {v0.4h - v2.4h}, [x0], #24 ++ st3 {v16.4h - v18.4h}, [x2], #24 ++ beq 11b ++ mov v0.d[0], v0.d[1] ++ sub w9, w9, #12 ++ mov v1.d[0], v1.d[1] ++ mov v2.d[0], v2.d[1] ++ mov v16.d[0], v16.d[1] ++ mov v17.d[0], v17.d[1] ++ mov v18.d[0], v18.d[1] ++1: ++ cmp w9, #6-48 ++ blt 1f ++ st3 {v0.h - v2.h}[0], [x0], #6 ++ st3 {v0.h - v2.h}[1], [x0], #6 ++ st3 {v16.h - v18.h}[0], [x2], #6 ++ st3 {v16.h - v18.h}[1], [x2], #6 ++ beq 11b ++ mov v0.s[0], v0.s[1] ++ sub w9, w9, #6 ++ mov v1.s[0], v1.s[1] ++ mov v2.s[0], v2.s[1] ++ mov v16.s[0], v16.s[1] ++ mov v17.s[0], v17.s[1] ++ mov v18.s[0], 
v18.s[1] ++1: ++ cmp w9, #3-48 ++ blt 1f ++ st3 {v0.h - v2.h}[0], [x0], #6 ++ st3 {v16.h - v18.h}[0], [x2], #6 ++ beq 11b ++ mov v0.h[0], v0.h[1] ++ sub w9, w9, #3 ++ mov v1.h[0], v1.h[1] ++ mov v16.h[0], v16.h[1] ++ mov v17.h[0], v17.h[1] ++1: ++ cmp w9, #2-48 ++ blt 1f ++ st2 {v0.h - v1.h}[0], [x0], #4 ++ st2 {v16.h - v17.h}[0], [x2], #4 ++ b 11b ++1: ++ st1 {v0.h}[0], [x0], #2 ++ st1 {v16.h}[0], [x2], #2 ++ b 11b ++endfunc ++ ++ ++//void ff_rpi_sand30_lines_to_planar_p010( ++// uint8_t * dest, ++// unsigned int dst_stride, ++// const uint8_t * src, ++// unsigned int src_stride1, ++// unsigned int src_stride2, ++// unsigned int _x, ++// unsigned int y, ++// unsigned int _w, ++// unsigned int h); ++ ++// void ff_rpi_sand30_lines_to_planar_y16( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK.
However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y16, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ uxtw x4, w4 ++ sub w1, w1, w7, lsl #1 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ USAND10 v16, v17, v18, v0, v1 ++ USAND10 v19, v20, v21, v2, v3 ++ USAND10 v22, v23, v24, v4, v5 ++ USAND10 v25, v26, v27, v6, v7 ++ ++ blt 2f ++ ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 ++ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++ mov v19.16b, v25.16b ++ mov v20.16b, v26.16b ++ mov v21.16b, v27.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v19.16b ++ mov v17.16b, v20.16b ++ sub w5, w5, #24 ++ mov v18.16b, v21.16b ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 ++ beq 11b ++ mov v16.d[0], v16.d[1] ++ sub w5, w5, #12 ++ mov v17.d[0], v17.d[1] ++ mov v18.d[0], v18.d[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ st3 {v16.h, v17.h, v18.h}[1], [x0], #6 ++ beq 11b ++ mov v16.s[0], v16.s[1] ++ sub w5, w5, #6 ++ mov v17.s[0], v17.s[1] ++ mov v18.s[0], v18.s[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ beq 11b ++ mov v16.h[0], v16.h[1] ++ sub w5, w5, #3 ++ mov v17.h[0], v17.h[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.h, v17.h}[0], [x0], #4 ++ b 11b ++1: ++ st1 {v16.h}[0], [x0], #2 
++ b 11b ++ ++endfunc ++ ++// void ff_rpi_sand30_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ uxtw x4, w4 ++ sub w1, w1, w7 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ // v0, v1 ++ USAND8 v16, v17, v18, v0, v1, v2, v3, v22, v23, v24 ++ USAND8 v19, v20, v21, v4, v5, v6, v7, v22, v23, v24 ++ ++ blt 2f ++ ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 ++ beq 11b ++ mov v16.d[0], v16.d[1] ++ sub w5, w5, #24 ++ mov v17.d[0], v17.d[1] ++ mov v18.d[0], v18.d[1] ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[2], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[3], [x0], #3 ++ beq 11b ++ mov v16.s[0], v16.s[1] ++ sub w5, w5, #12 ++ mov v17.s[0], v17.s[1] ++ mov v18.s[0], v18.s[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, 
v18.b}[1], [x0], #3 ++ beq 11b ++ mov v16.h[0], v16.h[1] ++ sub w5, w5, #6 ++ mov v17.h[0], v17.h[1] ++ mov v18.h[0], v18.h[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ beq 11b ++ mov v16.b[0], v16.b[1] ++ sub w5, w5, #3 ++ mov v17.b[0], v17.b[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.b, v17.b}[0], [x0], #2 ++ b 11b ++1: ++ st1 {v16.b}[0], [x0], #1 ++ b 11b ++ ++endfunc ++ +diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h +new file mode 100644 +index 000000000000..e0e94f6aa1a2 +--- /dev/null ++++ b/libavutil/aarch64/rpi_sand_neon.h +@@ -0,0 +1,61 @@ ++/* ++Copyright (c) 2021 Michael Eiler ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: Michael Eiler ++*/ ++ ++#ifndef AVUTIL_AARCH64_RPI_SAND_NEON_H ++#define AVUTIL_AARCH64_RPI_SAND_NEON_H ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u, ++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u, ++ uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, ++ unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* AVUTIL_AARCH64_RPI_SAND_NEON_H */
+diff --git a/libavutil/arm/Makefile b/libavutil/arm/Makefile +index 5da44b05427a..b74b7c4e2f25 100644 +--- a/libavutil/arm/Makefile ++++ b/libavutil/arm/Makefile +@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o \ + + NEON-OBJS += arm/float_dsp_init_neon.o \ + arm/float_dsp_neon.o \ ++ arm/rpi_sand_neon.o \ +diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S +new file mode 100644 +index 000000000000..60e697f6819b +--- /dev/null ++++ b/libavutil/arm/rpi_sand_neon.S +@@ -0,0 +1,925 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++#include "libavutil/arm/asm.S" ++ ++ ++@ General notes: ++@ Having done some timing on this in sand8->y8 (Pi4) ++@ vst1 (680fps) is a bit faster than vstm (660fps) ++@ vldm (680fps) is noticeably faster than vld1 (480fps) ++@ (or it might be that a mix is what is required) ++@ ++@ At least on a Pi4 it is no more expensive to have a single auto-inc register ++@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted ++@ the latter was better) ++@ ++@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless ++@ the memory is uncached. ++@ As these are Sand -> planar we can assume that src is going to be aligned but ++@ it is possible that dest isn't (converting to .yuv or other packed format). ++@ Luckily vst1 is faster than vstm :-) so all is well ++@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4 ++@ .8 stores would let us do non-word aligned stores into uncached but it ++@ probably isn't worth it.
++ ++ ++ ++ ++@ void ff_rpi_sand128b_stripe_to_8_10( ++@ uint8_t * dest, // [r0] ++@ const uint8_t * src1, // [r1] ++@ const uint8_t * src2, // [r2] ++@ unsigned int lines); // [r3] ++ ++.macro stripe2_to_8, bit_depth ++ vpush {q4-q7} ++1: ++ vldm r1!, {q0-q7} ++ subs r3, #1 ++ vldm r2!, {q8-q15} ++ vqrshrn.u16 d0, q0, #\bit_depth - 8 ++ vqrshrn.u16 d1, q1, #\bit_depth - 8 ++ vqrshrn.u16 d2, q2, #\bit_depth - 8 ++ vqrshrn.u16 d3, q3, #\bit_depth - 8 ++ vqrshrn.u16 d4, q4, #\bit_depth - 8 ++ vqrshrn.u16 d5, q5, #\bit_depth - 8 ++ vqrshrn.u16 d6, q6, #\bit_depth - 8 ++ vqrshrn.u16 d7, q7, #\bit_depth - 8 ++ vqrshrn.u16 d8, q8, #\bit_depth - 8 ++ vqrshrn.u16 d9, q9, #\bit_depth - 8 ++ vqrshrn.u16 d10, q10, #\bit_depth - 8 ++ vqrshrn.u16 d11, q11, #\bit_depth - 8 ++ vqrshrn.u16 d12, q12, #\bit_depth - 8 ++ vqrshrn.u16 d13, q13, #\bit_depth - 8 ++ vqrshrn.u16 d14, q14, #\bit_depth - 8 ++ vqrshrn.u16 d15, q15, #\bit_depth - 8 ++ vstm r0!, {q0-q7} ++ bne 1b ++ vpop {q4-q7} ++ bx lr ++.endm ++ ++function ff_rpi_sand128b_stripe_to_8_10, export=1 ++ stripe2_to_8 10 ++endfunc ++ ++@ void ff_rpi_sand8_lines_to_planar_y8( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. 
However it does respect the dest size for writing ++ ++function ff_rpi_sand8_lines_to_planar_y8, export=1 ++ push {r4-r8, lr} @ +24 L ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ lsl r3, #7 ++ sub r1, r6 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++ mov lr, #0 ++1: ++ vldm r2, {q8-q15} ++ add r2, r3 ++ subs r5, #128 ++ blt 2f ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ vst1.8 {d20, d21, d22, d23}, [r0]! ++ vst1.8 {d24, d25, d26, d27}, [r0]! ++ vst1.8 {d28, d29, d30, d31}, [r0]! ++ bne 1b ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #64-128 ++ blt 1f ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ vst1.8 {d20, d21, d22, d23}, [r0]! ++ beq 11b ++ vmov q8, q12 ++ vmov q9, q13 ++ sub r5, #64 ++ vmov q10, q14 ++ vmov q11, q15 ++1: ++ cmp r5, #32-128 ++ blt 1f ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ beq 11b ++ vmov q8, q10 ++ sub r5, #32 ++ vmov q9, q11 ++1: ++ cmp r5, #16-128 ++ blt 1f ++ vst1.8 {d16, d17}, [r0]! ++ beq 11b ++ sub r5, #16 ++ vmov q8, q9 ++1: ++ cmp r5, #8-128 ++ blt 1f ++ vst1.8 {d16}, [r0]! ++ beq 11b ++ sub r5, #8 ++ vmov d16, d17 ++1: ++ cmp r5, #4-128 ++ blt 1f ++ vst1.32 {d16[0]}, [r0]! ++ beq 11b ++ sub r5, #4 ++ vshr.u64 d16, #32 ++1: ++ cmp r5, #2-128 ++ blt 1f ++ vst1.16 {d16[0]}, [r0]! ++ beq 11b ++ vst1.8 {d16[2]}, [r0]! ++ b 11b ++1: ++ vst1.8 {d16[0]}, [r0]! 
++ b 11b ++endfunc ++ ++@ void ff_rpi_sand8_lines_to_planar_c8( ++@ uint8_t * dst_u, // [r0] ++@ unsigned int dst_stride_u, // [r1] ++@ uint8_t * dst_v, // [r2] ++@ unsigned int dst_stride_v, // [r3] ++@ const uint8_t * src, // [sp, #0] -> r4, r5 ++@ unsigned int stride1, // [sp, #4] 128 ++@ unsigned int stride2, // [sp, #8] -> r8 ++@ unsigned int _x, // [sp, #12] 0 ++@ unsigned int y, // [sp, #16] (r7 in prefix) ++@ unsigned int _w, // [sp, #20] -> r12, r6 ++@ unsigned int h); // [sp, #24] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for writing ++ ++function ff_rpi_sand8_lines_to_planar_c8, export=1 ++ push {r4-r8, lr} @ +24 ++ ++ ldr r5, [sp, #24] ++ ldr r8, [sp, #32] ++ ldr r7, [sp, #40] ++ ldr r6, [sp, #44] ++ lsl r8, #7 ++ add r5, r5, r7, lsl #7 ++ sub r1, r1, r6 ++ sub r3, r3, r6 ++ ldr r7, [sp, #48] ++ vpush {q4-q7} ++ ++10: ++ mov r4, r5 ++ mov r12, r6 ++1: ++ subs r12, #64 ++ vldm r4, {q0-q7} ++ add r4, r8 ++ it gt ++ vldmgt r4, {q8-q15} ++ add r4, r8 ++ ++ vuzp.8 q0, q1 ++ vuzp.8 q2, q3 ++ vuzp.8 q4, q5 ++ vuzp.8 q6, q7 ++ ++ vuzp.8 q8, q9 ++ vuzp.8 q10, q11 ++ vuzp.8 q12, q13 ++ vuzp.8 q14, q15 ++ subs r12, #64 ++ ++ @ Rearrange regs so we can use vst1 with 4 regs ++ vswp q1, q2 ++ vswp q5, q6 ++ vswp q9, q10 ++ vswp q13, q14 ++ blt 2f ++ ++ vst1.8 {d0, d1, d2, d3 }, [r0]! ++ vst1.8 {d8, d9, d10, d11}, [r0]! ++ vst1.8 {d16, d17, d18, d19}, [r0]! ++ vst1.8 {d24, d25, d26, d27}, [r0]! ++ ++ vst1.8 {d4, d5, d6, d7 }, [r2]! ++ vst1.8 {d12, d13, d14, d15}, [r2]! ++ vst1.8 {d20, d21, d22, d23}, [r2]! ++ vst1.8 {d28, d29, d30, d31}, [r2]! ++ bne 1b ++11: ++ subs r7, #1 ++ add r5, #128 ++ add r0, r1 ++ add r2, r3 ++ bne 10b ++ vpop {q4-q7} ++ pop {r4-r8,pc} ++ ++2: ++ cmp r12, #64-128 ++ blt 1f ++ vst1.8 {d0, d1, d2, d3 }, [r0]! ++ vst1.8 {d8, d9, d10, d11}, [r0]! ++ vst1.8 {d4, d5, d6, d7 }, [r2]! ++ vst1.8 {d12, d13, d14, d15}, [r2]! 
++ beq 11b ++ sub r12, #64 ++ vmov q0, q8 ++ vmov q1, q9 ++ vmov q2, q10 ++ vmov q3, q11 ++ vmov q4, q12 ++ vmov q5, q13 ++ vmov q6, q14 ++ vmov q7, q15 ++1: ++ cmp r12, #32-128 ++ blt 1f ++ vst1.8 {d0, d1, d2, d3 }, [r0]! ++ vst1.8 {d4, d5, d6, d7 }, [r2]! ++ beq 11b ++ sub r12, #32 ++ vmov q0, q4 ++ vmov q1, q5 ++ vmov q2, q6 ++ vmov q3, q7 ++1: ++ cmp r12, #16-128 ++ blt 1f ++ vst1.8 {d0, d1 }, [r0]! ++ vst1.8 {d4, d5 }, [r2]! ++ beq 11b ++ sub r12, #16 ++ vmov q0, q1 ++ vmov q2, q3 ++1: ++ cmp r12, #8-128 ++ blt 1f ++ vst1.8 {d0}, [r0]! ++ vst1.8 {d4}, [r2]! ++ beq 11b ++ sub r12, #8 ++ vmov d0, d1 ++ vmov d4, d5 ++1: ++ cmp r12, #4-128 ++ blt 1f ++ vst1.32 {d0[0]}, [r0]! ++ vst1.32 {d4[0]}, [r2]! ++ beq 11b ++ sub r12, #4 ++ vmov s0, s1 ++ vmov s8, s9 ++1: ++ cmp r12, #2-128 ++ blt 1f ++ vst1.16 {d0[0]}, [r0]! ++ vst1.16 {d4[0]}, [r2]! ++ beq 11b ++ vst1.8 {d0[2]}, [r0]! ++ vst1.8 {d4[2]}, [r2]! ++ b 11b ++1: ++ vst1.8 {d0[0]}, [r0]! ++ vst1.8 {d4[0]}, [r2]! ++ b 11b ++endfunc ++ ++ ++ ++@ void ff_rpi_sand30_lines_to_planar_y16( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. 
However it does respect the dest size for writing ++ ++function ff_rpi_sand30_lines_to_planar_y16, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ sub r3, #1 ++ lsl r3, #7 ++ sub r1, r1, r6, lsl #1 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++ mov lr, #0 ++1: ++ vldm r2!, {q10-q13} ++ add lr, #64 ++ ++ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! ++ ands lr, #127 ++ vshrn.u32 d2, q10, #10 ++ vmovn.u32 d0, q10 ++ ++ vshrn.u32 d5, q11, #14 ++ it eq ++ addeq r2, r3 ++ vshrn.u32 d3, q11, #10 ++ vmovn.u32 d1, q11 ++ ++ subs r5, #48 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 ++ ++ vshrn.u32 d20, q12, #14 ++ vshrn.u32 d18, q12, #10 ++ vmovn.u32 d16, q12 ++ ++ vshrn.u32 d21, q13, #14 ++ vshrn.u32 d19, q13, #10 ++ vmovn.u32 d17, q13 ++ ++ vshr.u16 q10, #6 ++ vbic.u16 q8, #0xfc00 ++ vbic.u16 q9 , #0xfc00 ++ blt 2f ++ ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4], r12 ++ vst3.16 {d16, d18, d20}, [r0], r12 ++ vst3.16 {d17, d19, d21}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #24-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4] ++ beq 11b ++ vmov q0, q8 ++ sub r5, #24 ++ vmov q1, q9 ++ vmov q2, q10 ++1: ++ cmp r5, #12-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0]! ++ beq 11b ++ vmov d0, d1 ++ sub r5, #12 ++ vmov d2, d3 ++ vmov d4, d5 ++1: ++ cmp r5, #6-48 ++ add r4, r0, #6 @ avoid [r0]! on sequential instructions ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0] ++ vst3.16 {d0[1], d2[1], d4[1]}, [r4] ++ add r0, #12 ++ beq 11b ++ vmov s0, s1 ++ sub r5, #6 ++ vmov s4, s5 ++ vmov s8, s9 ++1: ++ cmp r5, #3-48 ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! 
++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #2-48 ++ blt 1f ++ vst2.16 {d0[0], d2[0]}, [r0]! ++ b 11b ++1: ++ vst1.16 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ ++ ++@ void ff_rpi_sand30_lines_to_planar_c16( ++@ uint8_t * dst_u, // [r0] ++@ unsigned int dst_stride_u, // [r1] ++@ uint8_t * dst_v, // [r2] ++@ unsigned int dst_stride_v, // [r3] ++@ const uint8_t * src, // [sp, #0] -> r4, r5 ++@ unsigned int stride1, // [sp, #4] 128 ++@ unsigned int stride2, // [sp, #8] -> r8 ++@ unsigned int _x, // [sp, #12] 0 ++@ unsigned int y, // [sp, #16] (r7 in prefix) ++@ unsigned int _w, // [sp, #20] -> r6, r9 ++@ unsigned int h); // [sp, #24] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for writing ++ ++function ff_rpi_sand30_lines_to_planar_c16, export=1 ++ push {r4-r10, lr} @ +32 ++ ldr r5, [sp, #32] ++ ldr r8, [sp, #40] ++ ldr r7, [sp, #48] ++ ldr r9, [sp, #52] ++ mov r12, #48 ++ sub r8, #1 ++ lsl r8, #7 ++ add r5, r5, r7, lsl #7 ++ sub r1, r1, r9, lsl #1 ++ sub r3, r3, r9, lsl #1 ++ ldr r7, [sp, #56] ++10: ++ mov lr, #0 ++ mov r4, r5 ++ mov r6, r9 ++1: ++ vldm r4!, {q0-q3} ++ add lr, #64 ++ ++ @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 ++ vshrn.u32 d20, q0, #14 ++ vmovn.u32 d18, q0 ++ vshrn.u32 d0, q0, #10 ++ ands lr, #127 ++ ++ vshrn.u32 d21, q1, #14 ++ vmovn.u32 d19, q1 ++ vshrn.u32 d1, q1, #10 ++ ++ vshrn.u32 d22, q2, #10 ++ vmovn.u32 d2, q2 ++ vshrn.u32 d4, q2, #14 ++ ++ add r10, r0, #24 ++ vshrn.u32 d23, q3, #10 ++ vmovn.u32 d3, q3 ++ vshrn.u32 d5, q3, #14 ++ ++ it eq ++ addeq r4, r8 ++ vuzp.16 q0, q11 ++ vuzp.16 q9, q1 ++ vuzp.16 q10, q2 ++ ++ @ q0 V0, V3,.. ++ @ q9 U0, U3... ++ @ q10 U1, U4... ++ @ q11 U2, U5,.. ++ @ q1 V1, V4, ++ @ q2 V2, V5,.. 
++ ++ subs r6, #24 ++ vbic.u16 q11, #0xfc00 ++ vbic.u16 q9, #0xfc00 ++ vshr.u16 q10, #6 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 ++ ++ blt 2f ++ ++ vst3.16 {d18, d20, d22}, [r0], r12 ++ vst3.16 {d19, d21, d23}, [r10] ++ add r10, r2, #24 ++ vst3.16 {d0, d2, d4}, [r2], r12 ++ vst3.16 {d1, d3, d5}, [r10] ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r5, #128 ++ add r0, r1 ++ add r2, r3 ++ bne 10b ++ ++ pop {r4-r10, pc} ++ ++@ Partial final write ++2: ++ cmp r6, #-12 ++ blt 1f ++ vst3.16 {d18, d20, d22}, [r0]! ++ vst3.16 {d0, d2, d4}, [r2]! ++ beq 11b ++ vmov d18, d19 ++ vmov d20, d21 ++ vmov d22, d23 ++ sub r6, #12 ++ vmov d0, d1 ++ vmov d2, d3 ++ vmov d4, d5 ++1: ++ cmp r6, #-18 ++ @ Rezip here as it makes the remaining tail handling easier ++ vzip.16 d0, d18 ++ vzip.16 d2, d20 ++ vzip.16 d4, d22 ++ blt 1f ++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! ++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! ++ vst3.16 {d0[3], d2[3], d4[3]}, [r0]! ++ vst3.16 {d0[2], d2[2], d4[2]}, [r2]! ++ beq 11b ++ vmov d0, d18 ++ vmov d2, d20 ++ sub r6, #6 ++ vmov d4, d22 ++1: ++ cmp r6, #-21 ++ blt 1f ++ vst3.16 {d0[1], d2[1], d4[1]}, [r0]! ++ vst3.16 {d0[0], d2[0], d4[0]}, [r2]! ++ beq 11b ++ vmov s4, s5 ++ sub r6, #3 ++ vmov s0, s1 ++1: ++ cmp r6, #-22 ++ blt 1f ++ vst2.16 {d0[1], d2[1]}, [r0]! ++ vst2.16 {d0[0], d2[0]}, [r2]! ++ b 11b ++1: ++ vst1.16 {d0[1]}, [r0]! ++ vst1.16 {d0[0]}, [r2]! ++ b 11b ++ ++endfunc ++ ++@ void ff_rpi_sand30_lines_to_planar_p010( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. 
However it does respect the dest size for writing ++ ++function ff_rpi_sand30_lines_to_planar_p010, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ vmov.u16 q15, #0xffc0 ++ sub r3, #1 ++ lsl r3, #7 ++ sub r1, r1, r6, lsl #1 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++ mov lr, #0 ++1: ++ vldm r2!, {q10-q13} ++ add lr, #64 ++ ++ vshl.u32 q14, q10, #6 ++ ands lr, #127 ++ vshrn.u32 d4, q10, #14 ++ vshrn.u32 d2, q10, #4 ++ vmovn.u32 d0, q14 ++ ++ vshl.u32 q14, q11, #6 ++ it eq ++ addeq r2, r3 ++ vshrn.u32 d5, q11, #14 ++ vshrn.u32 d3, q11, #4 ++ vmovn.u32 d1, q14 ++ ++ subs r5, #48 ++ vand q2, q15 ++ vand q1, q15 ++ vand q0, q15 ++ ++ vshl.u32 q14, q12, #6 ++ vshrn.u32 d20, q12, #14 ++ vshrn.u32 d18, q12, #4 ++ vmovn.u32 d16, q14 ++ ++ vshl.u32 q14, q13, #6 ++ vshrn.u32 d21, q13, #14 ++ vshrn.u32 d19, q13, #4 ++ vmovn.u32 d17, q14 ++ ++ vand q10, q15 ++ vand q9, q15 ++ vand q8, q15 ++ blt 2f ++ ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4], r12 ++ vst3.16 {d16, d18, d20}, [r0], r12 ++ vst3.16 {d17, d19, d21}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #24-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0], r12 ++ vst3.16 {d1, d3, d5}, [r4] ++ beq 11b ++ vmov q0, q8 ++ sub r5, #24 ++ vmov q1, q9 ++ vmov q2, q10 ++1: ++ cmp r5, #12-48 ++ blt 1f ++ vst3.16 {d0, d2, d4}, [r0]! ++ beq 11b ++ vmov d0, d1 ++ sub r5, #12 ++ vmov d2, d3 ++ vmov d4, d5 ++1: ++ cmp r5, #6-48 ++ add r4, r0, #6 @ avoid [r0]! on sequential instructions ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0] ++ vst3.16 {d0[1], d2[1], d4[1]}, [r4] ++ add r0, #12 ++ beq 11b ++ vmov s0, s1 ++ sub r5, #6 ++ vmov s4, s5 ++ vmov s8, s9 ++1: ++ cmp r5, #3-48 ++ blt 1f ++ vst3.16 {d0[0], d2[0], d4[0]}, [r0]! 
++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #2-48 ++ blt 1f ++ vst2.16 {d0[0], d2[0]}, [r0]! ++ b 11b ++1: ++ vst1.16 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ ++ ++@ void ff_rpi_sand30_lines_to_planar_y8( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ lsl r3, #7 ++ sub r1, r1, r6 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++1: ++ vldm r2, {q8-q15} ++ ++ subs r5, #96 ++ ++ vmovn.u32 d0, q8 ++ vshrn.u32 d2, q8, #12 ++ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! 
++ ++ add r2, r3 ++ ++ vmovn.u32 d1, q9 ++ vshrn.u32 d3, q9, #12 ++ vshrn.u32 d5, q9, #16 ++ ++ pld [r2, #0] ++ ++ vshrn.u16 d0, q0, #2 ++ vmovn.u16 d1, q1 ++ vshrn.u16 d2, q2, #6 ++ ++ vmovn.u32 d16, q10 ++ vshrn.u32 d18, q10, #12 ++ vshrn.u32 d20, q10, #16 ++ ++ vmovn.u32 d17, q11 ++ vshrn.u32 d19, q11, #12 ++ vshrn.u32 d21, q11, #16 ++ ++ pld [r2, #64] ++ ++ vshrn.u16 d4, q8, #2 ++ vmovn.u16 d5, q9 ++ vshrn.u16 d6, q10, #6 ++ ++ vmovn.u32 d16, q12 ++ vshrn.u32 d18, q12, #12 ++ vshrn.u32 d20, q12, #16 ++ ++ vmovn.u32 d17, q13 ++ vshrn.u32 d19, q13, #12 ++ vshrn.u32 d21, q13, #16 ++ ++ vshrn.u16 d16, q8, #2 ++ vmovn.u16 d17, q9 ++ vshrn.u16 d18, q10, #6 ++ ++ vmovn.u32 d20, q14 ++ vshrn.u32 d22, q14, #12 ++ vshrn.u32 d24, q14, #16 ++ ++ vmovn.u32 d21, q15 ++ vshrn.u32 d23, q15, #12 ++ vshrn.u32 d25, q15, #16 ++ ++ vshrn.u16 d20, q10, #2 ++ vmovn.u16 d21, q11 ++ vshrn.u16 d22, q12, #6 ++ ++ blt 2f ++ ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ vst3.8 {d16, d17, d18}, [r0], r12 ++ vst3.8 {d20, d21, d22}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #48-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ beq 11b ++ vmov q0, q8 ++ vmov q2, q10 ++ sub r5, #48 ++ vmov d2, d18 ++ vmov d6, d22 ++1: ++ cmp r5, #24-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0]! ++ beq 11b ++ vmov q0, q2 ++ sub r5, #24 ++ vmov d2, d6 ++1: ++ cmp r5, #12-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! ++ vst3.8 {d0[2], d1[2], d2[2]}, [r0]! ++ vst3.8 {d0[3], d1[3], d2[3]}, [r0]! ++ beq 11b ++ vmov s0, s1 ++ sub r5, #12 ++ vmov s2, s3 ++ vmov s4, s5 ++1: ++ cmp r5, #6-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! 
++ add r0, #12 ++ beq 11b ++ vshr.u32 d0, #16 ++ sub r5, #6 ++ vshr.u32 d1, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #3-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #8 ++ vshr.u32 d1, #8 ++1: ++ cmp r5, #2-96 ++ blt 1f ++ vst2.8 {d0[0], d1[0]}, [r0]! ++ b 11b ++1: ++ vst1.8 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ ++ +diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h +new file mode 100644 +index 000000000000..d8126676ee0c +--- /dev/null ++++ b/libavutil/arm/rpi_sand_neon.h +@@ -0,0 +1,110 @@ ++/* ++Copyright (c) 2020 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++#ifndef AVUTIL_ARM_RPI_SAND_NEON_H ++#define AVUTIL_ARM_RPI_SAND_NEON_H ++ ++void ff_rpi_sand128b_stripe_to_8_10( ++ uint8_t * dest, // [r0] ++ const uint8_t * src1, // [r1] ++ const uint8_t * src2, // [r2] ++ unsigned int lines); // [r3] ++ ++void ff_rpi_sand8_lines_to_planar_y8( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++void ff_rpi_sand8_lines_to_planar_c8( ++ uint8_t * dst_u, // [r0] ++ unsigned int dst_stride_u, // [r1] ++ uint8_t * dst_v, // [r2] ++ unsigned int dst_stride_v, // [r3] ++ const uint8_t * src, // [sp, #0] -> r4, r5 ++ unsigned int stride1, // [sp, #4] 128 ++ unsigned int stride2, // [sp, #8] -> r8 ++ unsigned int _x, // [sp, #12] 0 ++ unsigned int y, // [sp, #16] (r7 in prefix) ++ unsigned int _w, // [sp, #20] -> r12, r6 ++ unsigned int h); // [sp, #24] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_y16( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, 
// [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_c16( ++ uint8_t * dst_u, // [r0] ++ unsigned int dst_stride_u, // [r1] ++ uint8_t * dst_v, // [r2] ++ unsigned int dst_stride_v, // [r3] ++ const uint8_t * src, // [sp, #0] -> r4, r5 ++ unsigned int stride1, // [sp, #4] 128 ++ unsigned int stride2, // [sp, #8] -> r8 ++ unsigned int _x, // [sp, #12] 0 ++ unsigned int y, // [sp, #16] (r7 in prefix) ++ unsigned int _w, // [sp, #20] -> r6, r9 ++ unsigned int h); // [sp, #24] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_p010( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++void ff_rpi_sand30_lines_to_planar_y8( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ ++#endif // AVUTIL_ARM_SAND_NEON_H ++ +diff --git a/libavutil/frame.c b/libavutil/frame.c +index f0a0dba018a6..38458696c6ae 100644 +--- a/libavutil/frame.c ++++ b/libavutil/frame.c +@@ -16,6 +16,8 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "config.h" + - #include "libavutil/avassert.h" - #include "libavutil/avstring.h" - #include "libavutil/common.h" -@@ -59,6 +61,16 @@ - #define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ - #endif - -+// 
V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined -+// in drm_fourcc.h hopefully will be sometime in the future but until then... -+#ifndef V4L2_PIX_FMT_NV12_10_COL128 -+#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') -+#endif -+ -+#ifndef V4L2_PIX_FMT_NV12_COL128 -+#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ -+#endif -+ - typedef struct V4L2Queue V4L2Queue; - typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; - -@@ -176,9 +188,11 @@ fmt_av_to_v4l2(const enum AVPixelFormat avfmt) - return V4L2_PIX_FMT_YUV420; - case AV_PIX_FMT_NV12: - return V4L2_PIX_FMT_NV12; + #include "channel_layout.h" + #include "avassert.h" + #include "buffer.h" +@@ -27,6 +29,9 @@ + #include "mem.h" + #include "samplefmt.h" + #include "hwcontext.h" +#if CONFIG_SAND - case AV_PIX_FMT_RPI4_8: - case AV_PIX_FMT_SAND128: - return V4L2_PIX_FMT_NV12_COL128; ++#include "rpi_sand_fns.h" +#endif - default: - break; - } -@@ -193,8 +207,10 @@ fmt_v4l2_to_av(const uint32_t pixfmt) - return AV_PIX_FMT_YUV420P; - case V4L2_PIX_FMT_NV12: - return AV_PIX_FMT_NV12; + + static const AVSideDataDescriptor sd_props[] = { + [AV_FRAME_DATA_PANSCAN] = { "AVPanScan" }, +@@ -1077,6 +1082,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags) + (frame->crop_top + frame->crop_bottom) >= frame->height) + return AVERROR(ERANGE); + +#if CONFIG_SAND - case V4L2_PIX_FMT_NV12_COL128: - return AV_PIX_FMT_RPI4_8; -+#endif - default: - break; - } -@@ -823,6 +839,7 @@ static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) - h = src->layers[0].planes[1].offset / bpl; - w = bpl; - } -+#if CONFIG_SAND - else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { - if (src->layers[0].nb_planes != 2) - break; -@@ -831,9 +848,11 @@ static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) - h = src->layers[0].planes[1].offset / 128; - bpl = 
fourcc_mod_broadcom_param(mod); - } -+#endif - break; - - case DRM_FORMAT_P030: -+#if CONFIG_SAND - if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { - if (src->layers[0].nb_planes != 2) - break; -@@ -842,6 +861,7 @@ static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) - h = src->layers[0].planes[1].offset / 128; - bpl = fourcc_mod_broadcom_param(mod); - } -+#endif - break; - - default: -@@ -1048,7 +1068,6 @@ static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) - AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; - const struct v4l2_format *const fmt = &q->format; - const uint32_t height = fmt_height(fmt); -- const uint32_t width = fmt_width(fmt); - ptrdiff_t bpl0; - - /* fill the DRM frame descriptor */ -@@ -1063,7 +1082,7 @@ static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) - bpl0 = layer->planes[0].pitch; - - switch (fmt_pixelformat(fmt)) { -- -+#if CONFIG_SAND - case V4L2_PIX_FMT_NV12_COL128: - mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); - layer->format = V4L2_PIX_FMT_NV12; -@@ -1074,9 +1093,10 @@ static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) - layer->nb_planes = 2; - layer->planes[1].object_index = 0; - layer->planes[1].offset = height * 128; -- layer->planes[0].pitch = width; -- layer->planes[1].pitch = width; -+ layer->planes[0].pitch = fmt_width(fmt); -+ layer->planes[1].pitch = layer->planes[0].pitch; - break; -+#endif - - case DRM_FORMAT_NV12: - layer->format = V4L2_PIX_FMT_NV12; -@@ -1576,7 +1596,10 @@ static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) - return is_linear ? V4L2_PIX_FMT_YUV420 : 0; - case DRM_FORMAT_NV12: - return is_linear ? V4L2_PIX_FMT_NV12 : -- fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : 0; -+#if CONFIG_SAND -+ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? 
V4L2_PIX_FMT_NV12_COL128 : -+#endif -+ 0; - default: - break; - } - -From d13ef67d14056374de1d66962e067633a5ca1e95 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sun, 2 Oct 2022 12:36:43 +0000 -Subject: [PATCH 082/186] configure: Fix v4l2_req_hevc_vx setup; set after deps - fixups - ---- - configure | 9 +++------ - 1 file changed, 3 insertions(+), 6 deletions(-) - -diff --git a/configure b/configure -index 055944934476..c57b24bf8946 100755 ---- a/configure -+++ b/configure -@@ -6914,12 +6914,6 @@ fi - check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns - check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;" - disable v4l2_req_hevc_vx --if enabled hevc_v4l2request_hwaccel; then -- enable v4l2_req_hevc_vx --fi --if enabled hevc_v4l2_request; then -- disable v4l2_req_hevc_vx --fi - - check_headers sys/videoio.h - test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete -@@ -7415,6 +7409,9 @@ check_deps $CONFIG_LIST \ - - enabled threads && ! enabled pthreads && ! enabled atomics_native && die "non pthread threading without atomics not supported, try adding --enable-pthreads or --cpu=i486 or higher if you are on x86" - -+# Sub-feature of hevc_v4l2request_hwaccel - can only be set once deps are done -+enabled hevc_v4l2request_hwaccel && disabled hevc_v4l2_request && enable v4l2_req_hevc_vx -+ - case $target_os in - haiku) - disable memalign - -From ea9b52297f42fd55758be7944de7c5b24c77d900 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sat, 1 Oct 2022 12:39:45 +0000 -Subject: [PATCH 083/186] vf_deinterlace_v4l2m2m: Ensure we get consistent - final frames - -On getting EOS at the input of the filster do not simply drop everything -in transit on the floor but attempt to retrieve everything possible from -the capture Q before on-signalling EOS. 
-If we know that we expect 1 frame in to always produce 1 frame out then -match CAPTURE frame to the last OUTPUT frame Qed (scale) -If frames out have an unknown relation to source frames (deinterlace) try -an encode stop and wait for the last frame marker to emerge from CAPTURE ---- - libavfilter/vf_deinterlace_v4l2m2m.c | 172 +++++++++++++++++++++++---- - 1 file changed, 148 insertions(+), 24 deletions(-) - -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index 716789f9881c..ce875c2c619c 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -94,6 +94,7 @@ typedef struct V4L2Buffer { - typedef struct V4L2Queue { - struct v4l2_format format; - struct v4l2_selection sel; -+ int eos; - int num_buffers; - V4L2Buffer *buffers; - const char * name; -@@ -127,20 +128,41 @@ typedef struct pts_track_s - pts_track_el_t a[PTS_TRACK_SIZE]; - } pts_track_t; - -+typedef enum drain_state_e -+{ -+ DRAIN_NONE = 0, // Not draining -+ DRAIN_TIMEOUT, // Drain until normal timeout setup yields no frame -+ DRAIN_LAST, // Drain with long timeout last_frame in received on output expected -+ DRAIN_EOS, // Drain with long timeout EOS expected -+ DRAIN_DONE // Drained -+} drain_state_t; -+ - typedef struct DeintV4L2M2MContextShared { - void * logctx; // For logging - will be NULL when done - filter_type_v4l2_t filter_type; - - int fd; -- int done; -+ int done; // fd closed - awating all refs dropped - int width; - int height; - -+ int drain; // EOS received (inlink status) -+ drain_state_t drain_state; -+ int64_t drain_pts; // PTS associated with inline status -+ -+ unsigned int frames_rx; -+ unsigned int frames_tx; -+ - // from options - int output_width; - int output_height; - enum AVPixelFormat output_format; - -+ int has_enc_stop; -+ // We expect to get exactly the same number of frames out as we put in -+ // We can drain by matching input to output -+ int one_to_one; -+ - int orig_width; - int 
orig_height; - atomic_uint refcount; -@@ -179,6 +201,12 @@ typedef struct DeintV4L2M2MContext { - enum AVChromaLocation chroma_location; - } DeintV4L2M2MContext; - -+ -+static inline int drain_frame_expected(const drain_state_t d) -+{ -+ return d == DRAIN_EOS || d == DRAIN_LAST; -+} -+ - // These just list the ones we know we can cope with - static uint32_t - fmt_av_to_v4l2(const enum AVPixelFormat avfmt) -@@ -334,6 +362,13 @@ fail: - return 0; - } - -+// We are only ever expecting in-order frames so nothing more clever is required -+static unsigned int -+pts_track_count(const pts_track_t * const trk) -+{ -+ return (trk->n - trk->last_n) & (PTS_TRACK_SIZE - 1); -+} -+ - static struct timeval pts_track_add_frame(pts_track_t * const trk, const AVFrame * const src) - { - const uint32_t n = pts_track_next_n(trk); -@@ -406,6 +441,12 @@ fmt_pixelformat(const struct v4l2_format * const fmt) - return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; - } - -+static inline uint32_t -+buf_bytesused0(const struct v4l2_buffer * const buf) -+{ -+ return V4L2_TYPE_IS_MULTIPLANAR(buf->type) ? buf->m.planes[0].bytesused : buf->bytesused; -+} -+ - static void - init_format(V4L2Queue * const q, const uint32_t format_type) - { -@@ -1469,12 +1510,24 @@ static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int tim - - av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); - -+ if (queue->eos) { -+ av_log(ctx->logctx, AV_LOG_TRACE, ">>> %s: EOS\n", __func__); -+ return AVERROR_EOF; -+ } -+ - avbuf = deint_v4l2m2m_dequeue_buffer(queue, timeout); - if (!avbuf) { - av_log(ctx->logctx, AV_LOG_DEBUG, "%s: No buffer to dequeue (timeout=%d)\n", __func__, timeout); - return AVERROR(EAGAIN); - } - -+ if (V4L2_TYPE_IS_CAPTURE(avbuf->buffer.type)) { -+ if ((avbuf->buffer.flags & V4L2_BUF_FLAG_LAST) != 0) -+ queue->eos = 1; -+ if (buf_bytesused0(&avbuf->buffer) == 0) -+ return queue->eos ? 
AVERROR_EOF : AVERROR(EINVAL); -+ } -+ - // Fill in PTS and anciliary info from src frame - pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); - -@@ -1686,6 +1739,20 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) - else - ctx->field_order = V4L2_FIELD_INTERLACED_BT; - -+ { -+ struct v4l2_encoder_cmd ecmd = { -+ .cmd = V4L2_ENC_CMD_STOP -+ }; -+ ctx->has_enc_stop = 0; -+ if (ioctl(ctx->fd, VIDIOC_TRY_ENCODER_CMD, &ecmd) == 0) { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop succeeded\n"); -+ ctx->has_enc_stop = 1; -+ } -+ else { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "Test encode stop fail: %s\n", av_err2str(AVERROR(errno))); -+ } -+ -+ } - } - - ret = deint_v4l2m2m_enqueue_frame(output, in); -@@ -1694,6 +1761,41 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) - return ret; - } - -+static int -+ack_inlink(AVFilterContext * const avctx, DeintV4L2M2MContextShared *const s, -+ AVFilterLink * const inlink) -+{ -+ int instatus; -+ int64_t inpts; -+ -+ if (ff_inlink_acknowledge_status(inlink, &instatus, &inpts) <= 0) ++ // Sand cannot be cropped - do not try ++ if (av_rpi_is_sand_format(frame->format)) + return 0; ++#endif + -+ s->drain = instatus; -+ s->drain_pts = inpts; -+ s->drain_state = DRAIN_TIMEOUT; -+ -+ if (s->field_order == V4L2_FIELD_ANY) { // Not yet started -+ s->drain_state = DRAIN_DONE; -+ } -+ else if (s->one_to_one) { -+ s->drain_state = DRAIN_LAST; -+ } -+ else if (s->has_enc_stop) { -+ struct v4l2_encoder_cmd ecmd = { -+ .cmd = V4L2_ENC_CMD_STOP -+ }; -+ if (ioctl(s->fd, VIDIOC_ENCODER_CMD, &ecmd) == 0) { -+ av_log(avctx->priv, AV_LOG_DEBUG, "Do Encode stop\n"); -+ s->drain_state = DRAIN_EOS; -+ } -+ else { -+ av_log(avctx->priv, AV_LOG_WARNING, "Encode stop fail: %s\n", av_err2str(AVERROR(errno))); -+ } -+ } -+ return 1; -+} -+ - static int deint_v4l2m2m_activate(AVFilterContext *avctx) - { - DeintV4L2M2MContext * const priv = avctx->priv; -@@ -1702,15 +1804,13 @@ static int 
deint_v4l2m2m_activate(AVFilterContext *avctx) - AVFilterLink * const inlink = avctx->inputs[0]; - int n = 0; - int cn = 99; -- int instatus = 0; -- int64_t inpts = 0; - int did_something = 0; - - av_log(priv, AV_LOG_TRACE, "<<< %s\n", __func__); - - FF_FILTER_FORWARD_STATUS_BACK_ALL(outlink, avctx); - -- ff_inlink_acknowledge_status(inlink, &instatus, &inpts); -+ ack_inlink(avctx, s, inlink); - - if (!ff_outlink_frame_wanted(outlink)) { - av_log(priv, AV_LOG_TRACE, "%s: Not wanted out\n", __func__); -@@ -1720,7 +1820,6 @@ static int deint_v4l2m2m_activate(AVFilterContext *avctx) - AVFrame * frame = av_frame_alloc(); - int rv; - --again: - recycle_q(&s->output); - n = count_enqueued(&s->output); - -@@ -1729,10 +1828,21 @@ again: - return AVERROR(ENOMEM); - } - -- rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, n > 4 ? 300 : 0); -+ rv = deint_v4l2m2m_dequeue_frame(&s->capture, frame, -+ drain_frame_expected(s->drain_state) || n > 4 ? 300 : 0); - if (rv != 0) { - av_frame_free(&frame); -- if (rv != AVERROR(EAGAIN)) { -+ if (rv == AVERROR_EOF) { -+ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ EOF\n", __func__); -+ s->drain_state = DRAIN_DONE; -+ } -+ else if (rv == AVERROR(EAGAIN)) { -+ if (s->drain_state != DRAIN_NONE) { -+ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ empty - drain done\n", __func__); -+ s->drain_state = DRAIN_DONE; -+ } -+ } -+ else { - av_log(priv, AV_LOG_ERROR, ">>> %s: DQ fail: %s\n", __func__, av_err2str(rv)); - return rv; - } -@@ -1742,29 +1852,30 @@ again: - // frame is always consumed by filter_frame - even on error despite - // a somewhat confusing comment in the header - rv = ff_filter_frame(outlink, frame); -- -- if (instatus != 0) { -- av_log(priv, AV_LOG_TRACE, "%s: eof loop\n", __func__); -- goto again; -- } -+ ++s->frames_tx; - - av_log(priv, AV_LOG_TRACE, "%s: Filtered: %s\n", __func__, av_err2str(rv)); - did_something = 1; -+ -+ if (s->drain_state != DRAIN_NONE && pts_track_count(&s->track) == 0) { -+ av_log(priv, AV_LOG_DEBUG, "%s: --- DQ 
last - drain done\n", __func__); -+ s->drain_state = DRAIN_DONE; -+ } - } - - cn = count_enqueued(&s->capture); - } - -- if (instatus != 0) { -- ff_outlink_set_status(outlink, instatus, inpts); -- av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(instatus)); -+ if (s->drain_state == DRAIN_DONE) { -+ ff_outlink_set_status(outlink, s->drain, s->drain_pts); -+ av_log(priv, AV_LOG_TRACE, ">>> %s: Status done: %s\n", __func__, av_err2str(s->drain)); - return 0; - } - - recycle_q(&s->output); - n = count_enqueued(&s->output); - -- while (n < 6) { -+ while (n < 6 && !s->drain) { - AVFrame * frame; - int rv; - -@@ -1775,8 +1886,13 @@ again: - - if (frame == NULL) { - av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); -+ if (!ack_inlink(avctx, s, inlink)) { -+ ff_inlink_request_frame(inlink); -+ av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); -+ } - break; - } -+ ++s->frames_rx; - - rv = deint_v4l2m2m_filter_frame(inlink, frame); - av_frame_free(&frame); -@@ -1785,16 +1901,11 @@ again: - return rv; - - av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); -- ++n; -- } -- -- if (n < 6) { -- ff_inlink_request_frame(inlink); - did_something = 1; -- av_log(priv, AV_LOG_TRACE, "%s: req frame\n", __func__); -+ ++n; - } - -- if (n > 4 && ff_outlink_frame_wanted(outlink)) { -+ if ((n > 4 || s->drain) && ff_outlink_frame_wanted(outlink)) { - ff_filter_set_ready(avctx, 1); - did_something = 1; - av_log(priv, AV_LOG_TRACE, "%s: ready\n", __func__); -@@ -1873,7 +1984,18 @@ static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) - - static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) - { -- return common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE); -+ int rv; -+ DeintV4L2M2MContext * priv; -+ DeintV4L2M2MContextShared * ctx; -+ -+ if ((rv = common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE)) != 0) -+ return rv; -+ -+ priv = avctx->priv; -+ ctx = priv->shared; -+ -+ ctx->one_to_one = 1; -+ return 0; - } - - static void 
deint_v4l2m2m_uninit(AVFilterContext *avctx) -@@ -1881,6 +2003,8 @@ static void deint_v4l2m2m_uninit(AVFilterContext *avctx) - DeintV4L2M2MContext *priv = avctx->priv; - DeintV4L2M2MContextShared *ctx = priv->shared; - -+ av_log(priv, AV_LOG_VERBOSE, "Frames Rx: %u, Frames Tx: %u\n", -+ ctx->frames_rx, ctx->frames_tx); - ctx->done = 1; - ctx->logctx = NULL; // Log to NULL works, log to missing crashes - pts_track_uninit(&ctx->track); - -From 87f4f5cf8cadb5db52d474138e20dbcf53d865ed Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 5 Oct 2022 16:12:02 +0000 -Subject: [PATCH 084/186] v4l2_m2m_dec: Rework decode pending heuristic - -The old code measured the length of the entire Q in the decoder and -attempted to dynamically guess an appropriate length. This was prone to -failure when the guesswork became confused. -The new code attempts to measure the Q length before insertion into decode -which, after all, is what we actually care about. It does this by -asserting that the decoder must have consumed all packets that came -before the one associated with the most recent CAPTURE frame. This -avoids all need for reorder buffer size guesswork. 
---- - libavcodec/v4l2_m2m.h | 2 - - libavcodec/v4l2_m2m_dec.c | 77 +++++++++++++++++---------------------- - 2 files changed, 34 insertions(+), 45 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index babf101d650a..26a7161042b5 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -107,8 +107,6 @@ typedef struct V4L2m2mContext { - - /* Frame tracking */ - xlat_track_t xlat; -- int pending_hw; -- int pending_n; - - pts_stats_t pts_stat; - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 6bd9926b3f31..bec9b22fcf3f 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -349,41 +349,54 @@ static void - xlat_flush(xlat_track_t * const x) - { - unsigned int i; -+ // Do not reset track_no - this ensures that any frames left in the decoder -+ // that turn up later get discarded. -+ -+ x->last_pts = AV_NOPTS_VALUE; -+ x->last_opaque = 0; - for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i) { - x->track_els[i].pending = 0; - x->track_els[i].discard = 1; - } -- x->last_pts = AV_NOPTS_VALUE; -+} -+ -+static void -+xlat_init(xlat_track_t * const x) -+{ -+ memset(x, 0, sizeof(*x)); -+ xlat_flush(x); - } - - static int - xlat_pending(const xlat_track_t * const x) - { - unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; -- unsigned int i; -- int r = 0; -- int64_t now = AV_NOPTS_VALUE; -+ int i; -+ const int64_t now = x->last_pts; - -- for (i = 0; i < 32; ++i, n = (n - 1) % FF_V4L2_M2M_TRACK_SIZE) { -+ for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { - const V4L2m2mTrackEl * const t = x->track_els + n; - -+ // Discard only set on never-set or flushed entries -+ // So if we get here we've never successfully decoded a frame so allow -+ // more frames into the buffer before stalling -+ if (t->discard) -+ return i - 16; -+ -+ // If we've got this frame out then everything before this point -+ // must have entered the decoder - if (!t->pending) -- 
continue; -+ break; - -+ // If we've never seen a pts all we can do is count frames - if (now == AV_NOPTS_VALUE) -- now = t->dts; -+ continue; - -- if (t->pts == AV_NOPTS_VALUE || -- ((now == AV_NOPTS_VALUE || t->pts <= now) && -- (x->last_pts == AV_NOPTS_VALUE || t->pts > x->last_pts))) -- ++r; -+ if (t->dts != AV_NOPTS_VALUE && now >= t->dts) -+ break; - } - -- // If we never get any ideas about PTS vs DTS allow a lot more buffer -- if (now == AV_NOPTS_VALUE) -- r -= 16; -- -- return r; -+ return i; - } - - static inline int stream_started(const V4L2m2mContext * const s) { -@@ -557,18 +570,6 @@ static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) - return rv; - } - --// Number of frames over what xlat_pending returns that we keep *16 --// This is a min value - if it appears to be too small the threshold should --// adjust dynamically. --#define PENDING_HW_MIN (3 * 16) --// Offset to use when setting dynamically --// Set to %16 == 15 to avoid the threshold changing immediately as we relax --#define PENDING_HW_OFFSET (PENDING_HW_MIN - 1) --// Number of consecutive times we've failed to get a frame when we prefer it --// before we increase the prefer threshold (5ms * N = max expected decode --// time) --#define PENDING_N_THRESHOLD 6 -- - static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - { - V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; -@@ -578,9 +579,11 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - - do { - const int pending = xlat_pending(&s->xlat); -- const int prefer_dq = (pending > s->pending_hw / 16); -+ const int prefer_dq = (pending > 3); - const int last_src_rv = src_rv; - -+ av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); -+ - // Enqueue another pkt for decode if - // (a) We don't have a lot of stuff in the buffer already OR - // (b) ... 
we (think we) do but we've failed to get a frame already OR -@@ -625,20 +628,8 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } - } - -- // Adjust dynamic pending threshold -- if (dst_rv == 0) { -- if (--s->pending_hw < PENDING_HW_MIN) -- s->pending_hw = PENDING_HW_MIN; -- s->pending_n = 0; -- -+ if (dst_rv == 0) - set_best_effort_pts(avctx, &s->pts_stat, frame); -- } -- else if (dst_rv == AVERROR(EAGAIN)) { -- if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { -- s->pending_hw = pending * 16 + PENDING_HW_OFFSET; -- s->pending_n = 0; -- } -- } - - if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { - av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); -@@ -857,8 +848,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - if (ret < 0) - return ret; - -+ xlat_init(&s->xlat); - pts_stats_init(&s->pts_stat, avctx, "decoder"); -- s->pending_hw = PENDING_HW_MIN; - - capture = &s->capture; - output = &s->output; - -From a7bdc67135c3d0e69b0f8b58bb317194bbabe105 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 21 Oct 2022 13:48:07 +0000 -Subject: [PATCH 085/186] pthread_frame: Fix MT hwaccel. Recent change broke - it. - -Revert the effects of 35aa7e70e7ec350319e7634a30d8d8aa1e6ecdda if the -hwaccel is marked MT_SAFE. 
---- - libavcodec/pthread_frame.c | 48 ++++++++++++++++++++++++++++---------- - 1 file changed, 36 insertions(+), 12 deletions(-) - -diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c -index 2cc89a41f55f..b14f8e9360b5 100644 ---- a/libavcodec/pthread_frame.c -+++ b/libavcodec/pthread_frame.c -@@ -231,7 +231,7 @@ static attribute_align_arg void *frame_worker_thread(void *arg) - p->hwaccel_serializing = 0; - pthread_mutex_unlock(&p->parent->hwaccel_mutex); - } -- av_assert0(!avctx->hwaccel); -+ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); - - if (p->async_serializing) { - p->async_serializing = 0; -@@ -319,6 +319,12 @@ FF_ENABLE_DEPRECATION_WARNINGS - } - - dst->hwaccel_flags = src->hwaccel_flags; -+ if (src->hwaccel && -+ (src->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { -+ dst->hwaccel = src->hwaccel; -+ dst->hwaccel_context = src->hwaccel_context; -+ dst->internal->hwaccel_priv_data = src->internal->hwaccel_priv_data; -+ } - - err = av_buffer_replace(&dst->internal->pool, src->internal->pool); - if (err < 0) -@@ -434,10 +440,13 @@ static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx, - } - - /* transfer the stashed hwaccel state, if any */ -- av_assert0(!p->avctx->hwaccel); -- FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); -- FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); -- FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); -+ av_assert0(!p->avctx->hwaccel || (p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); -+ if (p->avctx->hwaccel && -+ !(p->avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { -+ FFSWAP(const AVHWAccel*, p->avctx->hwaccel, fctx->stash_hwaccel); -+ FFSWAP(void*, p->avctx->hwaccel_context, fctx->stash_hwaccel_context); -+ FFSWAP(void*, p->avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); -+ } - - av_packet_unref(p->avpkt); - ret = av_packet_ref(p->avpkt, avpkt); -@@ 
-610,9 +619,12 @@ void ff_thread_finish_setup(AVCodecContext *avctx) { - * this is done here so that this worker thread can wipe its own hwaccel - * state after decoding, without requiring synchronization */ - av_assert0(!p->parent->stash_hwaccel); -- p->parent->stash_hwaccel = avctx->hwaccel; -- p->parent->stash_hwaccel_context = avctx->hwaccel_context; -- p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; -+ if (avctx->hwaccel && -+ !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { -+ p->parent->stash_hwaccel = avctx->hwaccel; -+ p->parent->stash_hwaccel_context = avctx->hwaccel_context; -+ p->parent->stash_hwaccel_priv = avctx->internal->hwaccel_priv_data; -+ } - - pthread_mutex_lock(&p->progress_mutex); - if(atomic_load(&p->state) == STATE_SETUP_FINISHED){ -@@ -667,6 +679,15 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count) - - park_frame_worker_threads(fctx, thread_count); - -+ if (fctx->prev_thread && -+ avctx->hwaccel && (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) && -+ avctx->internal->hwaccel_priv_data != -+ fctx->prev_thread->avctx->internal->hwaccel_priv_data) { -+ if (update_context_from_thread(avctx, fctx->prev_thread->avctx, 1) < 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to update user thread.\n"); -+ } -+ } -+ - for (i = 0; i < thread_count; i++) { - PerThreadContext *p = &fctx->threads[i]; - AVCodecContext *ctx = p->avctx; -@@ -710,10 +731,13 @@ void ff_frame_thread_free(AVCodecContext *avctx, int thread_count) - - /* if we have stashed hwaccel state, move it to the user-facing context, - * so it will be freed in avcodec_close() */ -- av_assert0(!avctx->hwaccel); -- FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); -- FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); -- FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); -+ av_assert0(!avctx->hwaccel || (avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)); -+ if (avctx->hwaccel && -+ 
!(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) { -+ FFSWAP(const AVHWAccel*, avctx->hwaccel, fctx->stash_hwaccel); -+ FFSWAP(void*, avctx->hwaccel_context, fctx->stash_hwaccel_context); -+ FFSWAP(void*, avctx->internal->hwaccel_priv_data, fctx->stash_hwaccel_priv); -+ } - - av_freep(&avctx->internal->thread_ctx); - } - -From 9d2cf061d17a54ac854fb53574fa1006968cedeb Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 18 Oct 2022 13:18:27 +0000 -Subject: [PATCH 086/186] v4l2_req: Add swfmt to init logging - -(cherry picked from commit dfa03b702baaf2952bcd2bbf8badcc2f9c961ddf) ---- - libavcodec/v4l2_request_hevc.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index 614a1b4d99e4..767ecb036ad2 100644 ---- a/libavcodec/v4l2_request_hevc.c -+++ b/libavcodec/v4l2_request_hevc.c -@@ -26,6 +26,7 @@ - #include "v4l2_request_hevc.h" - - #include "libavutil/hwcontext_drm.h" -+#include "libavutil/pixdesc.h" - - #include "v4l2_req_devscan.h" - #include "v4l2_req_dmabufs.h" -@@ -306,10 +307,11 @@ retry_src_memtype: - // Set our s/w format - avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format; - -- av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s\n", -+ av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s; buffers: src %s, dst %s; swfmt=%s\n", - ctx->fns->name, - decdev_media_path(decdev), decdev_video_path(decdev), -- mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype)); -+ mediabufs_memory_name(src_memtype), mediabufs_memory_name(dst_memtype), -+ av_get_pix_fmt_name(avctx->sw_pix_fmt)); - - return 0; - - -From c848f442ed183086c17ca83a9401f33e6b36ce80 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 18 Oct 2022 13:39:54 +0000 -Subject: [PATCH 087/186] v4l2_m2m: Avoid polling on a queue that is streamoff - -(cherry picked from commit b2658bc56d3034a17db7f39597fc7d71bfe9a43b) ---- - 
libavcodec/v4l2_context.c | 13 +++++++++---- - 1 file changed, 9 insertions(+), 4 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 4a359bf45e30..b296dc111c1c 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -578,6 +578,11 @@ get_event(V4L2m2mContext * const m) - return 0; - } - -+static inline int -+dq_ok(const V4L2Context * const c) -+{ -+ return c->streamon && atomic_load(&c->q_count) != 0; -+} - - // Get a buffer - // If output then just gets the buffer in the expected way -@@ -613,13 +618,13 @@ get_qbuf(V4L2Context * const ctx, V4L2Buffer ** const ppavbuf, const int timeout - } - - // If capture && timeout == -1 then also wait for rx buffer free -- if (is_cap && timeout == -1 && m->output.streamon && !m->draining) -+ if (is_cap && timeout == -1 && dq_ok(&m->output) && !m->draining) - pfd.events |= poll_out; - - // If nothing Qed all we will get is POLLERR - avoid that -- if ((pfd.events == poll_out && atomic_load(&m->output.q_count) == 0) || -- (pfd.events == poll_cap && atomic_load(&m->capture.q_count) == 0) || -- (pfd.events == (poll_cap | poll_out) && atomic_load(&m->capture.q_count) == 0 && atomic_load(&m->output.q_count) == 0)) { -+ if ((pfd.events == poll_out && !dq_ok(&m->output)) || -+ (pfd.events == poll_cap && !dq_ok(&m->capture)) || -+ (pfd.events == (poll_cap | poll_out) && !dq_ok(&m->capture) && !dq_ok(&m->output))) { - av_log(avctx, AV_LOG_TRACE, "V4L2 poll %s empty\n", ctx->name); - return AVERROR(ENOSPC); - } - -From 3e501d4ca4266ef8ffbf19808c42ccb3e40d6392 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 18 Oct 2022 14:07:04 +0000 -Subject: [PATCH 088/186] v4l2_m2m: Add function to get number of queued - buffers - -(cherry picked from commit f9ac6485c00b4531dcff354222aef450b29728f4) ---- - libavcodec/v4l2_context.h | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 523c53e97dc5..8e4f68164351 
100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -220,4 +220,15 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); - - void ff_v4l2_dq_all(V4L2Context *const ctx); - -+/** -+ * Returns the number of buffers currently queued -+ * -+ * @param[in] ctx The V4L2Context to evaluate -+ */ -+static inline int -+ff_v4l2_context_q_count(const V4L2Context* const ctx) -+{ -+ return atomic_load(&ctx->q_count); -+} -+ - #endif // AVCODEC_V4L2_CONTEXT_H - -From c0eac42165afdf7a7efad03bd140506263948940 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 18 Oct 2022 14:48:20 +0000 -Subject: [PATCH 089/186] v4l2_m2m: Add timeouts to dq_all and dequeue_packet - -Add timeouts and use them to have better flow control in encode - -(cherry picked from commit c6173cad7f21697e12887982bda796de9719bb32) ---- - libavcodec/v4l2_context.c | 16 +++++++++++----- - libavcodec/v4l2_context.h | 15 +++++++++++++-- - libavcodec/v4l2_m2m_enc.c | 28 +++++++++++++++++++--------- - 3 files changed, 43 insertions(+), 16 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index b296dc111c1c..7031f3d3409d 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -712,13 +712,19 @@ clean_v4l2_buffer(V4L2Buffer * const avbuf) - return avbuf; - } - --void --ff_v4l2_dq_all(V4L2Context *const ctx) -+int -+ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1) - { - V4L2Buffer * avbuf; -+ if (timeout1 != 0) { -+ int rv = get_qbuf(ctx, &avbuf, timeout1); -+ if (rv != 0) -+ return rv; -+ } - do { - get_qbuf(ctx, &avbuf, 0); - } while (avbuf); -+ return 0; - } - - static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) -@@ -727,7 +733,7 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) - - /* get back as many output buffers as possible */ - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) -- ff_v4l2_dq_all(ctx); -+ ff_v4l2_dq_all(ctx, 0); - - for (i = 0; i < ctx->num_buffers; i++) { - V4L2Buffer * const avbuf = 
(V4L2Buffer *)ctx->bufrefs[i]->data; -@@ -1047,7 +1053,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) - return 0; - } - --int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) -+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) - { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); - AVCodecContext *const avctx = s->avctx; -@@ -1055,7 +1061,7 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) - int rv; - - do { -- if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) -+ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) - return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC - if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) - return rv; -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 8e4f68164351..5afed3e6ecb4 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -179,7 +179,7 @@ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd); - * @param[inout] pkt The AVPacket to dequeue to. - * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error. 
+ desc = av_pix_fmt_desc_get(frame->format); + if (!desc) + return AVERROR_BUG; +diff --git a/libavutil/frame.h b/libavutil/frame.h +index f7806566d54c..00c5c925e31d 100644 +--- a/libavutil/frame.h ++++ b/libavutil/frame.h +@@ -1037,6 +1037,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags); */ --int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); -+int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout); + const char *av_frame_side_data_name(enum AVFrameSideDataType type); ++ ++static inline int av_frame_cropped_width(const AVFrame * const frame) ++{ ++ return frame->width - (frame->crop_left + frame->crop_right); ++} ++static inline int av_frame_cropped_height(const AVFrame * const frame) ++{ ++ return frame->height - (frame->crop_top + frame->crop_bottom); ++} ++ /** - * Dequeues a buffer from a V4L2Context to an AVFrame. -@@ -218,7 +218,18 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const - */ - int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); - --void ff_v4l2_dq_all(V4L2Context *const ctx); -+/** -+ * Dequeue all buffers on this queue -+ * -+ * Used to recycle output buffers -+ * -+ * @param[in] ctx The V4L2Context to dequeue from. -+ * @param[in] timeout1 A timeout on dequeuing the 1st buffer, -+ * all others have a timeout of zero -+ * @return AVERROR(EAGAIN) if timeout1 non-zero then the return -+ * of the first dequeue operation, 0 otherwise. 
-+ */ -+int ff_v4l2_dq_all(V4L2Context *const ctx, int timeout1); - - /** - * Returns the number of buffers currently queued -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index a992a3cccc68..d0d27e5bc2f4 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -420,16 +420,24 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) - { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const output = &s->output; -+ int rv; -+ int needs_slot = ff_v4l2_context_q_count(output) == output->num_buffers; - -- ff_v4l2_dq_all(output); -+ av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot); - -- // Signal EOF if needed -+ // Signal EOF if needed (doesn't need q slot) - if (!frame) { - return ff_v4l2_context_enqueue_frame(output, frame); - } - -+ if ((rv = ff_v4l2_dq_all(output, needs_slot? 500 : 0)) != 0) { -+ // We should be able to return AVERROR(EAGAIN) to indicate buffer -+ // exhaustion, but ffmpeg currently treats that as fatal. -+ av_log(avctx, AV_LOG_WARNING, "Failed to get buffer for src frame: %s\n", av_err2str(rv)); -+ return rv; -+ } -+ - if (s->input_drm && !output->streamon) { -- int rv; - struct v4l2_format req_format = {.type = output->format.type}; - - // Set format when we first get a buffer -@@ -494,7 +502,9 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) - AVFrame *frame = s->frame; - int ret; - -- ff_v4l2_dq_all(output); -+ av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -+ -+ ff_v4l2_dq_all(output, 0); - - if (s->draining) - goto dequeue; -@@ -532,10 +542,10 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) - } - - dequeue: -- ret = ff_v4l2_context_dequeue_packet(capture, avpkt); -- ff_v4l2_dq_all(output); -+ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, s->draining ? 
300 : 0); -+ ff_v4l2_dq_all(output, 0); - if (ret) -- return ret; -+ return (s->draining && ret == AVERROR(EAGAIN)) ? AVERROR_EOF : ret; - - if (capture->first_buf == 1) { - uint8_t * data; -@@ -566,8 +576,8 @@ dequeue: - s->extdata_size = len; - } - -- ret = ff_v4l2_context_dequeue_packet(capture, avpkt); -- ff_v4l2_dq_all(output); -+ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, 0); -+ ff_v4l2_dq_all(output, 0); - if (ret) - return ret; - } - -From f09618a055068582ebe7f6e704212b04f8d00bc7 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 18 Oct 2022 14:23:32 +0000 -Subject: [PATCH 090/186] v4l2_m2m_enc: Improve debug trace - -(cherry picked from commit 113e89daffb329a0cd3d920abd483a4025664bf5) ---- - libavcodec/v4l2_m2m_enc.c | 13 ++++++++++--- - 1 file changed, 10 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index d0d27e5bc2f4..c8c2de3d4706 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -427,6 +427,7 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) - - // Signal EOF if needed (doesn't need q slot) - if (!frame) { -+ av_log(avctx, AV_LOG_TRACE, "--- %s: EOS\n", __func__); - return ff_v4l2_context_enqueue_frame(output, frame); - } - -@@ -491,7 +492,12 @@ static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) - v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); - #endif - -- return ff_v4l2_context_enqueue_frame(output, frame); -+ rv = ff_v4l2_context_enqueue_frame(output, frame); -+ if (rv) { -+ av_log(avctx, AV_LOG_ERROR, "Enqueue frame failed: %s\n", av_err2str(rv)); -+ } -+ -+ return rv; - } - - static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) -@@ -502,7 +508,8 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) - AVFrame *frame = s->frame; - int ret; - -- av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -+ av_log(avctx, AV_LOG_TRACE, "<<< %s: qlen out %d 
cap %d\n", __func__, -+ ff_v4l2_context_q_count(output), ff_v4l2_context_q_count(capture)); - - ff_v4l2_dq_all(output, 0); - -@@ -615,11 +622,11 @@ dequeue: - avpkt->size = newlen; - } - --// av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret); - capture->first_buf = 0; - return 0; - - fail_no_mem: -+ av_log(avctx, AV_LOG_ERROR, "Rx pkt failed: No memory\n"); - ret = AVERROR(ENOMEM); - av_packet_unref(avpkt); - return ret; - -From 739483231401288794ff9e4acc253ef1129436cf Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 18 Oct 2022 13:22:36 +0000 -Subject: [PATCH 091/186] v4l2_m2m_enc: Copy dest packets to memory if short of - v4l2 buffers - -(cherry picked from commit aa4ebbda400b42db952fc713b26927fc8636b0e5) ---- - libavcodec/v4l2_m2m_enc.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index c8c2de3d4706..c23187e6e67a 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -621,6 +621,22 @@ dequeue: - avpkt->data = buf->data; - avpkt->size = newlen; - } -+ else if (ff_v4l2_context_q_count(capture) < 2) { -+ // Avoid running out of capture buffers -+ // In most cases the buffers will be returned quickly in which case -+ // we don't copy and can use the v4l2 buffers directly but sometimes -+ // ffmpeg seems to hold onto all of them for a long time (.mkv -+ // creation?) so avoid deadlock in those cases. 
-+ AVBufferRef * const buf = av_buffer_alloc(avpkt->size + AV_INPUT_BUFFER_PADDING_SIZE); -+ if (buf == NULL) -+ goto fail_no_mem; -+ -+ memcpy(buf->data, avpkt->data, avpkt->size); -+ av_buffer_unref(&avpkt->buf); // Will recycle the V4L2 buffer -+ -+ avpkt->buf = buf; -+ avpkt->data = buf->data; -+ } - - capture->first_buf = 0; - return 0; - -From 680669b95cae532061fef61d972fb3c9c2b92d67 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 19 Oct 2022 11:00:16 +0000 -Subject: [PATCH 092/186] v4l2_m2m_dec: Fix pts_best_effort guessing for - initial pts - -(cherry picked from commit 1af32e5c87586a0f7e76cdf19a012ddbcf3eac67) ---- - libavcodec/v4l2_m2m_dec.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index bec9b22fcf3f..47b2735f8252 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -113,6 +113,8 @@ log_dump(void * logctx, int lvl, const void * const data, const size_t len) - - static int64_t pts_stats_guess(const pts_stats_t * const stats) - { -+ if (stats->last_count <= 1) -+ return stats->last_pts; - if (stats->last_pts == AV_NOPTS_VALUE || - stats->last_interval == 0 || - stats->last_count >= STATS_LAST_COUNT_MAX) - -From 3da063bb46e1d4ed5804d97230d74762076e1b13 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 19 Oct 2022 14:47:04 +0000 -Subject: [PATCH 093/186] v4l2_m2m_enc: Wait for frame or space in src Q in - rx_pkt - -If receive_packet we should ensure that there is space in the source Q -if we return EAGAIN so wait for either an output packet or space if -the source Q is currently full. 
- -(cherry picked from commit 82f0c55782a67a8cc665d937647706c2a75f5548) ---- - libavcodec/v4l2_m2m_enc.c | 22 +++++++++++++++++++--- - 1 file changed, 19 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c -index c23187e6e67a..524e9424a5e8 100644 ---- a/libavcodec/v4l2_m2m_enc.c -+++ b/libavcodec/v4l2_m2m_enc.c -@@ -415,13 +415,17 @@ static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * - return 1; - } - -+static inline int q_full(const V4L2Context *const output) -+{ -+ return ff_v4l2_context_q_count(output) == output->num_buffers; -+} - - static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) - { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const output = &s->output; - int rv; -- int needs_slot = ff_v4l2_context_q_count(output) == output->num_buffers; -+ const int needs_slot = q_full(output); - - av_log(avctx, AV_LOG_TRACE, "<<< %s; needs_slot=%d\n", __func__, needs_slot); - -@@ -549,8 +553,20 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) - } - - dequeue: -- ret = ff_v4l2_context_dequeue_packet(capture, avpkt, s->draining ? 300 : 0); -- ff_v4l2_dq_all(output, 0); -+ // Dequeue a frame -+ for (;;) { -+ int t = q_full(output) ? -1 : s->draining ? 300 : 0; -+ int rv2; -+ -+ // If output is full wait for either a packet or output to become not full -+ ret = ff_v4l2_context_dequeue_packet(capture, avpkt, t); -+ -+ // If output was full retry packet dequeue -+ t = (ret != AVERROR(EAGAIN) || t != -1) ? 0 : 300; -+ rv2 = ff_v4l2_dq_all(output, t); -+ if (t == 0 || rv2 != 0) -+ break; -+ } - if (ret) - return (s->draining && ret == AVERROR(EAGAIN)) ? 
AVERROR_EOF : ret; - - -From 90a2740b29653aaddb67b309899f50475b76c330 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 19 Oct 2022 14:54:29 +0000 -Subject: [PATCH 094/186] vf_deinterlace_v4l2m2m: Print dts rather that NOPTS - in trace - -(cherry picked from commit e9b468f35f0c6ad9bfe96f5a05e449afa8ae074a) ---- - libavfilter/vf_deinterlace_v4l2m2m.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index ce875c2c619c..7c6751b69c65 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -1668,8 +1668,8 @@ static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) - V4L2Queue *output = &ctx->output; - int ret; - -- av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" (%"PRId64") field :%d interlaced: %d aspect:%d/%d\n", -- __func__, in->pts, AV_NOPTS_VALUE, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); -+ av_log(priv, AV_LOG_DEBUG, "<<< %s: input pts: %"PRId64" dts: %"PRId64" field :%d interlaced: %d aspect:%d/%d\n", -+ __func__, in->pts, in->pkt_dts, in->top_field_first, in->interlaced_frame, in->sample_aspect_ratio.num, in->sample_aspect_ratio.den); - av_log(priv, AV_LOG_DEBUG, "--- %s: in status in %d/ot %d; out status in %d/out %d\n", __func__, - avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); - - -From 56e2c1564e5288ad8642bb4e1954505586566765 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 19 Oct 2022 14:55:21 +0000 -Subject: [PATCH 095/186] vf_deinterlace_v4l2m2m: Ignore "wanted" when - processing input - -If we gate send a frame to the outlink on its frame_wanted flag then we -will sometimes stall as the flag may not get set by ffmpeg's filter -processing. So stuff the output whether or not it wants it which works -much better. 
- -(cherry picked from commit 808254cc04e5e6574cbab9af254b6c2f3d4142e3) ---- - libavfilter/vf_deinterlace_v4l2m2m.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - -diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c -index 7c6751b69c65..a173a291f837 100644 ---- a/libavfilter/vf_deinterlace_v4l2m2m.c -+++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -1812,10 +1812,7 @@ static int deint_v4l2m2m_activate(AVFilterContext *avctx) - - ack_inlink(avctx, s, inlink); - -- if (!ff_outlink_frame_wanted(outlink)) { -- av_log(priv, AV_LOG_TRACE, "%s: Not wanted out\n", __func__); -- } -- else if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! -+ if (s->field_order != V4L2_FIELD_ANY) // Can't DQ if no setup! - { - AVFrame * frame = av_frame_alloc(); - int rv; - -From ec50574d7b8ae1188e3d1f4f526e09a827485052 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 19 Oct 2022 15:00:43 +0000 -Subject: [PATCH 096/186] conf_native: Add --enable-gpl - -(cherry picked from commit bab9bf4a2e39391940d88af2ce5d70236ac21f15) ---- - pi-util/conf_native.sh | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh -index f22d531ca448..082d9b58320e 100755 ---- a/pi-util/conf_native.sh -+++ b/pi-util/conf_native.sh -@@ -94,6 +94,7 @@ $FFSRC/configure \ - --enable-libdrm\ - --enable-vout-egl\ - --enable-vout-drm\ -+ --enable-gpl\ - $SHARED_LIBS\ - $RPIOPTS\ - --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\ - -From 491ca2cc58d75d0fe16ba6be000e92c47219e7f2 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 15 Nov 2022 13:33:00 +0000 -Subject: [PATCH 097/186] egl_vout: Make formatting consistent - no code - changes - ---- - libavdevice/egl_vout.c | 741 ++++++++++++++++++++--------------------- - 1 file changed, 369 insertions(+), 372 deletions(-) - -diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c -index 7b9c610ace28..a52cabb082e9 100644 ---- a/libavdevice/egl_vout.c 
-+++ b/libavdevice/egl_vout.c -@@ -48,20 +48,20 @@ - #define TRACE_ALL 0 - - struct egl_setup { -- int conId; -- -- Display *dpy; -- EGLDisplay egl_dpy; -- EGLContext ctx; -- EGLSurface surf; -- Window win; -- -- uint32_t crtcId; -- int crtcIdx; -- uint32_t planeId; -- struct { -- int x, y, width, height; -- } compose; -+ int conId; -+ -+ Display *dpy; -+ EGLDisplay egl_dpy; -+ EGLContext ctx; -+ EGLSurface surf; -+ Window win; -+ -+ uint32_t crtcId; -+ int crtcIdx; -+ uint32_t planeId; -+ struct { -+ int x, y, width, height; -+ } compose; - }; - - typedef struct egl_aux_s { -@@ -70,8 +70,7 @@ typedef struct egl_aux_s { - - } egl_aux_t; - --typedef struct egl_display_env_s --{ -+typedef struct egl_display_env_s { - AVClass *class; - - struct egl_setup setup; -@@ -89,8 +88,8 @@ typedef struct egl_display_env_s - sem_t display_start_sem; - sem_t q_sem; - int q_terminate; -- AVFrame * q_this; -- AVFrame * q_next; -+ AVFrame *q_this; -+ AVFrame *q_next; - - } egl_display_env_t; - -@@ -99,45 +98,44 @@ typedef struct egl_display_env_s - * Remove window border/decorations. - */ - static void --no_border( Display *dpy, Window w) -+no_border(Display *dpy, Window w) - { -- static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); -- static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; -- -- typedef struct -- { -- unsigned long flags; -- unsigned long functions; -- unsigned long decorations; -- long inputMode; -- unsigned long status; -- } PropMotifWmHints; -- -- PropMotifWmHints motif_hints; -- Atom prop, proptype; -- unsigned long flags = 0; -- -- /* setup the property */ -- motif_hints.flags = MWM_HINTS_DECORATIONS; -- motif_hints.decorations = flags; -- -- /* get the atom for the property */ -- prop = XInternAtom( dpy, "_MOTIF_WM_HINTS", True ); -- if (!prop) { -- /* something went wrong! 
*/ -- return; -- } -- -- /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ -- proptype = prop; -- -- XChangeProperty( dpy, w, /* display, window */ -+ static const unsigned MWM_HINTS_DECORATIONS = (1 << 1); -+ static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5; -+ -+ typedef struct { -+ unsigned long flags; -+ unsigned long functions; -+ unsigned long decorations; -+ long inputMode; -+ unsigned long status; -+ } PropMotifWmHints; -+ -+ PropMotifWmHints motif_hints; -+ Atom prop, proptype; -+ unsigned long flags = 0; -+ -+ /* setup the property */ -+ motif_hints.flags = MWM_HINTS_DECORATIONS; -+ motif_hints.decorations = flags; -+ -+ /* get the atom for the property */ -+ prop = XInternAtom(dpy, "_MOTIF_WM_HINTS", True); -+ if (!prop) { -+ /* something went wrong! */ -+ return; -+ } -+ -+ /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */ -+ proptype = prop; -+ -+ XChangeProperty(dpy, w, /* display, window */ - prop, proptype, /* property, type */ - 32, /* format: 32-bit datums */ - PropModeReplace, /* mode */ -- (unsigned char *) &motif_hints, /* data */ -+ (unsigned char *)&motif_hints, /* data */ - PROP_MOTIF_WM_HINTS_ELEMENTS /* nelements */ -- ); -+ ); - } - - -@@ -146,247 +144,247 @@ no_border( Display *dpy, Window w) - * Return the window and context handles. - */ - static int --make_window(struct AVFormatContext * const s, -- egl_display_env_t * const de, -+make_window(struct AVFormatContext *const s, -+ egl_display_env_t *const de, - Display *dpy, EGLDisplay egl_dpy, const char *name, - Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet) - { -- int scrnum = DefaultScreen( dpy ); -- XSetWindowAttributes attr; -- unsigned long mask; -- Window root = RootWindow( dpy, scrnum ); -- Window win; -- EGLContext ctx; -- const int fullscreen = de->fullscreen; -- EGLConfig config; -- int x = de->window_x; -- int y = de->window_y; -- int width = de->window_width ? de->window_width : 1280; -- int height = de->window_height ? 
de->window_height : 720; -- -- -- if (fullscreen) { -- int scrnum = DefaultScreen(dpy); -- -- x = 0; y = 0; -- width = DisplayWidth(dpy, scrnum); -- height = DisplayHeight(dpy, scrnum); -- } -- -- { -- EGLint num_configs; -- static const EGLint attribs[] = { -- EGL_RED_SIZE, 1, -- EGL_GREEN_SIZE, 1, -- EGL_BLUE_SIZE, 1, -- EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, -- EGL_NONE -- }; -- -- if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { -- av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); -- return -1; -- } -- } -- -- { -- EGLint vid; -- if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { -- av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); -- return -1; -- } -- -- { -- XVisualInfo visTemplate = { -- .visualid = vid, -- }; -- int num_visuals; -- XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, -- &visTemplate, &num_visuals); -- -- /* window attributes */ -- attr.background_pixel = 0; -- attr.border_pixel = 0; -- attr.colormap = XCreateColormap( dpy, root, visinfo->visual, AllocNone); -- attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; -- /* XXX this is a bad way to get a borderless window! 
*/ -- mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; -- -- win = XCreateWindow( dpy, root, x, y, width, height, -- 0, visinfo->depth, InputOutput, -- visinfo->visual, mask, &attr ); -- XFree(visinfo); -- } -- } -- -- if (fullscreen) -- no_border(dpy, win); -- -- /* set hints and properties */ -- { -- XSizeHints sizehints; -- sizehints.x = x; -- sizehints.y = y; -- sizehints.width = width; -- sizehints.height = height; -- sizehints.flags = USSize | USPosition; -- XSetNormalHints(dpy, win, &sizehints); -- XSetStandardProperties(dpy, win, name, name, -- None, (char **)NULL, 0, &sizehints); -- } -- -- eglBindAPI(EGL_OPENGL_ES_API); -- -- { -- static const EGLint ctx_attribs[] = { -- EGL_CONTEXT_CLIENT_VERSION, 2, -- EGL_NONE -- }; -- ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs ); -- if (!ctx) { -- av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -- return -1; -- } -- } -- -- -- XMapWindow(dpy, win); -- -- { -- EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); -- if (!surf) { -- av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); -- return -1; -- } -- -- if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { -- av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -- return -1; -- } -- -- *winRet = win; -- *ctxRet = ctx; -- *surfRet = surf; -- } -- -- return 0; -+ int scrnum = DefaultScreen(dpy); -+ XSetWindowAttributes attr; -+ unsigned long mask; -+ Window root = RootWindow(dpy, scrnum); -+ Window win; -+ EGLContext ctx; -+ const int fullscreen = de->fullscreen; -+ EGLConfig config; -+ int x = de->window_x; -+ int y = de->window_y; -+ int width = de->window_width ? de->window_width : 1280; -+ int height = de->window_height ? 
de->window_height : 720; -+ -+ -+ if (fullscreen) { -+ int scrnum = DefaultScreen(dpy); -+ -+ x = 0; y = 0; -+ width = DisplayWidth(dpy, scrnum); -+ height = DisplayHeight(dpy, scrnum); -+ } -+ -+ { -+ EGLint num_configs; -+ static const EGLint attribs[] = { -+ EGL_RED_SIZE, 1, -+ EGL_GREEN_SIZE, 1, -+ EGL_BLUE_SIZE, 1, -+ EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, -+ EGL_NONE -+ }; -+ -+ if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs)) { -+ av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n"); -+ return -1; -+ } -+ } -+ -+ { -+ EGLint vid; -+ if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n"); -+ return -1; -+ } -+ -+ { -+ XVisualInfo visTemplate = { -+ .visualid = vid, -+ }; -+ int num_visuals; -+ XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask, -+ &visTemplate, &num_visuals); -+ -+ /* window attributes */ -+ attr.background_pixel = 0; -+ attr.border_pixel = 0; -+ attr.colormap = XCreateColormap(dpy, root, visinfo->visual, AllocNone); -+ attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; -+ /* XXX this is a bad way to get a borderless window! 
*/ -+ mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; -+ -+ win = XCreateWindow(dpy, root, x, y, width, height, -+ 0, visinfo->depth, InputOutput, -+ visinfo->visual, mask, &attr); -+ XFree(visinfo); -+ } -+ } -+ -+ if (fullscreen) -+ no_border(dpy, win); -+ -+ /* set hints and properties */ -+ { -+ XSizeHints sizehints; -+ sizehints.x = x; -+ sizehints.y = y; -+ sizehints.width = width; -+ sizehints.height = height; -+ sizehints.flags = USSize | USPosition; -+ XSetNormalHints(dpy, win, &sizehints); -+ XSetStandardProperties(dpy, win, name, name, -+ None, (char **)NULL, 0, &sizehints); -+ } -+ -+ eglBindAPI(EGL_OPENGL_ES_API); -+ -+ { -+ static const EGLint ctx_attribs[] = { -+ EGL_CONTEXT_CLIENT_VERSION, 2, -+ EGL_NONE -+ }; -+ ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs); -+ if (!ctx) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -+ return -1; -+ } -+ } -+ -+ -+ XMapWindow(dpy, win); -+ -+ { -+ EGLSurface surf = eglCreateWindowSurface(egl_dpy, config, (EGLNativeWindowType)win, NULL); -+ if (!surf) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n"); -+ return -1; -+ } -+ -+ if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n"); -+ return -1; -+ } -+ -+ *winRet = win; -+ *ctxRet = ctx; -+ *surfRet = surf; -+ } -+ -+ return 0; - } - - static GLint --compile_shader(struct AVFormatContext * const avctx, GLenum target, const char *source) -+compile_shader(struct AVFormatContext *const avctx, GLenum target, const char *source) - { -- GLuint s = glCreateShader(target); -+ GLuint s = glCreateShader(target); - -- if (s == 0) { -- av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); -- return 0; -- } -+ if (s == 0) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n"); -+ return 0; -+ } - -- glShaderSource(s, 1, (const GLchar **) &source, NULL); -- glCompileShader(s); -+ glShaderSource(s, 1, (const GLchar **)&source, NULL); -+ 
glCompileShader(s); - -- { -- GLint ok; -- glGetShaderiv(s, GL_COMPILE_STATUS, &ok); -+ { -+ GLint ok; -+ glGetShaderiv(s, GL_COMPILE_STATUS, &ok); - -- if (!ok) { -- GLchar *info; -- GLint size; -+ if (!ok) { -+ GLchar *info; -+ GLint size; - -- glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); -- info = malloc(size); -+ glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size); -+ info = malloc(size); - -- glGetShaderInfoLog(s, size, NULL, info); -- av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); -+ glGetShaderInfoLog(s, size, NULL, info); -+ av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info, source); - -- return 0; -- } -- } -+ return 0; -+ } -+ } - -- return s; -+ return s; - } - --static GLuint link_program(struct AVFormatContext * const s, GLint vs, GLint fs) -+static GLuint link_program(struct AVFormatContext *const s, GLint vs, GLint fs) - { -- GLuint prog = glCreateProgram(); -- -- if (prog == 0) { -- av_log(s, AV_LOG_ERROR, "Failed to create program\n"); -- return 0; -- } -- -- glAttachShader(prog, vs); -- glAttachShader(prog, fs); -- glLinkProgram(prog); -- -- { -- GLint ok; -- glGetProgramiv(prog, GL_LINK_STATUS, &ok); -- if (!ok) { -- /* Some drivers return a size of 1 for an empty log. This is the size -- * of a log that contains only a terminating NUL character. -- */ -- GLint size; -- GLchar *info = NULL; -- glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); -- if (size > 1) { -- info = malloc(size); -- glGetProgramInfoLog(prog, size, NULL, info); -- } -+ GLuint prog = glCreateProgram(); - -- av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", -- (info != NULL) ? 
info : ""); -- return 0; -- } -- } -+ if (prog == 0) { -+ av_log(s, AV_LOG_ERROR, "Failed to create program\n"); -+ return 0; -+ } -+ -+ glAttachShader(prog, vs); -+ glAttachShader(prog, fs); -+ glLinkProgram(prog); -+ -+ { -+ GLint ok; -+ glGetProgramiv(prog, GL_LINK_STATUS, &ok); -+ if (!ok) { -+ /* Some drivers return a size of 1 for an empty log. This is the size -+ * of a log that contains only a terminating NUL character. -+ */ -+ GLint size; -+ GLchar *info = NULL; -+ glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size); -+ if (size > 1) { -+ info = malloc(size); -+ glGetProgramInfoLog(prog, size, NULL, info); -+ } - -- return prog; -+ av_log(s, AV_LOG_ERROR, "Failed to link: %s\n", -+ (info != NULL) ? info : ""); -+ return 0; -+ } -+ } -+ -+ return prog; - } - - static int --gl_setup(struct AVFormatContext * const s) -+gl_setup(struct AVFormatContext *const s) - { -- const char *vs = -- "attribute vec4 pos;\n" -- "varying vec2 texcoord;\n" -- "\n" -- "void main() {\n" -- " gl_Position = pos;\n" -- " texcoord.x = (pos.x + 1.0) / 2.0;\n" -- " texcoord.y = (-pos.y + 1.0) / 2.0;\n" -- "}\n"; -- const char *fs = -- "#extension GL_OES_EGL_image_external : enable\n" -- "precision mediump float;\n" -- "uniform samplerExternalOES s;\n" -- "varying vec2 texcoord;\n" -- "void main() {\n" -- " gl_FragColor = texture2D(s, texcoord);\n" -- "}\n"; -- -- GLuint vs_s; -- GLuint fs_s; -- GLuint prog; -- -- if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || -- !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || -- !(prog = link_program(s, vs_s, fs_s))) -- return -1; -- -- glUseProgram(prog); -- -- { -- static const float verts[] = { -- -1, -1, -- 1, -1, -- 1, 1, -- -1, 1, -- }; -- glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); -- } -- -- glEnableVertexAttribArray(0); -- return 0; -+ const char *vs = -+ "attribute vec4 pos;\n" -+ "varying vec2 texcoord;\n" -+ "\n" -+ "void main() {\n" -+ " gl_Position = pos;\n" -+ " texcoord.x = (pos.x + 1.0) / 2.0;\n" -+ " 
texcoord.y = (-pos.y + 1.0) / 2.0;\n" -+ "}\n"; -+ const char *fs = -+ "#extension GL_OES_EGL_image_external : enable\n" -+ "precision mediump float;\n" -+ "uniform samplerExternalOES s;\n" -+ "varying vec2 texcoord;\n" -+ "void main() {\n" -+ " gl_FragColor = texture2D(s, texcoord);\n" -+ "}\n"; -+ -+ GLuint vs_s; -+ GLuint fs_s; -+ GLuint prog; -+ -+ if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) || -+ !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) || -+ !(prog = link_program(s, vs_s, fs_s))) -+ return -1; -+ -+ glUseProgram(prog); -+ -+ { -+ static const float verts[] = { -+ -1, -1, -+ 1, -1, -+ 1, 1, -+ -1, 1, -+ }; -+ glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts); -+ } -+ -+ glEnableVertexAttribArray(0); -+ return 0; - } - - static int egl_vout_write_trailer(AVFormatContext *s) -@@ -400,12 +398,12 @@ static int egl_vout_write_trailer(AVFormatContext *s) - - static int egl_vout_write_header(AVFormatContext *s) - { -- const AVCodecParameters * const par = s->streams[0]->codecpar; -+ const AVCodecParameters *const par = s->streams[0]->codecpar; - - #if TRACE_ALL - av_log(s, AV_LOG_INFO, "%s\n", __func__); - #endif -- if ( s->nb_streams > 1 -+ if (s->nb_streams > 1 - || par->codec_type != AVMEDIA_TYPE_VIDEO - || par->codec_id != AV_CODEC_ID_WRAPPED_AVFRAME) { - av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n"); -@@ -416,10 +414,10 @@ static int egl_vout_write_header(AVFormatContext *s) - } - - --static int do_display(AVFormatContext * const s, egl_display_env_t * const de, AVFrame * const frame) -+static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVFrame *const frame) - { -- const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0]; -- egl_aux_t * da = NULL; -+ const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)frame->data[0]; -+ egl_aux_t *da = NULL; - unsigned int i; - - #if TRACE_ALL -@@ -440,26 +438,26 @@ static int do_display(AVFormatContext * const s, 
egl_display_env_t * const de, A - - if (da->texture == 0) { - EGLint attribs[50]; -- EGLint * a = attribs; -+ EGLint *a = attribs; - int i, j; - static const EGLint anames[] = { -- EGL_DMA_BUF_PLANE0_FD_EXT, -- EGL_DMA_BUF_PLANE0_OFFSET_EXT, -- EGL_DMA_BUF_PLANE0_PITCH_EXT, -- EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, -- EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, -- EGL_DMA_BUF_PLANE1_FD_EXT, -- EGL_DMA_BUF_PLANE1_OFFSET_EXT, -- EGL_DMA_BUF_PLANE1_PITCH_EXT, -- EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, -- EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, -- EGL_DMA_BUF_PLANE2_FD_EXT, -- EGL_DMA_BUF_PLANE2_OFFSET_EXT, -- EGL_DMA_BUF_PLANE2_PITCH_EXT, -- EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, -- EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, -+ EGL_DMA_BUF_PLANE0_FD_EXT, -+ EGL_DMA_BUF_PLANE0_OFFSET_EXT, -+ EGL_DMA_BUF_PLANE0_PITCH_EXT, -+ EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT, -+ EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT, -+ EGL_DMA_BUF_PLANE1_FD_EXT, -+ EGL_DMA_BUF_PLANE1_OFFSET_EXT, -+ EGL_DMA_BUF_PLANE1_PITCH_EXT, -+ EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT, -+ EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT, -+ EGL_DMA_BUF_PLANE2_FD_EXT, -+ EGL_DMA_BUF_PLANE2_OFFSET_EXT, -+ EGL_DMA_BUF_PLANE2_PITCH_EXT, -+ EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT, -+ EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT, - }; -- const EGLint * b = anames; -+ const EGLint *b = anames; - - *a++ = EGL_WIDTH; - *a++ = av_frame_cropped_width(frame); -@@ -470,8 +468,8 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A - - for (i = 0; i < desc->nb_layers; ++i) { - for (j = 0; j < desc->layers[i].nb_planes; ++j) { -- const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j; -- const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index; -+ const AVDRMPlaneDescriptor *const p = desc->layers[i].planes + j; -+ const AVDRMObjectDescriptor *const obj = desc->objects + p->object_index; - *a++ = *b++; - *a++ = obj->fd; - *a++ = *b++; -@@ -479,13 +477,13 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A - 
*a++ = *b++; - *a++ = p->pitch; - if (obj->format_modifier == 0) { -- b += 2; -+ b += 2; - } - else { -- *a++ = *b++; -- *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); -- *a++ = *b++; -- *a++ = (EGLint)(obj->format_modifier >> 32); -+ *a++ = *b++; -+ *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF); -+ *a++ = *b++; -+ *a++ = (EGLint)(obj->format_modifier >> 32); - } - } - } -@@ -494,26 +492,26 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A - - #if TRACE_ALL - for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) { -- av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); -+ av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]); - } - #endif - { -- const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, -- EGL_NO_CONTEXT, -- EGL_LINUX_DMA_BUF_EXT, -- NULL, attribs); -- if (!image) { -- av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); -- return -1; -- } -- -- glGenTextures(1, &da->texture); -- glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); -- glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -- glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -- glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); -- -- eglDestroyImageKHR(de->setup.egl_dpy, image); -+ const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy, -+ EGL_NO_CONTEXT, -+ EGL_LINUX_DMA_BUF_EXT, -+ NULL, attribs); -+ if (!image) { -+ av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd); -+ return -1; -+ } -+ -+ glGenTextures(1, &da->texture); -+ glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture); -+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); -+ glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); -+ glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image); -+ -+ eglDestroyImageKHR(de->setup.egl_dpy, image); - } - - da->fd = desc->objects[0].fd; -@@ -540,7 +538,7 @@ static int 
do_display(AVFormatContext * const s, egl_display_env_t * const de, A - (long long)modifiers[1], - (long long)modifiers[2], - (long long)modifiers[3] -- ); -+ ); - #endif - } - -@@ -558,55 +556,55 @@ static int do_display(AVFormatContext * const s, egl_display_env_t * const de, A - return 0; - } - --static void * display_thread(void * v) -+static void* display_thread(void *v) - { -- AVFormatContext * const s = v; -- egl_display_env_t * const de = s->priv_data; -+ AVFormatContext *const s = v; -+ egl_display_env_t *const de = s->priv_data; - - #if TRACE_ALL - av_log(s, AV_LOG_INFO, "<<< %s\n", __func__); - #endif - { -- EGLint egl_major, egl_minor; -- -- de->setup.dpy = XOpenDisplay(NULL); -- if (!de->setup.dpy) { -- av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); -- goto fail; -- } -- -- de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); -- if (!de->setup.egl_dpy) { -- av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); -- goto fail; -- } -- -- if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { -- av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); -- goto fail; -- } -- -- av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); -- -- if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { -- av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); -- goto fail; -- } -+ EGLint egl_major, egl_minor; -+ -+ de->setup.dpy = XOpenDisplay(NULL); -+ if (!de->setup.dpy) { -+ av_log(s, AV_LOG_ERROR, "Couldn't open X display\n"); -+ goto fail; -+ } -+ -+ de->setup.egl_dpy = eglGetDisplay(de->setup.dpy); -+ if (!de->setup.egl_dpy) { -+ av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n"); -+ goto fail; -+ } -+ -+ if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) { -+ av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n"); -+ goto fail; -+ } -+ -+ av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor); -+ -+ if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) { -+ 
av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n"); -+ goto fail; -+ } - } - - if (!de->window_width || !de->window_height) { -- de->window_width = 1280; -- de->window_height = 720; -+ de->window_width = 1280; -+ de->window_height = 720; - } - if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout", - &de->setup.win, &de->setup.ctx, &de->setup.surf)) { -- av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); -- goto fail; -+ av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__); -+ goto fail; - } - - if (gl_setup(s)) { -- av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); -- goto fail; -+ av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__); -+ goto fail; - } - - #if TRACE_ALL -@@ -615,7 +613,7 @@ static void * display_thread(void * v) - sem_post(&de->display_start_sem); - - for (;;) { -- AVFrame * frame; -+ AVFrame *frame; - - while (sem_wait(&de->q_sem) != 0) { - av_assert0(errno == EINTR); -@@ -653,9 +651,9 @@ fail: - - static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) - { -- const AVFrame * const src_frame = (AVFrame *)pkt->data; -- AVFrame * frame; -- egl_display_env_t * const de = s->priv_data; -+ const AVFrame *const src_frame = (AVFrame *)pkt->data; -+ AVFrame *frame; -+ egl_display_env_t *const de = s->priv_data; - - #if TRACE_ALL - av_log(s, AV_LOG_INFO, "%s\n", __func__); -@@ -668,8 +666,7 @@ static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) - else if (src_frame->format == AV_PIX_FMT_VAAPI) { - frame = av_frame_alloc(); - frame->format = AV_PIX_FMT_DRM_PRIME; -- if (av_hwframe_map(frame, src_frame, 0) != 0) -- { -+ if (av_hwframe_map(frame, src_frame, 0) != 0) { - av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format); - av_frame_free(&frame); - return AVERROR(EINVAL); -@@ -682,12 +679,12 @@ static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) - - // Really hacky sync - while (de->show_all && de->q_next) { -- 
usleep(3000); -+ usleep(3000); - } - - pthread_mutex_lock(&de->q_lock); - { -- AVFrame * const t = de->q_next; -+ AVFrame *const t = de->q_next; - de->q_next = frame; - frame = t; - } -@@ -702,7 +699,7 @@ static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt) - } - - static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe, -- unsigned flags) -+ unsigned flags) - { - av_log(s, AV_LOG_ERROR, "%s: NIF: idx=%d, flags=%#x\n", __func__, stream_index, flags); - return AVERROR_PATCHWELCOME; -@@ -713,7 +710,7 @@ static int egl_vout_control_message(AVFormatContext *s, int type, void *data, si - #if TRACE_ALL - av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type); - #endif -- switch(type) { -+ switch (type) { - case AV_APP_TO_DEV_WINDOW_REPAINT: - return 0; - default: -@@ -723,14 +720,14 @@ static int egl_vout_control_message(AVFormatContext *s, int type, void *data, si - } - - // deinit is called if init fails so no need to clean up explicity here --static int egl_vout_init(struct AVFormatContext * s) -+static int egl_vout_init(struct AVFormatContext *s) - { -- egl_display_env_t * const de = s->priv_data; -+ egl_display_env_t *const de = s->priv_data; - unsigned int i; - - av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); - -- de->setup = (struct egl_setup){0}; -+ de->setup = (struct egl_setup) { 0 }; - - for (i = 0; i != 32; ++i) { - de->aux[i].fd = -1; -@@ -744,8 +741,8 @@ static int egl_vout_init(struct AVFormatContext * s) - - sem_wait(&de->display_start_sem); - if (de->q_terminate) { -- av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); -- return -1; -+ av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__); -+ return -1; - } - - av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__); -@@ -753,9 +750,9 @@ static int egl_vout_init(struct AVFormatContext * s) - return 0; - } - --static void egl_vout_deinit(struct AVFormatContext * s) -+static void egl_vout_deinit(struct AVFormatContext *s) - { -- 
egl_display_env_t * const de = s->priv_data; -+ egl_display_env_t *const de = s->priv_data; - - av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); - -@@ -773,11 +770,11 @@ static void egl_vout_deinit(struct AVFormatContext * s) - - #define OFFSET(x) offsetof(egl_display_env_t, x) - static const AVOption options[] = { -- { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, -- { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, -- { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, -- { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, -- { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "window_size", "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "window_x", "set window x offset", OFFSET(window_x), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "window_y", "set window y offset", OFFSET(window_y), AV_OPT_TYPE_INT, { .i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "fullscreen", "set fullscreen display", OFFSET(fullscreen), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, - { NULL } - - }; - -From a132ef51a831edfa36f52ba699922fdb06acd1b0 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 12 Dec 2022 16:49:43 +0000 -Subject: [PATCH 098/186] v4l2m2m: reporganise get_raw_format for loop logic - ---- - libavcodec/v4l2_context.c | 16 +++++----------- - 1 file changed, 5 insertions(+), 11 deletions(-) - 
-diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 7031f3d3409d..79a31cf9300b 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -828,28 +828,22 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) - return 0; - } - -- for (;;) { -+ for (;; ++fdesc.index) { - ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc); - if (ret) - return AVERROR(EINVAL); - - if (priv->pix_fmt != AV_PIX_FMT_NONE) { -- if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) { -- fdesc.index++; -+ if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) - continue; -- } - } - - pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); - ret = v4l2_try_raw_format(ctx, pixfmt); -- if (ret){ -- fdesc.index++; -- continue; -+ if (ret == 0) { -+ *p = pixfmt; -+ return 0; - } -- -- *p = pixfmt; -- -- return 0; - } - - return AVERROR(EINVAL); - -From 0189b1c3bb002b0385a419f4140371ea1ac4153c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 12 Dec 2022 17:49:12 +0000 -Subject: [PATCH 099/186] drm_vout: Set zpos on the plane we pick to ensure it - is at the front - ---- - libavdevice/drm_vout.c | 38 +++++++++++++++++++++++++++++++++----- - 1 file changed, 33 insertions(+), 5 deletions(-) - -diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c -index cfb33ce7c319..9bd9e04421d7 100644 ---- a/libavdevice/drm_vout.c -+++ b/libavdevice/drm_vout.c -@@ -115,9 +115,11 @@ static int find_plane(struct AVFormatContext * const avctx, - { - drmModePlaneResPtr planes; - drmModePlanePtr plane; -+ drmModeObjectPropertiesPtr props = NULL; -+ drmModePropertyPtr prop = NULL; - unsigned int i; - unsigned int j; -- int ret = 0; -+ int ret = -1; - - planes = drmModeGetPlaneResources(drmfd); - if (!planes) -@@ -154,11 +156,37 @@ static int find_plane(struct AVFormatContext * const avctx, - break; - } - -- if (i == planes->count_planes) -- ret = -1; -+ if (i == planes->count_planes) { 
-+ ret = -1; -+ goto fail; -+ } - -- drmModeFreePlaneResources(planes); -- return ret; -+ props = drmModeObjectGetProperties(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE); -+ if (!props) -+ goto fail; -+ for (i = 0; i != props->count_props; ++i) { -+ if (prop) -+ drmModeFreeProperty(prop); -+ prop = drmModeGetProperty(drmfd, props->props[i]); -+ if (!prop) -+ goto fail; -+ if (strcmp("zpos", prop->name) == 0) { -+ if (drmModeObjectSetProperty(drmfd, *pplane_id, DRM_MODE_OBJECT_PLANE, props->props[i], prop->values[1]) == 0) -+ av_log(avctx, AV_LOG_DEBUG, "ZPOS set to %d\n", (int)prop->values[1]); -+ else -+ av_log(avctx, AV_LOG_WARNING, "Failed to set ZPOS on DRM plane\n"); -+ break; -+ } -+ } -+ -+ ret = 0; -+fail: -+ if (props) -+ drmModeFreeObjectProperties(props); -+ if (prop) -+ drmModeFreeProperty(prop); -+ drmModeFreePlaneResources(planes); -+ return ret; - } - - static void da_uninit(drm_display_env_t * const de, drm_aux_t * da) - -From 386acb23dd6196fac68a39fa945a5b6b9c18c6a8 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 12 Dec 2022 17:51:46 +0000 -Subject: [PATCH 100/186] drm_vout: Only set modifier flag and pass modifiers - if there are some - ---- - libavdevice/drm_vout.c | 17 ++++++++++++----- - 1 file changed, 12 insertions(+), 5 deletions(-) - -diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c -index 9bd9e04421d7..a56adea86625 100644 ---- a/libavdevice/drm_vout.c -+++ b/libavdevice/drm_vout.c -@@ -34,6 +34,7 @@ - - #include - #include -+#include - - #define TRACE_ALL 0 - -@@ -249,6 +250,7 @@ static int do_display(AVFormatContext * const s, drm_display_env_t * const de, A - uint32_t offsets[4] = {0}; - uint64_t modifiers[4] = {0}; - uint32_t bo_handles[4] = {0}; -+ int has_mods = 0; - int i, j, n; - - da->frame = frame; -@@ -258,6 +260,9 @@ static int do_display(AVFormatContext * const s, drm_display_env_t * const de, A - av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR); - return -1; - } 
-+ if (desc->objects[i].format_modifier != DRM_FORMAT_MOD_LINEAR && -+ desc->objects[i].format_modifier != DRM_FORMAT_MOD_INVALID) -+ has_mods = 1; - } - - n = 0; -@@ -299,11 +304,13 @@ static int do_display(AVFormatContext * const s, drm_display_env_t * const de, A - #endif - - if (drmModeAddFB2WithModifiers(de->drm_fd, -- av_frame_cropped_width(frame), -- av_frame_cropped_height(frame), -- desc->layers[0].format, bo_handles, -- pitches, offsets, modifiers, -- &da->fb_handle, DRM_MODE_FB_MODIFIERS /** 0 if no mods */) != 0) { -+ av_frame_cropped_width(frame), -+ av_frame_cropped_height(frame), -+ desc->layers[0].format, bo_handles, -+ pitches, offsets, -+ has_mods ? modifiers : NULL, -+ &da->fb_handle, -+ has_mods ? DRM_MODE_FB_MODIFIERS : 0) != 0) { - av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR); - return -1; - } - -From 66cc08dbaec01e45af9c09a06829fcb2db58e30c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 12 Dec 2022 17:52:58 +0000 -Subject: [PATCH 101/186] drm_vout: Fix typo in error message - ---- - libavdevice/drm_vout.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c -index a56adea86625..351abf1d60aa 100644 ---- a/libavdevice/drm_vout.c -+++ b/libavdevice/drm_vout.c -@@ -596,7 +596,7 @@ static int drm_vout_init(struct AVFormatContext * s) - sem_init(&de->q_sem_out, 0, 0); - if (pthread_create(&de->q_thread, NULL, display_thread, s)) { - rv = AVERROR(errno); -- av_log(s, AV_LOG_ERROR, "Failed to creatye display thread: %s\n", av_err2str(rv)); -+ av_log(s, AV_LOG_ERROR, "Failed to create display thread: %s\n", av_err2str(rv)); - goto fail_close; - } - - -From e11c24968da620816853eb0a7d33cb3e9488afb1 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 12 Dec 2022 18:00:41 +0000 -Subject: [PATCH 102/186] drm_vout: Add option to name the drm_module to use - ---- - libavdevice/drm_vout.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - 
-diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c -index 351abf1d60aa..491e1dc60861 100644 ---- a/libavdevice/drm_vout.c -+++ b/libavdevice/drm_vout.c -@@ -70,7 +70,9 @@ typedef struct drm_display_env_s - uint32_t con_id; - struct drm_setup setup; - enum AVPixelFormat avfmt; -+ - int show_all; -+ const char * drm_module; - - unsigned int ano; - drm_aux_t aux[AUX_SIZE]; -@@ -569,7 +571,6 @@ static int drm_vout_init(struct AVFormatContext * s) - { - drm_display_env_t * const de = s->priv_data; - int rv; -- const char * drm_module = DRM_MODULE; - - av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__); - -@@ -578,10 +579,10 @@ static int drm_vout_init(struct AVFormatContext * s) - de->setup = (struct drm_setup){0}; - de->q_terminate = 0; - -- if ((de->drm_fd = drmOpen(drm_module, NULL)) < 0) -+ if ((de->drm_fd = drmOpen(de->drm_module, NULL)) < 0) - { - rv = AVERROR(errno); -- av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", drm_module, av_err2str(rv)); -+ av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", de->drm_module, av_err2str(rv)); - return rv; - } - -@@ -641,6 +642,7 @@ static void drm_vout_deinit(struct AVFormatContext * s) - #define OFFSET(x) offsetof(drm_display_env_t, x) - static const AVOption options[] = { - { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM }, -+ { "drm_module", "drm_module name to use, default=" DRM_MODULE, OFFSET(drm_module), AV_OPT_TYPE_STRING, { .str = DRM_MODULE }, 0, 0, AV_OPT_FLAG_ENCODING_PARAM }, - { NULL } - }; - - -From 397c9473723e936b86ff26fc5c0d5ba381874be3 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 13:01:00 +0000 -Subject: [PATCH 103/186] dmabufs: Rework to allow for non-CMA backends - ---- - libavcodec/v4l2_req_dmabufs.c | 161 ++++++++++++++++++++++++---------- - 1 file changed, 116 insertions(+), 45 deletions(-) - -diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c -index c4bbed18c680..1c3a5e861f0c 
100644 ---- a/libavcodec/v4l2_req_dmabufs.c -+++ b/libavcodec/v4l2_req_dmabufs.c -@@ -1,3 +1,4 @@ -+#include - #include - #include + * @return side data descriptor corresponding to a given side data type, NULL + * when not available. +diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c +index 0847db09a08b..6365b7cf211b 100644 +--- a/libavutil/hwcontext_drm.c ++++ b/libavutil/hwcontext_drm.c +@@ -21,6 +21,7 @@ + #include + #include #include -@@ -19,9 +20,21 @@ ++#include - #define TRACE_ALLOC 0 - -+struct dmabufs_ctl; -+struct dmabuf_h; -+ -+struct dmabuf_fns { -+ int (*buf_alloc)(struct dmabufs_ctl * dbsc, struct dmabuf_h * dh, size_t size); -+ void (*buf_free)(struct dmabuf_h * dh); -+ int (*ctl_new)(struct dmabufs_ctl * dbsc); -+ void (*ctl_free)(struct dmabufs_ctl * dbsc); -+}; -+ - struct dmabufs_ctl { - int fd; - size_t page_size; -+ void * v; -+ const struct dmabuf_fns * fns; - }; - - struct dmabuf_h { -@@ -29,6 +42,8 @@ struct dmabuf_h { - size_t size; - size_t len; - void * mapptr; -+ void * v; -+ const struct dmabuf_fns * fns; - }; - - #if TRACE_ALLOC -@@ -88,15 +103,8 @@ struct dmabuf_h * dmabuf_import(int fd, size_t size) - struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size) - { - struct dmabuf_h * dh; -- struct dma_heap_allocation_data data = { -- .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), -- .fd = 0, -- .fd_flags = O_RDWR, -- .heap_flags = 0 -- }; -- - if (old != NULL) { -- if (old->size == data.len) { -+ if (old->size >= size) { - return old; - } - dmabuf_free(old); -@@ -106,24 +114,16 @@ struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * ol - (dh = malloc(sizeof(*dh))) == NULL) - return NULL; - -- while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { -- int err = errno; -- request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", -- (uint64_t)data.len, -- dbsc->fd, -- err, -- strerror(err)); -- if (err == EINTR) -- continue; -- 
goto fail; -- } -- - *dh = (struct dmabuf_h){ -- .fd = data.fd, -- .size = (size_t)data.len, -- .mapptr = MAP_FAILED -+ .fd = -1, -+ .mapptr = MAP_FAILED, -+ .fns = dbsc->fns - }; - -+ if (dh->fns->buf_alloc(dbsc, dh, size) != 0) -+ goto fail; -+ -+ - #if TRACE_ALLOC - ++total_bufs; - total_size += dh->size; -@@ -220,8 +220,6 @@ void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len) - dh->len = len; - } - -- -- - void dmabuf_free(struct dmabuf_h * dh) - { - if (!dh) -@@ -233,20 +231,63 @@ void dmabuf_free(struct dmabuf_h * dh) - request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); + /* This was introduced in version 4.6. And may not exist all without an + * optional package. So to prevent a hard dependency on needing the Linux +@@ -31,6 +32,7 @@ #endif -- if (dh->mapptr != MAP_FAILED) -+ dh->fns->buf_free(dh); -+ -+ if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) - munmap(dh->mapptr, dh->size); -- while (close(dh->fd) == -1 && errno == EINTR) -- /* loop */; -+ if (dh->fd != -1) -+ while (close(dh->fd) == -1 && errno == EINTR) -+ /* loop */; - free(dh); - } + #include ++#include + #include --struct dmabufs_ctl * dmabufs_ctl_new(void) -+static struct dmabufs_ctl * dmabufs_ctl_new2(const struct dmabuf_fns * const fns) + #include "avassert.h" +@@ -40,6 +42,9 @@ + #include "imgutils.h" + #include "mem.h" + ++#if CONFIG_SAND ++#include "libavutil/rpi_sand_fns.h" ++#endif + + static void drm_device_free(AVHWDeviceContext *hwdev) { -- struct dmabufs_ctl * dbsc = malloc(sizeof(*dbsc)); -+ struct dmabufs_ctl * dbsc = calloc(1, sizeof(*dbsc)); +@@ -54,6 +59,11 @@ static int drm_device_create(AVHWDeviceContext *hwdev, const char *device, + AVDRMDeviceContext *hwctx = hwdev->hwctx; + drmVersionPtr version; - if (!dbsc) - return NULL; - -+ dbsc->fd = -1; -+ dbsc->fns = fns; -+ dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); -+ -+ if (fns->ctl_new(dbsc) != 0) -+ goto fail; -+ -+ return dbsc; -+ -+fail: -+ free(dbsc); -+ 
return NULL; -+} -+ -+static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) -+{ -+ request_debug(NULL, "Free dmabuf ctl\n"); -+ -+ dbsc->fns->ctl_free(dbsc); -+ -+ free(dbsc); -+} -+ -+void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc) -+{ -+ struct dmabufs_ctl * const dbsc = *pDbsc; -+ -+ if (!dbsc) -+ return; -+ *pDbsc = NULL; -+ -+ dmabufs_ctl_free(dbsc); -+} -+ -+//----------------------------------------------------------------------------- -+// -+// Alloc dmabuf via CMA -+ -+static int ctl_cma_new(struct dmabufs_ctl * dbsc) -+{ - while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 && - errno == EINTR) - /* Loop */; -@@ -258,31 +299,61 @@ struct dmabufs_ctl * dmabufs_ctl_new(void) - if (dbsc->fd == -1) { - request_log("Unable to open either %s or %s\n", - DMABUF_NAME1, DMABUF_NAME2); -- goto fail; -+ return -1; - } - } -+ return 0; -+} - -- dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE); -- -- return dbsc; -+static void ctl_cma_free(struct dmabufs_ctl * dbsc) -+{ -+ if (dbsc->fd != -1) -+ while (close(dbsc->fd) == -1 && errno == EINTR) -+ /* loop */; - --fail: -- free(dbsc); -- return NULL; - } - --void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc) -+static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) - { -- struct dmabufs_ctl * const dbsc = *pDbsc; -+ struct dma_heap_allocation_data data = { -+ .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1), -+ .fd = 0, -+ .fd_flags = O_RDWR, -+ .heap_flags = 0 -+ }; - -- if (!dbsc) -- return; -- *pDbsc = NULL; -+ while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) { -+ int err = errno; -+ request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n", -+ (uint64_t)data.len, -+ dbsc->fd, -+ err, -+ strerror(err)); -+ if (err == EINTR) -+ continue; -+ return -err; ++ if (device == NULL) { ++ hwctx->fd = -1; ++ return 0; + } - -- while (close(dbsc->fd) == -1 && errno == EINTR) -- /* loop */; -+ dh->fd = data.fd; -+ dh->size = 
(size_t)data.len; -+ return 0; -+} - -- free(dbsc); -+static void buf_cma_free(struct dmabuf_h * dh) -+{ -+ // Nothing needed - } - -+static const struct dmabuf_fns dmabuf_cma_fns = { -+ .buf_alloc = buf_cma_alloc, -+ .buf_free = buf_cma_free, -+ .ctl_new = ctl_cma_new, -+ .ctl_free = ctl_cma_free, -+}; + -+struct dmabufs_ctl * dmabufs_ctl_new(void) -+{ -+ request_debug(NULL, "Dmabufs using CMA\n");; -+ return dmabufs_ctl_new2(&dmabuf_cma_fns); -+} + hwctx->fd = open(device, O_RDWR); + if (hwctx->fd < 0) + return AVERROR(errno); +@@ -140,6 +150,8 @@ static int drm_map_frame(AVHWFramesContext *hwfc, + if (flags & AV_HWFRAME_MAP_WRITE) + mmap_prot |= PROT_WRITE; - -From c788ac962a1a4221d3fe9ab2b0d19ebf43964519 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 13:07:58 +0000 -Subject: [PATCH 104/186] dmabufs: Use unref rather than deleet on cmabufs_ctl - ---- - libavcodec/v4l2_req_dmabufs.c | 12 +++++++++++- - libavcodec/v4l2_req_dmabufs.h | 3 ++- - libavcodec/v4l2_request_hevc.c | 4 ++-- - 3 files changed, 15 insertions(+), 4 deletions(-) - -diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c -index 1c3a5e861f0c..acc0366e7630 100644 ---- a/libavcodec/v4l2_req_dmabufs.c -+++ b/libavcodec/v4l2_req_dmabufs.c -@@ -31,6 +31,7 @@ struct dmabuf_fns { - }; ++ if (dst->format == AV_PIX_FMT_NONE) ++ dst->format = hwfc->sw_format; + #if HAVE_LINUX_DMA_BUF_H + if (flags & AV_HWFRAME_MAP_READ) + map->sync_flags |= DMA_BUF_SYNC_READ; +@@ -186,6 +198,23 @@ static int drm_map_frame(AVHWFramesContext *hwfc, - struct dmabufs_ctl { -+ atomic_int ref_count; - int fd; - size_t page_size; - void * v; -@@ -271,7 +272,7 @@ static void dmabufs_ctl_free(struct dmabufs_ctl * const dbsc) - free(dbsc); - } + dst->width = src->width; + dst->height = src->height; ++ dst->crop_top = src->crop_top; ++ dst->crop_bottom = src->crop_bottom; ++ dst->crop_left = src->crop_left; ++ dst->crop_right = src->crop_right; ++ ++#if CONFIG_SAND ++ // Rework for sand frames ++ 
if (av_rpi_is_sand_frame(dst)) { ++ // As it stands the sand formats hold stride2 in linesize[3] ++ // linesize[0] & [1] contain stride1 which is always 128 for everything we do ++ // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1] ++ dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier); ++ dst->linesize[0] = 128; ++ dst->linesize[1] = 128; ++ // *** Are we sure src->height is actually what we want ??? ++ } ++#endif --void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc) -+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pDbsc) + err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, + &drm_unmap_frame, map); +@@ -207,16 +236,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) { - struct dmabufs_ctl * const dbsc = *pDbsc; +- enum AVPixelFormat *pix_fmts; ++ enum AVPixelFormat *p; -@@ -279,9 +280,18 @@ void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc) - return; - *pDbsc = NULL; +- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); +- if (!pix_fmts) ++ p = *formats = av_malloc_array(3, sizeof(*p)); ++ if (!p) + return AVERROR(ENOMEM); -+ if (atomic_fetch_sub(&dbsc->ref_count, 1) != 0) -+ return; +- pix_fmts[0] = ctx->sw_format; +- pix_fmts[1] = AV_PIX_FMT_NONE; ++ // **** Offer native sand too ???? ++ *p++ = ++#if CONFIG_SAND ++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? ++ AV_PIX_FMT_YUV420P : ++ ctx->sw_format == AV_PIX_FMT_RPI4_10 ? 
++ AV_PIX_FMT_YUV420P10LE : ++#endif ++ ctx->sw_format; + - dmabufs_ctl_free(dbsc); ++#if CONFIG_SAND ++ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || ++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) ++ *p++ = AV_PIX_FMT_NV12; ++#endif + +- *formats = pix_fmts; ++ *p = AV_PIX_FMT_NONE; + return 0; } -+struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) -+{ -+ atomic_fetch_add(&dbsc->ref_count, 1); -+ return dbsc; -+} -+ - //----------------------------------------------------------------------------- - // - // Alloc dmabuf via CMA -diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h -index c1d3d8c8d751..381ba2708da6 100644 ---- a/libavcodec/v4l2_req_dmabufs.h -+++ b/libavcodec/v4l2_req_dmabufs.h -@@ -7,7 +7,8 @@ struct dmabufs_ctl; - struct dmabuf_h; +@@ -232,18 +274,62 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, + map = av_frame_alloc(); + if (!map) + return AVERROR(ENOMEM); +- map->format = dst->format; - struct dmabufs_ctl * dmabufs_ctl_new(void); --void dmabufs_ctl_delete(struct dmabufs_ctl ** const pdbsc); -+void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); -+struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); - - // Need not preserve old contents - // On NULL return old buffer is freed -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index 767ecb036ad2..db7ed13b6d76 100644 ---- a/libavcodec/v4l2_request_hevc.c -+++ b/libavcodec/v4l2_request_hevc.c -@@ -105,7 +105,7 @@ static int v4l2_request_hevc_uninit(AVCodecContext *avctx) - mediabufs_ctl_unref(&ctx->mbufs); - media_pool_delete(&ctx->mpool); - pollqueue_unref(&ctx->pq); -- dmabufs_ctl_delete(&ctx->dbufs); -+ dmabufs_ctl_unref(&ctx->dbufs); - devscan_delete(&ctx->devscan); - - decode_q_uninit(&ctx->decode_q); -@@ -324,7 +324,7 @@ fail3: - fail2: - pollqueue_unref(&ctx->pq); - fail1: -- dmabufs_ctl_delete(&ctx->dbufs); -+ dmabufs_ctl_unref(&ctx->dbufs); - fail0: - 
devscan_delete(&ctx->devscan); - return ret; - -From 95d64bce0aaeb0f2e1b2bcd15e8345349efe295b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 14:21:40 +0000 -Subject: [PATCH 105/186] egl_vout: Remove redundant & completely broken debug - ---- - libavdevice/egl_vout.c | 25 ------------------------- - 1 file changed, 25 deletions(-) - -diff --git a/libavdevice/egl_vout.c b/libavdevice/egl_vout.c -index a52cabb082e9..afc7afd13ea0 100644 ---- a/libavdevice/egl_vout.c -+++ b/libavdevice/egl_vout.c -@@ -515,31 +515,6 @@ static int do_display(AVFormatContext *const s, egl_display_env_t *const de, AVF - } - - da->fd = desc->objects[0].fd; -- --#if 0 -- av_log(s, AV_LOG_INFO, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d," -- " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n", -- av_frame_cropped_width(frame), -- av_frame_cropped_height(frame), -- desc->layers[0].format, -- bo_plane_handles[0], -- bo_plane_handles[1], -- bo_plane_handles[2], -- bo_plane_handles[3], -- pitches[0], -- pitches[1], -- pitches[2], -- pitches[3], -- offsets[0], -- offsets[1], -- offsets[2], -- offsets[3], -- (long long)modifiers[0], -- (long long)modifiers[1], -- (long long)modifiers[2], -- (long long)modifiers[3] -- ); --#endif - } - - glClearColor(0.5, 0.5, 0.5, 0.5); - -From b79c28a0644c4d8b83c616dab6005ca862ec99df Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 16:12:12 +0000 -Subject: [PATCH 106/186] v4l2m2m: Use offset from querybuf rather than always - 0 - ---- - libavcodec/v4l2_buffers.c | 4 +++- - libavcodec/v4l2_buffers.h | 3 ++- - 2 files changed, 5 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 9ef2f40e3991..5ca58ea5935b 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -379,7 +379,7 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) - - for (int i = 0; i < avbuf->num_planes; i++) { - layer->planes[i].object_index = i; -- layer->planes[i].offset = 0; -+ 
layer->planes[i].offset = avbuf->plane_info[i].offset; - layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; - } - -@@ -934,6 +934,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; -+ avbuf->plane_info[i].offset = avbuf->buf.m.planes[i].data_offset; - - if (want_mmap) - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, -@@ -941,6 +942,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); - } else { - avbuf->plane_info[i].length = avbuf->buf.length; -+ avbuf->plane_info[i].offset = 0; - - if (want_mmap) - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, -diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 1ac32c5989f1..d91d5d1dd07b 100644 ---- a/libavcodec/v4l2_buffers.h -+++ b/libavcodec/v4l2_buffers.h -@@ -66,7 +66,8 @@ typedef struct V4L2Buffer { - - /* keep track of the mmap address and mmap length */ - struct V4L2Plane_info { -- int bytesperline; -+ size_t bytesperline; -+ size_t offset; - void * mm_addr; - size_t length; - } plane_info[VIDEO_MAX_PLANES]; - -From 920d901527cbe17accc42659db548229318ac855 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 17:57:27 +0000 -Subject: [PATCH 107/186] v4l2m2m: Fix crash if init errors out before setting - avctx - ---- - libavcodec/v4l2_m2m.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index 1e30d15fd866..ac6bae0dc327 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -278,7 +278,7 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) - - av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n"); - -- if (av_codec_is_decoder(s->avctx->codec)) -+ if (s->avctx && av_codec_is_decoder(s->avctx->codec)) - 
av_packet_unref(&s->buf_pkt); - - if (s->fd >= 0) { - -From c339fbc23b3d0698e29301d7740cba39c9993fbc Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 18:10:30 +0000 -Subject: [PATCH 108/186] v4l2_buffers: Add and use ctx_to_m2mctx + error debug - ---- - libavcodec/v4l2_buffers.c | 22 +++++++++++++++------- - 1 file changed, 15 insertions(+), 7 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 5ca58ea5935b..e28ef2d1e802 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -41,11 +41,16 @@ - #define USEC_PER_SEC 1000000 - static const AVRational v4l2_timebase = { 1, USEC_PER_SEC }; - -+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) -+{ -+ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? -+ container_of(ctx, V4L2m2mContext, output) : -+ container_of(ctx, V4L2m2mContext, capture); -+} -+ - static inline V4L2m2mContext *buf_to_m2mctx(const V4L2Buffer * const buf) - { -- return V4L2_TYPE_IS_OUTPUT(buf->context->type) ? 
-- container_of(buf->context, V4L2m2mContext, output) : -- container_of(buf->context, V4L2m2mContext, capture); -+ return ctx_to_m2mctx(buf->context); - } - - static inline AVCodecContext *logger(const V4L2Buffer * const buf) -@@ -883,6 +888,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - int ret, i; - V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); - AVBufferRef * bufref; -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - - *pbufref = NULL; - if (avbuf == NULL) -@@ -910,7 +916,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - avbuf->buf.m.planes = avbuf->planes; - } - -- ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); -+ ret = ioctl(s->fd, VIDIOC_QUERYBUF, &avbuf->buf); - if (ret < 0) ++ // Map to default ++ map->format = AV_PIX_FMT_NONE; + err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ); + if (err) goto fail; -@@ -969,10 +975,12 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - } - - if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { -- if (buf_to_m2mctx(avbuf)->output_drm) { -+ if (s->output_drm) { - ret = v4l2_buffer_export_drm(avbuf); -- if (ret) -- goto fail; -+ if (ret) { -+ av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); -+ goto fail; -+ } - } - } - - -From 5959f5fb7ef1d1cab901393035e7a6ac31d0d78b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 18:53:22 +0000 -Subject: [PATCH 109/186] v4l2m2m: Add ability to use cma alloced dmabufs as - well as v4l2 mmap - ---- - libavcodec/Makefile | 2 +- - libavcodec/v4l2_buffers.c | 65 ++++++++++++++++++++++++++------------- - libavcodec/v4l2_buffers.h | 2 ++ - libavcodec/v4l2_m2m.c | 6 +++- - libavcodec/v4l2_m2m.h | 4 +++ - libavcodec/v4l2_m2m_dec.c | 16 ++++++++++ - 6 files changed, 71 insertions(+), 24 deletions(-) - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 11f183c9b9ba..8b1d66983423 100644 ---- 
a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -170,7 +170,7 @@ OBJS-$(CONFIG_VP3DSP) += vp3dsp.o - OBJS-$(CONFIG_VP56DSP) += vp56dsp.o - OBJS-$(CONFIG_VP8DSP) += vp8dsp.o - OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\ -- weak_link.o -+ weak_link.o v4l2_req_dmabufs.o - OBJS-$(CONFIG_V4L2_REQUEST) += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\ - v4l2_req_devscan.o weak_link.o - OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index e28ef2d1e802..8d80d1978830 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -36,6 +36,7 @@ - #include "v4l2_context.h" - #include "v4l2_buffers.h" - #include "v4l2_m2m.h" -+#include "v4l2_req_dmabufs.h" - #include "weak_link.h" - - #define USEC_PER_SEC 1000000 -@@ -477,33 +478,46 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data) - av_buffer_unref(&bufref); - } - -+static inline uint32_t ff_v4l2_buf_len(const struct v4l2_buffer * b, unsigned int i) -+{ -+ return V4L2_TYPE_IS_MULTIPLANAR(b->type) ? 
b->m.planes[i].length : b->length; -+} +- map->width = dst->width; +- map->height = dst->height; ++#if 0 ++ av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__, ++ hwfc->sw_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE, ++ map->width, map->height, ++ map->linesize[0], ++ map->linesize[1], ++ map->linesize[2], ++ map->linesize[3], ++ dst->width, dst->height, ++ dst->linesize[0], ++ dst->linesize[1], ++ dst->linesize[2]); ++#endif ++#if CONFIG_SAND ++ if (av_rpi_is_sand_frame(map)) { ++ // Preserve crop - later ffmpeg code assumes that we have in that it ++ // overwrites any crop that we create with the old values ++ const unsigned int w = FFMIN(dst->width, map->width); ++ const unsigned int h = FFMIN(dst->height, map->height); + - static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) - { -- struct v4l2_exportbuffer expbuf; - int i, ret; -+ const V4L2m2mContext * const s = buf_to_m2mctx(avbuf); - - for (i = 0; i < avbuf->num_planes; i++) { -- memset(&expbuf, 0, sizeof(expbuf)); -- -- expbuf.index = avbuf->buf.index; -- expbuf.type = avbuf->buf.type; -- expbuf.plane = i; -+ int dma_fd = -1; -+ const uint32_t blen = ff_v4l2_buf_len(&avbuf->buf, i); ++ map->crop_top = 0; ++ map->crop_bottom = 0; ++ map->crop_left = 0; ++ map->crop_right = 0; + -+ if (s->db_ctl != NULL) { -+ if ((avbuf->dmabuf[i] = dmabuf_alloc(s->db_ctl, blen)) == NULL) -+ return AVERROR(ENOMEM); -+ dma_fd = dmabuf_fd(avbuf->dmabuf[i]); -+ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) -+ avbuf->buf.m.planes[i].m.fd = dma_fd; -+ else -+ avbuf->buf.m.fd = dma_fd; ++ if (av_rpi_sand_to_planar_frame(dst, map) != 0) ++ { ++ av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); ++ err = AVERROR(EINVAL); ++ goto fail; + } -+ else { -+ struct v4l2_exportbuffer expbuf; -+ memset(&expbuf, 0, sizeof(expbuf)); - -- ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_EXPBUF, &expbuf); -- if (ret < 0) -- return 
AVERROR(errno); -+ expbuf.index = avbuf->buf.index; -+ expbuf.type = avbuf->buf.type; -+ expbuf.plane = i; - -- if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type)) { -- /* drm frame */ -- avbuf->drm_frame.objects[i].size = avbuf->buf.m.planes[i].length; -- avbuf->drm_frame.objects[i].fd = expbuf.fd; -- avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; -- } else { -- /* drm frame */ -- avbuf->drm_frame.objects[0].size = avbuf->buf.length; -- avbuf->drm_frame.objects[0].fd = expbuf.fd; -- avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ ret = ioctl(s->fd, VIDIOC_EXPBUF, &expbuf); -+ if (ret < 0) -+ return AVERROR(errno); -+ dma_fd = expbuf.fd; - } + -+ avbuf->drm_frame.objects[i].size = blen; -+ avbuf->drm_frame.objects[i].fd = dma_fd; -+ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; - } - - return 0; -@@ -870,9 +884,16 @@ static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) - munmap(p->mm_addr, p->length); - } - -- for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { -- if (avbuf->drm_frame.objects[i].fd != -1) -- close(avbuf->drm_frame.objects[i].fd); -+ if (avbuf->dmabuf[0] == NULL) { -+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) { -+ if (avbuf->drm_frame.objects[i].fd != -1) -+ close(avbuf->drm_frame.objects[i].fd); -+ } ++ dst->width = w; ++ dst->height = h; + } -+ else { -+ for (i = 0; i != FF_ARRAY_ELEMS(avbuf->dmabuf); ++i) { -+ dmabuf_free(avbuf->dmabuf[i]); -+ } - } - - av_buffer_unref(&avbuf->ref_buf); -diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index d91d5d1dd07b..444ad94b145e 100644 ---- a/libavcodec/v4l2_buffers.h -+++ b/libavcodec/v4l2_buffers.h -@@ -46,6 +46,7 @@ enum V4L2Buffer_status { - */ - struct V4L2Context; - struct ff_weak_link_client; -+struct dmabuf_h; - - typedef struct V4L2Buffer { - /* each buffer needs to have a reference to its context -@@ -80,6 +81,7 @@ typedef struct V4L2Buffer { - - enum V4L2Buffer_status 
status; - -+ struct dmabuf_h * dmabuf[VIDEO_MAX_PLANES]; // If externally alloced dmabufs - stash other info here - } V4L2Buffer; - - /** -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index ac6bae0dc327..f802687b1bb2 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -34,6 +34,7 @@ - #include "v4l2_context.h" - #include "v4l2_fmt.h" - #include "v4l2_m2m.h" -+#include "v4l2_req_dmabufs.h" - - static void - xlat_init(xlat_track_t * const x) -@@ -75,7 +76,7 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) - - s->capture.done = s->output.done = 0; - s->capture.name = "capture"; -- s->capture.buf_mem = V4L2_MEMORY_MMAP; -+ s->capture.buf_mem = s->db_ctl != NULL ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; - s->output.name = "output"; - s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; - atomic_init(&s->refcount, 0); -@@ -94,12 +95,14 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) - if (v4l2_mplane_video(&cap)) { - s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; - s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; -+ s->output.format.type = s->output.type; - return 0; - } - - if (v4l2_splane_video(&cap)) { - s->capture.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; - s->output.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ s->output.format.type = s->output.type; - return 0; - } - -@@ -293,6 +296,7 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) - - ff_v4l2_context_release(&s->output); - -+ dmabufs_ctl_unref(&s->db_ctl); - close(s->fd); - s->fd = -1; - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 26a7161042b5..0f41f94694d3 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -71,6 +71,8 @@ typedef struct xlat_track_s { - V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; - } xlat_track_t; - -+struct dmabufs_ctl; -+ - typedef struct V4L2m2mContext { - char devname[PATH_MAX]; - int fd; -@@ -124,6 +126,7 @@ typedef struct V4L2m2mContext { - /* Quirks */ - unsigned 
int quirks; - -+ struct dmabufs_ctl * db_ctl; - } V4L2m2mContext; - - typedef struct V4L2m2mPriv { -@@ -134,6 +137,7 @@ typedef struct V4L2m2mPriv { - - int num_output_buffers; - int num_capture_buffers; -+ const char * dmabuf_alloc; - enum AVPixelFormat pix_fmt; - } V4L2m2mPriv; - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 47b2735f8252..4d170572980e 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -41,6 +41,7 @@ - #include "v4l2_context.h" - #include "v4l2_m2m.h" - #include "v4l2_fmt.h" -+#include "v4l2_req_dmabufs.h" - - // Pick 64 for max last count - that is >1sec at 60fps - #define STATS_LAST_COUNT_MAX 64 -@@ -896,6 +897,20 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - s->output_drm = 0; - } - -+ s->db_ctl = NULL; -+ if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { -+ if (strcmp(priv->dmabuf_alloc, "cma") == 0) -+ s->db_ctl = dmabufs_ctl_new(); -+ else { -+ av_log(avctx, AV_LOG_ERROR, "Unknown dmabuf alloc method: '%s'\n", priv->dmabuf_alloc); -+ return AVERROR(EINVAL); -+ } -+ if (!s->db_ctl) { -+ av_log(avctx, AV_LOG_ERROR, "Can't open dmabuf provider '%s'\n", priv->dmabuf_alloc); -+ return AVERROR(ENOMEM); -+ } ++ else ++#endif ++ { ++ // Kludge mapped h/w s.t. 
frame_copy works ++ map->width = dst->width; ++ map->height = dst->height; ++ err = av_frame_copy(dst, map); + } + +- err = av_frame_copy(dst, map); + if (err) ++ { ++ av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__); + goto fail; ++ } + + err = 0; + fail: +@@ -258,7 +344,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, + int err; + + if (src->width > hwfc->width || src->height > hwfc->height) ++ { ++ av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height); + return AVERROR(EINVAL); ++ } + + map = av_frame_alloc(); + if (!map) +diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c +index 6317ab7d0efa..7632ba7fa29f 100644 +--- a/libavutil/hwcontext_vulkan.c ++++ b/libavutil/hwcontext_vulkan.c +@@ -72,6 +72,14 @@ + #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) + #endif + ++// Sometimes missing definitions ++#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME ++#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" ++#endif ++#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME ++#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" ++#endif + - s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM); - if (!s->device_ref) { - ret = AVERROR(ENOMEM); -@@ -1000,6 +1015,7 @@ static const AVOption options[] = { - { "num_capture_buffers", "Number of buffers in the capture context", - OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS }, - { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS }, -+ { "dmabuf_alloc", "Dmabuf alloc method", OFFSET(dmabuf_alloc), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS }, - { NULL}, + typedef struct VulkanDevicePriv { + /** + * The public AVVulkanDeviceContext. See hwcontext_vulkan.h for it. 
+diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c +index 1c0bcf2232be..c1d7dde0418d 100644 +--- a/libavutil/pixdesc.c ++++ b/libavutil/pixdesc.c +@@ -2791,6 +2791,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { + }, + .flags = AV_PIX_FMT_FLAG_PLANAR, + }, ++ [AV_PIX_FMT_SAND128] = { ++ .name = "sand128", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 1, 0, 0, 8 }, /* Y */ ++ { 1, 2, 0, 0, 8 }, /* U */ ++ { 1, 2, 1, 0, 8 }, /* V */ ++ }, ++ .flags = 0, ++ }, ++ [AV_PIX_FMT_SAND64_10] = { ++ .name = "sand64_10", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 2, 0, 0, 10 }, /* Y */ ++ { 1, 4, 0, 0, 10 }, /* U */ ++ { 1, 4, 2, 0, 10 }, /* V */ ++ }, ++ .flags = 0, ++ }, ++ [AV_PIX_FMT_SAND64_16] = { ++ .name = "sand64_16", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 2, 0, 0, 16 }, /* Y */ ++ { 1, 4, 0, 0, 16 }, /* U */ ++ { 1, 4, 2, 0, 16 }, /* V */ ++ }, ++ .flags = 0, ++ }, ++ [AV_PIX_FMT_RPI4_8] = { ++ .name = "rpi4_8", ++ .flags = AV_PIX_FMT_FLAG_HWACCEL, ++ }, ++ [AV_PIX_FMT_RPI4_10] = { ++ .name = "rpi4_10", ++ .flags = AV_PIX_FMT_FLAG_HWACCEL, ++ }, }; - -From 778d48c8652016879f09d7ac4aff7592f9ea9d13 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Dec 2022 19:05:47 +0000 -Subject: [PATCH 110/186] testfilt: Skeleton of hw filter test code - ---- - pi-util/testfilt.py | 83 +++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 83 insertions(+) - create mode 100755 pi-util/testfilt.py - + static const char * const color_range_names[] = { +diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h +index a7f50e169038..f3f5a38584d2 100644 +--- a/libavutil/pixfmt.h ++++ b/libavutil/pixfmt.h +@@ -380,6 +380,14 @@ enum AVPixelFormat { + + AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian + AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 
20bpp, data in the high bits, little-endian ++// RPI - not on ifdef so can be got at by calling progs ++// #define so code that uses this can know it is there ++#define AVUTIL_HAVE_PIX_FMT_SAND 1 ++ AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding ++ AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding ++ AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding ++ AV_PIX_FMT_RPI4_8, ++ AV_PIX_FMT_RPI4_10, + + AV_PIX_FMT_X2RGB10LE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), little-endian, X=unused/undefined + AV_PIX_FMT_X2RGB10BE, ///< packed RGB 10:10:10, 30bpp, (msb)2X 10R 10G 10B(lsb), big-endian, X=unused/undefined +diff --git a/libavutil/rpi_sand_fn_pw.c b/libavutil/rpi_sand_fn_pw.c +new file mode 100644 +index 000000000000..0d5d203dc3cd +--- /dev/null ++++ b/libavutil/rpi_sand_fn_pw.c +@@ -0,0 +1,227 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. 
++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++// * Included twice from rpi_sand_fn with different PW ++ ++#define STRCAT(x,y) x##y ++ ++#if PW == 1 ++#define pixel uint8_t ++#define FUNC(f) STRCAT(f, 8) ++#elif PW == 2 ++#define pixel uint16_t ++#define FUNC(f) STRCAT(f, 16) ++#else ++#error Unexpected PW ++#endif ++ ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// unclipped ++void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x = _x; ++ const unsigned int w = _w; ++ const unsigned int mask = stride1 - 1; ++ ++#if PW == 1 && HAVE_SAND_ASM ++ if (_x == 0) { ++ ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride, ++ src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if ((x & ~mask) == ((x + w) & ~mask)) { ++ // All in one sand stripe ++ const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) { ++ memcpy(dst, p, w); ++ } ++ } ++ else ++ { ++ // Two+ stripe ++ const unsigned int 
sstride = stride1 * stride2; ++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ const uint8_t * p2 = p1 + sstride - (x & mask); ++ const unsigned int w1 = stride1 - (x & mask); ++ const unsigned int w3 = (x + w) & mask; ++ const unsigned int w2 = w - (w1 + w3); ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) { ++ unsigned int j; ++ const uint8_t * p = p2; ++ uint8_t * d = dst; ++ memcpy(d, p1, w1); ++ d += w1; ++ for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) { ++ memcpy(d, p, stride1); ++ } ++ memcpy(d, p, w3); ++ } ++ } ++} ++ ++// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V) ++ ++void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x = _x * 2; ++ const unsigned int w = _w * 2; ++ const unsigned int mask = stride1 - 1; ++ ++#if PW == 1 && HAVE_SAND_ASM ++ if (_x == 0) { ++ ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v, ++ src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if ((x & ~mask) == ((x + w) & ~mask)) { ++ // All in one sand stripe ++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) { ++ pixel * du = (pixel *)dst_u; ++ pixel * dv = (pixel *)dst_v; ++ const pixel * p = (const pixel *)p1; ++ for (unsigned int k = 0; k < w; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ } ++ } ++ else ++ { ++ // Two+ stripe ++ const unsigned int sstride = stride1 * stride2; ++ const unsigned int sstride_p = (sstride - stride1) / PW; ++ ++ const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ const uint8_t * 
p2 = p1 + sstride - (x & mask); ++ const unsigned int w1 = stride1 - (x & mask); ++ const unsigned int w3 = (x + w) & mask; ++ const unsigned int w2 = w - (w1 + w3); ++ ++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) { ++ unsigned int j; ++ const pixel * p = (const pixel *)p1; ++ pixel * du = (pixel *)dst_u; ++ pixel * dv = (pixel *)dst_v; ++ for (unsigned int k = 0; k < w1; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) { ++ for (unsigned int k = 0; k < stride1; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ } ++ for (unsigned int k = 0; k < w3; k += 2 * PW) { ++ *du++ = *p++; ++ *dv++ = *p++; ++ } ++ } ++ } ++} ++ ++void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c, ++ unsigned int stride1, unsigned int stride2, ++ const uint8_t * src_u, const unsigned int src_stride_u, ++ const uint8_t * src_v, const unsigned int src_stride_v, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x = _x * 2; ++ const unsigned int w = _w * 2; ++ const unsigned int mask = stride1 - 1; ++ if ((x & ~mask) == ((x + w) & ~mask)) { ++ // All in one sand stripe ++ uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) { ++ const pixel * su = (const pixel *)src_u; ++ const pixel * sv = (const pixel *)src_v; ++ pixel * p = (pixel *)p1; ++ for (unsigned int k = 0; k < w; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ } ++ } ++ else ++ { ++ // Two+ stripe ++ const unsigned int sstride = stride1 * stride2; ++ const unsigned int sstride_p = (sstride - stride1) / PW; ++ ++ const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2; ++ const uint8_t * p2 = p1 + sstride - (x & mask); ++ const unsigned int w1 = stride1 - (x & mask); ++ const 
unsigned int w3 = (x + w) & mask; ++ const unsigned int w2 = w - (w1 + w3); ++ ++ for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) { ++ unsigned int j; ++ const pixel * su = (const pixel *)src_u; ++ const pixel * sv = (const pixel *)src_v; ++ pixel * p = (pixel *)p1; ++ for (unsigned int k = 0; k < w1; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) { ++ for (unsigned int k = 0; k < stride1; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ } ++ for (unsigned int k = 0; k < w3; k += 2 * PW) { ++ *p++ = *su++; ++ *p++ = *sv++; ++ } ++ } ++ } ++} ++ ++ ++#undef pixel ++#undef STRCAT ++#undef FUNC ++ +diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c +new file mode 100644 +index 000000000000..2e19dd3a7b84 +--- /dev/null ++++ b/libavutil/rpi_sand_fns.c +@@ -0,0 +1,447 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++Authors: John Cox ++*/ ++ ++#include "config.h" ++#include ++#include ++#include "rpi_sand_fns.h" ++#include "avassert.h" ++#include "frame.h" ++ ++#if ARCH_ARM && HAVE_NEON ++#include "libavutil/arm/cpu.h" ++#include "libavutil/arm/rpi_sand_neon.h" ++#define HAVE_SAND_ASM 1 ++#elif ARCH_AARCH64 && HAVE_NEON ++#include "libavutil/aarch64/cpu.h" ++#include "libavutil/aarch64/rpi_sand_neon.h" ++#define HAVE_SAND_ASM 1 ++#else ++#define HAVE_SAND_ASM 0 ++#endif ++ ++#define PW 1 ++#include "rpi_sand_fn_pw.c" ++#undef PW ++ ++#define PW 2 ++#include "rpi_sand_fn_pw.c" ++#undef PW ++ ++#if 1 ++// Simple round ++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) ++{ ++ const unsigned int rnd = (1 << shr) >> 1; ++ const uint16_t * src = (const uint16_t *)_src; ++ ++ for (; n != 0; --n) { ++ *dst++ = (*src++ + rnd) >> shr; ++ } ++} ++#else ++// Dithered variation ++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr) ++{ ++ unsigned int rnd = (1 << shr) >> 1; ++ const unsigned int mask = ((1 << shr) - 1); ++ const uint16_t * src = (const uint16_t *)_src; ++ ++ for (; n != 0; --n) { ++ rnd = *src++ + (rnd & mask); ++ *dst++ = rnd >> shr; ++ } ++} ++#endif ++ ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// unclipped ++// _x & _w in pixels, strides in bytes ++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int 
dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word ++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 4; ++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0 && have_neon(av_get_cpu_flags())) { ++ ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint16_t * d = (uint16_t *)dst; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3 = *p++; ++ ++ if (xskip0 == 1) ++ *d++ = (p3 >> 10) & 0x3ff; ++ *d++ = (p3 >> 20) & 0x3ff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3 = *p++; ++ *d++ = p3 & 0x3ff; ++ *d++ = (p3 >> 10) & 0x3ff; ++ *d++ = (p3 >> 20) & 0x3ff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3 = *p; ++ ++ *d++ = p3 & 0x3ff; ++ if (xrem1 == 2) ++ *d++ = (p3 >> 10) & 0x3ff; ++ } ++ } ++} ++ ++ ++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word ++ const unsigned int 
xskip0 = _x - (x0 >> 3) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 8; ++ const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0 && have_neon(av_get_cpu_flags())) { ++ ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, ++ src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint16_t * du = (uint16_t *)dst_u; ++ uint16_t * dv = (uint16_t *)dst_v; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3a = *p++; ++ const uint32_t p3b = *p++; ++ ++ if (xskip0 == 1) ++ { ++ *du++ = (p3a >> 20) & 0x3ff; ++ *dv++ = (p3b >> 0) & 0x3ff; ++ } ++ *du++ = (p3b >> 10) & 0x3ff; ++ *dv++ = (p3b >> 20) & 0x3ff; ++ ++ if (((x += 8) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3a = *p++; ++ const uint32_t p3b = *p++; ++ ++ *du++ = p3a & 0x3ff; ++ *dv++ = (p3a >> 10) & 0x3ff; ++ *du++ = (p3a >> 20) & 0x3ff; ++ *dv++ = p3b & 0x3ff; ++ *du++ = (p3b >> 10) & 0x3ff; ++ *dv++ = (p3b >> 20) & 0x3ff; ++ ++ if (((x += 8) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3a = *p++; ++ const uint32_t p3b = *p++; ++ ++ *du++ = p3a & 0x3ff; ++ *dv++ = (p3a >> 10) & 0x3ff; ++ if (xrem1 == 2) ++ { ++ *du++ = (p3a >> 20) & 0x3ff; ++ *dv++ = p3b & 0x3ff; ++ } ++ } ++ } ++} ++ ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// single lose bottom 2 bits truncation ++// _x & _w in pixels, strides in bytes ++void 
av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word ++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 4; ++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0 && have_neon(av_get_cpu_flags())) { // runtime NEON check, as in the y16/c16 variants ++ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint8_t * d = dst; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3 = *p++; ++ ++ if (xskip0 == 1) ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3 = *p++; ++ *d++ = (p3 >> 2) & 0xff; ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3 = *p; ++ ++ *d++ = (p3 >> 2) & 0xff; ++ if (xrem1 == 2) ++ *d++ = (p3 >> 12) & 0xff; ++ } ++ } ++} ++ ++ ++ ++// w/h in pixels ++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, ++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, ++ unsigned int w, unsigned int h, const unsigned int shr) ++{ ++ const unsigned int n = dst_stride1 / 2; ++ unsigned int j; ++ ++ // This
is true for our current layouts ++ av_assert0(dst_stride1 == src_stride1); ++ ++ // As we have the same stride1 for src & dest and src is wider than dest ++ // then if we loop on src we can always write contiguously to dest ++ // We make no effort to copy an exact width - round up to nearest src stripe ++ // as we will always have storage in dest for that ++ ++#if ARCH_ARM && HAVE_NEON ++ if (shr == 3 && src_stride1 == 128) { ++ for (j = 0; j + n < w; j += dst_stride1) { ++ uint8_t * d = dst + j * dst_stride2; ++ const uint8_t * s1 = src + j * 2 * src_stride2; ++ const uint8_t * s2 = s1 + src_stride1 * src_stride2; ++ ++ ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h); ++ } ++ } ++ else ++#endif ++ { ++ for (j = 0; j + n < w; j += dst_stride1) { ++ uint8_t * d = dst + j * dst_stride2; ++ const uint8_t * s1 = src + j * 2 * src_stride2; ++ const uint8_t * s2 = s1 + src_stride1 * src_stride2; ++ ++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) { ++ cpy16_to_8(d, s1, n, shr); ++ cpy16_to_8(d + n, s2, n, shr); ++ } ++ } ++ } ++ ++ // Fix up a trailing dest half stripe ++ if (j < w) { ++ uint8_t * d = dst + j * dst_stride2; ++ const uint8_t * s1 = src + j * 2 * src_stride2; ++ ++ for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) { ++ cpy16_to_8(d, s1, n, shr); ++ } ++ } ++} ++ ++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src) ++{ ++ const int w = av_frame_cropped_width(src); ++ const int h = av_frame_cropped_height(src); ++ const int x = src->crop_left; ++ const int y = src->crop_top; ++ ++ // We will crop as part of the conversion ++ dst->crop_top = 0; ++ dst->crop_left = 0; ++ dst->crop_bottom = 0; ++ dst->crop_right = 0; ++ ++ switch (src->format){ ++ case AV_PIX_FMT_SAND128: ++ case AV_PIX_FMT_RPI4_8: ++ switch (dst->format){ ++ case AV_PIX_FMT_YUV420P: ++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), 
av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], ++ dst->data[2], dst->linesize[2], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w/2, h/2); ++ break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x & ~1, y/2, w, h/2); // interleaved UV: byte offset is 2 * (x/2), not x/2 ++ break; ++ default: ++ return -1; ++ } ++ break; ++ case AV_PIX_FMT_SAND64_10: ++ switch (dst->format){ ++ case AV_PIX_FMT_YUV420P10: ++ av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x*2, y, w*2, h); ++ av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1], ++ dst->data[2], dst->linesize[2], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y/2, w, h/2); ++ break; ++ default: ++ return -1; ++ } ++ break; ++ case AV_PIX_FMT_RPI4_10: ++ switch (dst->format){ ++ case AV_PIX_FMT_YUV420P10: ++ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], ++ dst->data[2], dst->linesize[2], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w/2, h/2); ++ break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x & ~1, y/2, w, h/2); // interleaved UV: sample offset is 2 * (x/2), not x/2 ++ break; ++
default: ++ return -1; ++ } ++ break; ++ default: ++ return -1; ++ } ++ ++ return av_frame_copy_props(dst, src); ++} +diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h +new file mode 100644 +index 000000000000..f7ba62ff7380 +--- /dev/null ++++ b/libavutil/rpi_sand_fns.h +@@ -0,0 +1,188 @@ ++/* ++Copyright (c) 2018 Raspberry Pi (Trading) Ltd. ++All rights reserved. ++ ++Redistribution and use in source and binary forms, with or without ++modification, are permitted provided that the following conditions are met: ++ * Redistributions of source code must retain the above copyright ++ notice, this list of conditions and the following disclaimer. ++ * Redistributions in binary form must reproduce the above copyright ++ notice, this list of conditions and the following disclaimer in the ++ documentation and/or other materials provided with the distribution. ++ * Neither the name of the copyright holder nor the ++ names of its contributors may be used to endorse or promote products ++ derived from this software without specific prior written permission. ++ ++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++ ++Authors: John Cox ++*/ ++ ++#ifndef AVUTIL_RPI_SAND_FNS_H ++#define AVUTIL_RPI_SAND_FNS_H ++ ++#include "libavutil/frame.h" ++ ++// For all these fns _x & _w are measured as coord * PW ++// For the C fns coords are in chroma pels (so luma / 2) ++// Strides are in bytes ++ ++void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_planar_to_sand_c8(uint8_t * dst_c, ++ unsigned int stride1, unsigned int stride2, ++ const uint8_t * src_u, const unsigned int src_stride_u, ++ const uint8_t * src_v, const unsigned int src_stride_v, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_planar_to_sand_c16(uint8_t * dst_c, ++ unsigned int stride1, unsigned int stride2, ++ const uint8_t * src_u, const unsigned int src_stride_u, ++ const uint8_t * src_v, const unsigned int src_stride_v, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, 
unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u, ++ uint8_t * dst_v, const unsigned int dst_stride_v, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); ++ ++// w/h in pixels ++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, ++ const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2, ++ unsigned int w, unsigned int h, const unsigned int shr); ++ ++ ++// dst must contain required pixel format & allocated data buffers ++// Cropping on the src buffer will be honoured and dst crop will be set to zero ++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src); ++ ++ ++static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame) ++{ ++#ifdef RPI_ZC_SAND128_ONLY ++ // If we are sure we only support 128 byte sand formats replace the ++ // var with a constant which should allow for better optimisation ++ return 128; ++#else ++ return frame->linesize[0]; ++#endif ++} ++ ++static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame) ++{ ++ return frame->linesize[3]; ++} ++ ++ ++static inline int av_rpi_is_sand_format(const int format) ++{ ++ return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); ++} ++ ++static inline int av_rpi_is_sand_frame(const AVFrame * const frame) ++{ ++ return av_rpi_is_sand_format(frame->format); ++} ++ ++static inline int av_rpi_is_sand8_frame(const AVFrame * const frame) ++{ ++ return (frame->format == AV_PIX_FMT_SAND128 ||
frame->format == AV_PIX_FMT_RPI4_8); ++} ++ ++static inline int av_rpi_is_sand16_frame(const AVFrame * const frame) ++{ ++ return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16); ++} ++ ++static inline int av_rpi_is_sand30_frame(const AVFrame * const frame) ++{ ++ return (frame->format == AV_PIX_FMT_RPI4_10); ++} ++ ++static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame) ++{ ++ return av_rpi_is_sand8_frame(frame) ? 0 : 1; ++} ++ ++// If x is measured in bytes (not pixels) then this works for sand64_16 as ++// well as sand128 - but in the general case we work that out ++ ++static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y) ++{ ++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); ++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); ++ const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); ++ const unsigned int x1 = x & (stride1 - 1); ++ const unsigned int x2 = x ^ x1; ++ ++ return x1 + stride1 * y + stride2 * x2; ++} ++ ++static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c) ++{ ++ const unsigned int stride1 = av_rpi_sand_frame_stride1(frame); ++ const unsigned int stride2 = av_rpi_sand_frame_stride2(frame); ++ const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); ++ const unsigned int x1 = x & (stride1 - 1); ++ const unsigned int x2 = x ^ x1; ++ ++ return x1 + stride1 * y_c + stride2 * x2; ++} ++ ++static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y) ++{ ++ return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y); ++} ++ ++static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y) ++{ ++ return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y); ++} ++ ++#endif ++ +diff --git 
a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c +index 20a25033cba2..ebbb5e04fc17 100644 +--- a/libswscale/aarch64/rgb2rgb.c ++++ b/libswscale/aarch64/rgb2rgb.c +@@ -57,6 +57,12 @@ void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2, + void ff_deinterleave_bytes_neon(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, + int width, int height, int srcStride, + int dst1Stride, int dst2Stride); ++void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv); ++void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv); + + av_cold void rgb2rgb_init_aarch64(void) + { +@@ -66,5 +72,7 @@ av_cold void rgb2rgb_init_aarch64(void) + ff_rgb24toyv12 = rgb24toyv12; + interleaveBytes = ff_interleave_bytes_neon; + deinterleaveBytes = ff_deinterleave_bytes_neon; ++ ff_rgb24toyv12 = ff_rgb24toyv12_aarch64; ++ ff_bgr24toyv12 = ff_bgr24toyv12_aarch64; + } + } +diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S +index 1382e00261a9..a2bab11b34de 100644 +--- a/libswscale/aarch64/rgb2rgb_neon.S ++++ b/libswscale/aarch64/rgb2rgb_neon.S +@@ -296,3 +296,359 @@ function ff_deinterleave_bytes_neon, export=1 + 0: + ret + endfunc ++ ++// Expand rgb2 into r0+r1/g0+g1/b0+b1 ++.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2 ++ uxtl \r0\().8h, \r2\().8b ++ uxtl \g0\().8h, \g2\().8b ++ uxtl \b0\().8h, \b2\().8b ++ ++ uxtl2 \r1\().8h, \r2\().16b ++ uxtl2 \g1\().8h, \g2\().16b ++ uxtl2 \b1\().8h, \b2\().16b ++.endm ++ ++// Expand rgb2 into r0+r1/g0+g1/b0+b1 ++// and pick every other el to put back into rgb2 for chroma ++.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2 ++ XRGB3Y \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2 ++ ++ bic \r2\().8h, #0xff, LSL #8 ++ bic \g2\().8h, #0xff, LSL #8 ++ 
bic \b2\().8h, #0xff, LSL #8 ++.endm ++ ++.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2 ++ smull \d0\().4s, \s0\().4h, \c0 ++ smlal \d0\().4s, \s1\().4h, \c1 ++ smlal \d0\().4s, \s2\().4h, \c2 ++ smull2 \d1\().4s, \s0\().8h, \c0 ++ smlal2 \d1\().4s, \s1\().8h, \c1 ++ smlal2 \d1\().4s, \s2\().8h, \c2 ++.endm ++ ++// d0 may be s0 ++// s0, s2 corrupted ++.macro SHRN_Y d0, s0, s1, s2, s3, k128h ++ shrn \s0\().4h, \s0\().4s, #12 ++ shrn2 \s0\().8h, \s1\().4s, #12 ++ add \s0\().8h, \s0\().8h, \k128h\().8h // +128 (>> 3 = 16) ++ sqrshrun \d0\().8b, \s0\().8h, #3 ++ shrn \s2\().4h, \s2\().4s, #12 ++ shrn2 \s2\().8h, \s3\().4s, #12 ++ add \s2\().8h, \s2\().8h, \k128h\().8h // +128 (>> 3 = 16) ++ sqrshrun2 \d0\().16b, \s2\().8h, #3 ++.endm ++ ++.macro SHRN_C d0, s0, s1, k128b ++ shrn \s0\().4h, \s0\().4s, #14 ++ shrn2 \s0\().8h, \s1\().4s, #14 ++ sqrshrn \s0\().8b, \s0\().8h, #1 ++ add \d0\().8b, \s0\().8b, \k128b\().8b // +128 ++.endm ++ ++.macro STB2V s0, n, a ++ st1 {\s0\().b}[(\n+0)], [\a], #1 ++ st1 {\s0\().b}[(\n+1)], [\a], #1 ++.endm ++ ++.macro STB4V s0, n, a ++ STB2V \s0, (\n+0), \a ++ STB2V \s0, (\n+2), \a ++.endm ++ ++ ++// void ff_rgb24toyv12_aarch64( ++// const uint8_t *src, // x0 ++// uint8_t *ydst, // x1 ++// uint8_t *udst, // x2 ++// uint8_t *vdst, // x3 ++// int width, // w4 ++// int height, // w5 ++// int lumStride, // w6 ++// int chromStride, // w7 ++// int srcStr, // [sp, #0] ++// int32_t *rgb2yuv); // [sp, #8] ++ ++function ff_rgb24toyv12_aarch64, export=1 ++ ldr x15, [sp, #8] ++ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[2], [x15] ++ mov v6.16b, v3.16b ++ mov v3.16b, v5.16b ++ mov v5.16b, v6.16b ++ b 99f ++endfunc ++ ++// void ff_bgr24toyv12_aarch64( ++// const uint8_t *src, // x0 ++// uint8_t *ydst, // x1 ++// uint8_t *udst, // x2 ++// uint8_t *vdst, // x3 ++// int width, // w4 ++// int height, // w5 ++// int lumStride, // w6 ++// int chromStride, // w7 ++// int srcStr, // [sp, #0] ++// int32_t *rgb2yuv); // [sp,
#8] (including Mac) ++ ++// regs ++// v0-2 Src bytes - reused as chroma src ++// v3-5 Coeffs (packed very inefficiently - could be squashed) ++// v6 128h ++// v7 128b ++// v8-15 Reserved ++// v16-18 Lo Src expanded as H ++// v19 - ++// v20-22 Hi Src expanded as H ++// v23 - ++// v24 U out ++// v25 U tmp ++// v26 Y out ++// v27-29 Y tmp ++// v30 V out ++// v31 V tmp ++ ++function ff_bgr24toyv12_aarch64, export=1 ++ ldr x15, [sp, #8] ++ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 ++ ld3 {v3.s, v4.s, v5.s}[2], [x15] ++ ++99: ++ ldr w14, [sp, #0] ++ movi v7.8b, #128 ++ uxtl v6.8h, v7.8b ++ // Ensure if nothing to do then we do nothing ++ cmp w4, #0 ++ b.le 90f ++ cmp w5, #0 ++ b.le 90f ++ // If w % 16 != 0 then -16 so we do main loop 1 fewer times with ++ // the remainder done in the tail ++ tst w4, #15 ++ b.eq 1f ++ sub w4, w4, #16 ++1: ++ ++// -------------------- Even line body - YUV ++11: ++ subs w9, w4, #0 ++ mov x10, x0 ++ mov x11, x1 ++ mov x12, x2 ++ mov x13, x3 ++ b.lt 12f ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ subs w9, w9, #16 ++ b.le 13f ++ ++10: ++ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ ++ // Testing shows it is faster to stack the smull/smlal ops together ++ // rather than interleave them between channels and indeed even the ++ // shift/add sections seem happier not interleaved ++ ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ ++ // U ++ // Vector subscript *2 as we loaded into S but are only using H ++ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] ++ ++ // V ++ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ ++ SHRN_C v24, v24, v25, v7 ++ SHRN_C v30, v30, v31, v7 ++ ++ subs w9, w9, #16 ++ ++ st1 {v26.16b}, [x11], #16 ++ st1 {v24.8b}, [x12], #8 ++ st1 {v30.8b}, [x13], #8 ++ ++ b.gt 10b ++ ++//
-------------------- Even line tail - YUV ++// If width % 16 == 0 then simply runs once with preloaded RGB ++// If other then deals with preload & then does remaining tail ++ ++13: ++ // Body is simple copy of main loop body minus preload ++ ++ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ // U ++ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] ++ // V ++ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] ++ ++ cmp w9, #-16 ++ ++ SHRN_C v24, v24, v25, v7 ++ SHRN_C v30, v30, v31, v7 ++ ++ // Here: ++ // w9 == 0 width % 16 == 0, tail done ++ // w9 > -16 1st tail done (16 pels), remainder still to go ++ // w9 == -16 shouldn't happen ++ // w9 > -32 2nd tail done ++ // w9 <= -32 shouldn't happen ++ ++ b.lt 2f ++ st1 {v26.16b}, [x11], #16 ++ st1 {v24.8b}, [x12], #8 ++ st1 {v30.8b}, [x13], #8 ++ cbz w9, 3f ++ ++12: ++ sub w9, w9, #16 ++ ++ tbz w9, #3, 1f ++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 ++1: tbz w9, #2, 1f ++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 ++1: tbz w9, #1, 1f ++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 ++1: tbz w9, #0, 13b ++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 ++ b 13b ++ ++2: ++ tbz w9, #3, 1f ++ st1 {v26.8b}, [x11], #8 ++ STB4V v24, 0, x12 ++ STB4V v30, 0, x13 ++1: tbz w9, #2, 1f ++ STB4V v26, 8, x11 ++ STB2V v24, 4, x12 ++ STB2V v30, 4, x13 ++1: tbz w9, #1, 1f ++ STB2V v26, 12, x11 ++ st1 {v24.b}[6], [x12], #1 ++ st1 {v30.b}[6], [x13], #1 ++1: tbz w9, #0, 1f ++ st1 {v26.b}[14], [x11] ++ st1 {v24.b}[7], [x12] ++ st1 {v30.b}[7], [x13] ++1: ++3: ++ ++// -------------------- Odd line body - Y only ++ ++ subs w5, w5, #1 ++ b.eq 90f ++ ++ subs w9, w4, #0 ++ add x0, x0, w14, sxtw ++ add x1, x1, w6, sxtw ++ mov
x10, x0 ++ mov x11, x1 ++ b.lt 12f ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ subs w9, w9, #16 ++ b.le 13f ++ ++10: ++ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ ++ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 ++ ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ ++ subs w9, w9, #16 ++ ++ st1 {v26.16b}, [x11], #16 ++ ++ b.gt 10b ++ ++// -------------------- Odd line tail - Y ++// If width % 16 == 0 then simply runs once with preloaded RGB ++// If other then deals with preload & then does remaining tail ++ ++13: ++ // Body is simple copy of main loop body minus preload ++ ++ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 ++ // Y0 ++ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] ++ // Y1 ++ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] ++ ++ cmp w9, #-16 ++ ++ SHRN_Y v26, v26, v27, v28, v29, v6 ++ ++ // Here: ++ // w9 == 0 width % 16 == 0, tail done ++ // w9 > -16 1st tail done (16 pels), remainder still to go ++ // w9 == -16 shouldn't happen ++ // w9 > -32 2nd tail done ++ // w9 <= -32 shouldn't happen ++ ++ b.lt 2f ++ st1 {v26.16b}, [x11], #16 ++ cbz w9, 3f ++ ++12: ++ sub w9, w9, #16 ++ ++ tbz w9, #3, 1f ++ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 ++1: tbz w9, #2, 1f ++ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 ++1: tbz w9, #1, 1f ++ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 ++ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 ++1: tbz w9, #0, 13b ++ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 ++ b 13b ++ ++2: ++ tbz w9, #3, 1f ++ st1 {v26.8b}, [x11], #8 ++1: tbz w9, #2, 1f ++ STB4V v26, 8, x11 ++1: tbz w9, #1, 1f ++ STB2V v26, 12, x11 ++1: tbz w9, #0, 1f ++ st1 {v26.b}[14], [x11] ++1: ++3: ++ ++// ------------------- Loop to start ++ ++ add x0, x0, w14, sxtw ++ add x1, x1, w6, sxtw ++ add x2, x2, w7, sxtw ++ 
add x3, x3, w7, sxtw ++ subs w5, w5, #1 ++ b.gt 11b ++90: ++ ret ++endfunc +diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c +index e98fdac8ead6..c3b9079d2b3e 100644 +--- a/libswscale/rgb2rgb.c ++++ b/libswscale/rgb2rgb.c +@@ -83,6 +83,31 @@ void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); ++void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, ++ uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); + void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, + int srcStride, int dstStride); + void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, +diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h +index e3b088390184..b8b1d649033e 100644 +--- a/libswscale/rgb2rgb.h ++++ b/libswscale/rgb2rgb.h +@@ -79,6 +79,9 @@ void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); + void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv); ++void ff_bgr24toyv12_c(const 
uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv); + + /** + * Height should be a multiple of 2 and width should be a multiple of 16. +@@ -126,6 +129,26 @@ extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + int width, int height, + int lumStride, int chromStride, int srcStride, + int32_t *rgb2yuv); ++extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); ++extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, ++ int width, int height, ++ int lumStride, int chromStride, int srcStride, ++ int32_t *rgb2yuv); + extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, + int srcStride, int dstStride); + +diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c +index 32d90d44feb7..e711589e1e1a 100644 +--- a/libswscale/rgb2rgb_template.c ++++ b/libswscale/rgb2rgb_template.c +@@ -642,65 +642,235 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, + /** + * Height should be a multiple of 2 and width should be a multiple of 2. + * (If this is a problem for anyone then tell me, and I will fix it.) 
++ * Chrominance data is only taken from every second line, ++ * others are ignored in the C version. ++ * FIXME: Write HQ version. + */ ++static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv, ++ const uint8_t x[9]) ++{ ++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; ++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; ++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; ++ int y; ++ const int chromWidth = width >> 1; ++ ++ for (y = 0; y < height; y += 2) { ++ int i; ++ for (i = 0; i < chromWidth; i++) { ++ unsigned int b = src[6 * i + 0]; ++ unsigned int g = src[6 * i + 1]; ++ unsigned int r = src[6 * i + 2]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; ++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; ++ ++ udst[i] = U; ++ vdst[i] = V; ++ ydst[2 * i] = Y; ++ ++ b = src[6 * i + 3]; ++ g = src[6 * i + 4]; ++ r = src[6 * i + 5]; ++ ++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; ++ } ++ if ((width & 1) != 0) { ++ unsigned int b = src[6 * i + 0]; ++ unsigned int g = src[6 * i + 1]; ++ unsigned int r = src[6 * i + 2]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; ++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; ++ ++ udst[i] = U; ++ vdst[i] = V; ++ ydst[2 * i] = Y; ++ } ++ ydst += lumStride; ++ src += srcStride; ++ ++ if (y+1 == height) ++ break; ++ ++ for (i = 0; i < chromWidth; i++) { ++ unsigned int b = src[6 * i + 0]; ++ unsigned int g = src[6 * i + 1]; ++ unsigned int r = src[6 * i + 2]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ++ ydst[2 * i] = Y; ++ ++ b = 
src[6 * i + 3]; ++ g = src[6 * i + 4]; ++ r = src[6 * i + 5]; ++ ++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; ++ } ++ if ((width & 1) != 0) { ++ unsigned int b = src[6 * i + 0]; ++ unsigned int g = src[6 * i + 1]; ++ unsigned int r = src[6 * i + 2]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ++ ydst[2 * i] = Y; ++ } ++ udst += chromStride; ++ vdst += chromStride; ++ ydst += lumStride; ++ src += srcStride; ++ } ++} ++ ++static const uint8_t x_rgb[9] = { ++ RY_IDX, GY_IDX, BY_IDX, ++ RU_IDX, GU_IDX, BU_IDX, ++ RV_IDX, GV_IDX, BV_IDX, ++}; ++ ++static const uint8_t x_bgr[9] = { ++ BY_IDX, GY_IDX, RY_IDX, ++ BU_IDX, GU_IDX, RU_IDX, ++ BV_IDX, GV_IDX, RV_IDX, ++}; ++ + void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, + uint8_t *vdst, int width, int height, int lumStride, + int chromStride, int srcStride, int32_t *rgb2yuv) + { +- int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; +- int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; +- int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; ++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); ++} ++ ++void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); ++} ++ ++static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv, ++ const uint8_t x[9]) ++{ ++ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; ++ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; ++ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = 
rgb2yuv[x[8]]; + int y; + const int chromWidth = width >> 1; +- const uint8_t *src1 = src; +- const uint8_t *src2 = src1 + srcStride; +- uint8_t *ydst1 = ydst; +- uint8_t *ydst2 = ydst + lumStride; + + for (y = 0; y < height; y += 2) { + int i; + for (i = 0; i < chromWidth; i++) { +- unsigned int b11 = src1[6 * i + 0]; +- unsigned int g11 = src1[6 * i + 1]; +- unsigned int r11 = src1[6 * i + 2]; +- unsigned int b12 = src1[6 * i + 3]; +- unsigned int g12 = src1[6 * i + 4]; +- unsigned int r12 = src1[6 * i + 5]; +- unsigned int b21 = src2[6 * i + 0]; +- unsigned int g21 = src2[6 * i + 1]; +- unsigned int r21 = src2[6 * i + 2]; +- unsigned int b22 = src2[6 * i + 3]; +- unsigned int g22 = src2[6 * i + 4]; +- unsigned int r22 = src2[6 * i + 5]; +- +- unsigned int Y11 = ((ry * r11 + gy * g11 + by * b11) >> RGB2YUV_SHIFT) + 16; +- unsigned int Y12 = ((ry * r12 + gy * g12 + by * b12) >> RGB2YUV_SHIFT) + 16; +- unsigned int Y21 = ((ry * r21 + gy * g21 + by * b21) >> RGB2YUV_SHIFT) + 16; +- unsigned int Y22 = ((ry * r22 + gy * g22 + by * b22) >> RGB2YUV_SHIFT) + 16; +- +- unsigned int bx = (b11 + b12 + b21 + b22) >> 2; +- unsigned int gx = (g11 + g12 + g21 + g22) >> 2; +- unsigned int rx = (r11 + r12 + r21 + r22) >> 2; +- +- unsigned int U = ((ru * rx + gu * gx + bu * bx) >> RGB2YUV_SHIFT) + 128; +- unsigned int V = ((rv * rx + gv * gx + bv * bx) >> RGB2YUV_SHIFT) + 128; +- +- ydst1[2 * i + 0] = Y11; +- ydst1[2 * i + 1] = Y12; +- ydst2[2 * i + 0] = Y21; +- ydst2[2 * i + 1] = Y22; +- udst[i] = U; +- vdst[i] = V; ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; ++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; ++ ++ udst[i] = U; ++ vdst[i] = V; ++ ydst[2 * i] = Y; ++ ++ b = src[8 * i + 6]; ++ g = src[8 * i + 5]; ++ r = src[8 * i + 4]; ++ ++ Y = ((ry * 
r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; ++ } ++ if ((width & 1) != 0) { ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; ++ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; ++ ++ udst[i] = U; ++ vdst[i] = V; ++ ydst[2 * i] = Y; ++ } ++ ydst += lumStride; ++ src += srcStride; ++ ++ if (y+1 == height) ++ break; ++ ++ for (i = 0; i < chromWidth; i++) { ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ++ ydst[2 * i] = Y; ++ ++ b = src[8 * i + 6]; ++ g = src[8 * i + 5]; ++ r = src[8 * i + 4]; ++ ++ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ydst[2 * i + 1] = Y; + } +- src1 += srcStride * 2; +- src2 += srcStride * 2; +- ydst1 += lumStride * 2; +- ydst2 += lumStride * 2; +- udst += chromStride; +- vdst += chromStride; ++ if ((width & 1) != 0) { ++ unsigned int b = src[8 * i + 2]; ++ unsigned int g = src[8 * i + 1]; ++ unsigned int r = src[8 * i + 0]; ++ ++ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; ++ ++ ydst[2 * i] = Y; ++ } ++ udst += chromStride; ++ vdst += chromStride; ++ ydst += lumStride; ++ src += srcStride; + } + } + ++static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); ++} ++ ++static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src, ydst, 
udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); ++} ++ ++// As the general code does no SIMD-like ops simply adding 1 to the src address ++// will fix the ignored alpha position ++static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); ++} ++ ++static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ++ uint8_t *vdst, int width, int height, int lumStride, ++ int chromStride, int srcStride, int32_t *rgb2yuv) ++{ ++ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); ++} ++ ++ + static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, + uint8_t *dest, int width, int height, + int src1Stride, int src2Stride, int dstStride) +@@ -974,6 +1144,11 @@ static av_cold void rgb2rgb_init_c(void) + yuy2toyv12 = yuy2toyv12_c; + planar2x = planar2x_c; + ff_rgb24toyv12 = ff_rgb24toyv12_c; ++ ff_bgr24toyv12 = ff_bgr24toyv12_c; ++ ff_rgbxtoyv12 = ff_rgbxtoyv12_c; ++ ff_bgrxtoyv12 = ff_bgrxtoyv12_c; ++ ff_xrgbtoyv12 = ff_xrgbtoyv12_c; ++ ff_xbgrtoyv12 = ff_xbgrtoyv12_c; + interleaveBytes = interleaveBytes_c; + deinterleaveBytes = deinterleaveBytes_c; + vu9_to_vu12 = vu9_to_vu12_c; +diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c +index dc1d5f359325..519a69eaa37e 100644 +--- a/libswscale/swscale_unscaled.c ++++ b/libswscale/swscale_unscaled.c +@@ -1696,6 +1696,91 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], + return srcSliceH; + } + ++static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_bgr24toyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + 
(srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_bgrxtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_rgbxtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t *dst[], int dstStride[]) ++{ ++ ff_xbgrtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ ++static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[], ++ int srcStride[], int srcSliceY, int srcSliceH, ++ uint8_t 
*dst[], int dstStride[]) ++{ ++ ff_xrgbtoyv12( ++ src[0], ++ dst[0] + srcSliceY * dstStride[0], ++ dst[1] + (srcSliceY >> 1) * dstStride[1], ++ dst[2] + (srcSliceY >> 1) * dstStride[2], ++ c->srcW, srcSliceH, ++ dstStride[0], dstStride[1], srcStride[0], ++ c->input_rgb2yuv_table); ++ if (dst[3]) ++ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); ++ return srcSliceH; ++} ++ + static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], + int srcStride[], int srcSliceY, int srcSliceH, + uint8_t *dst[], int dstStride[]) +@@ -2019,7 +2104,6 @@ void ff_get_unscaled_swscale(SwsContext *c) + const enum AVPixelFormat dstFormat = c->dstFormat; + const int flags = c->flags; + const int dstH = c->dstH; +- const int dstW = c->dstW; + int needsDither; + + needsDither = isAnyRGB(dstFormat) && +@@ -2077,8 +2161,34 @@ void ff_get_unscaled_swscale(SwsContext *c) + /* bgr24toYV12 */ + if (srcFormat == AV_PIX_FMT_BGR24 && + (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && +- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) ++ !(flags & SWS_ACCURATE_RND)) + c->convert_unscaled = bgr24ToYv12Wrapper; ++ /* rgb24toYV12 */ ++ if (srcFormat == AV_PIX_FMT_RGB24 && ++ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = rgb24ToYv12Wrapper; ++ ++ /* bgrxtoYV12 */ ++ if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = bgrxToYv12Wrapper; ++ /* rgbx24toYV12 */ ++ if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = rgbxToYv12Wrapper; ++ /* xbgrtoYV12 */ ++ if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == 
AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = xbgrToYv12Wrapper; ++ /* xrgb24toYV12 */ ++ if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || ++ (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && ++ !(flags & SWS_ACCURATE_RND)) ++ c->convert_unscaled = xrgbToYv12Wrapper; + + /* RGB/BGR -> RGB/BGR (no dither needed forms) */ + if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) +diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c +index cf8d04de8983..97d5a4bd2ebc 100644 +--- a/libswscale/tests/swscale.c ++++ b/libswscale/tests/swscale.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #undef HAVE_AV_CONFIG_H + #include "libavutil/cpu.h" +@@ -98,6 +99,15 @@ struct Results { + uint32_t crc; + }; + ++static int time_rep = 0; ++ ++static uint64_t utime(void) ++{ ++ struct timespec ts; ++ clock_gettime(CLOCK_MONOTONIC, &ts); ++ return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; ++} ++ + // test by ref -> src -> dst -> out & compare out against ref + // ref & out are YV12 + static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, +@@ -213,7 +223,7 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, + goto end; + } + +- printf(" %s %dx%d -> %s %3dx%3d flags=%2d", ++ printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d", + desc_src->name, srcW, srcH, + desc_dst->name, dstW, dstH, + flags); +@@ -221,6 +231,17 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, + + sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); + ++ if (time_rep != 0) ++ { ++ const uint64_t now = utime(); ++ uint64_t done; ++ for (i = 1; i != time_rep; ++i) { ++ sws_scale(dstContext, (const uint8_t * const*)src, 
srcStride, 0, srcH, dst, dstStride); ++ } ++ done = utime(); ++ printf(" T=%7"PRId64"us ", done-now); ++ } ++ + for (i = 0; i < 4 && dstStride[i]; i++) + crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], + dstStride[i] * dstH); +@@ -413,30 +434,31 @@ static int fileTest(const uint8_t * const ref[4], int refStride[4], + return 0; + } + +-#define W 96 +-#define H 96 +- + int main(int argc, char **argv) + { ++ unsigned int W = 96; ++ unsigned int H = 96; ++ unsigned int W2; ++ unsigned int H2; ++ unsigned int S; + enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; + enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; +- uint8_t *rgb_data = av_malloc(W * H * 4); +- const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; +- int rgb_stride[4] = { 4 * W, 0, 0, 0 }; +- uint8_t *data = av_malloc(4 * W * H); +- const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; +- int stride[4] = { W, W, W, W }; + int x, y; + struct SwsContext *sws; + AVLFG rand; + int res = -1; + int i; + FILE *fp = NULL; +- +- if (!rgb_data || !data) +- return -1; ++ uint8_t *rgb_data; ++ uint8_t * rgb_src[4] = { NULL }; ++ int rgb_stride[4] = { 0 }; ++ uint8_t *data; ++ uint8_t * src[4] = { NULL }; ++ int stride[4] = { 0 }; + + for (i = 1; i < argc; i += 2) { ++ const char * const arg2 = argv[i+1]; ++ + if (!strcmp(argv[i], "-help") || !strcmp(argv[i], "--help")) { + fprintf(stderr, + "swscale [options...]\n" +@@ -459,29 +481,50 @@ int main(int argc, char **argv) + if (argv[i][0] != '-' || i + 1 == argc) + goto bad_option; + if (!strcmp(argv[i], "-ref")) { +- fp = fopen(argv[i + 1], "r"); ++ fp = fopen(arg2, "r"); + if (!fp) { +- fprintf(stderr, "could not open '%s'\n", argv[i + 1]); ++ fprintf(stderr, "could not open '%s'\n", arg2); + goto error; + } + } else if (!strcmp(argv[i], "-cpuflags")) { + unsigned flags = av_get_cpu_flags(); +- int ret = av_parse_cpu_caps(&flags, argv[i + 1]); ++ int ret = av_parse_cpu_caps(&flags, arg2); + if (ret < 0) { +- 
fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]); ++ fprintf(stderr, "invalid cpu flags %s\n", arg2); + return ret; + } + av_force_cpu_flags(flags); + } else if (!strcmp(argv[i], "-src")) { +- srcFormat = av_get_pix_fmt(argv[i + 1]); ++ srcFormat = av_get_pix_fmt(arg2); + if (srcFormat == AV_PIX_FMT_NONE) { +- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); ++ fprintf(stderr, "invalid pixel format %s\n", arg2); + return -1; + } + } else if (!strcmp(argv[i], "-dst")) { +- dstFormat = av_get_pix_fmt(argv[i + 1]); ++ dstFormat = av_get_pix_fmt(arg2); + if (dstFormat == AV_PIX_FMT_NONE) { +- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); ++ fprintf(stderr, "invalid pixel format %s\n", arg2); ++ return -1; ++ } ++ } else if (!strcmp(argv[i], "-w")) { ++ char * p = NULL; ++ W = strtoul(arg2, &p, 0); ++ if (!W || *p) { ++ fprintf(stderr, "bad width %s\n", arg2); ++ return -1; ++ } ++ } else if (!strcmp(argv[i], "-h")) { ++ char * p = NULL; ++ H = strtoul(arg2, &p, 0); ++ if (!H || *p) { ++ fprintf(stderr, "bad height '%s'\n", arg2); ++ return -1; ++ } ++ } else if (!strcmp(argv[i], "-t")) { ++ char * p = NULL; ++ time_rep = (int)strtol(arg2, &p, 0); ++ if (*p) { ++ fprintf(stderr, "bad time repetitions '%s'\n", arg2); + return -1; + } + } else if (!strcmp(argv[i], "-p")) { +@@ -495,15 +538,34 @@ bad_option: + + ff_sfc64_init(&prng_state, 0, 0, 0, 12); + +- sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, ++ S = (W + 15) & ~15; ++ rgb_data = av_mallocz(S * H * 4); ++ rgb_src[0] = rgb_data; ++ rgb_stride[0] = 4 * S; ++ data = av_mallocz(4 * S * H); ++ src[0] = data; ++ src[1] = data + S * H; ++ src[2] = data + S * H * 2; ++ src[3] = data + S * H * 3; ++ stride[0] = S; ++ stride[1] = S; ++ stride[2] = S; ++ stride[3] = S; ++ H2 = H < 96 ? 8 : H / 12; ++ W2 = W < 96 ? 
8 : W / 12; ++ ++ if (!rgb_data || !data) ++ return -1; ++ ++ sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H, + AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); + + av_lfg_init(&rand, 1); + + for (y = 0; y < H; y++) + for (x = 0; x < W * 4; x++) +- rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); +- res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride); ++ rgb_data[ x + y * 4 * S] = av_lfg_get(&rand); ++ res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride); + if (res < 0 || res != H) { + res = -1; + goto error; +@@ -512,10 +574,10 @@ bad_option: + av_free(rgb_data); + + if(fp) { +- res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); ++ res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat); + fclose(fp); + } else { +- selfTest(src, stride, W, H, srcFormat, dstFormat); ++ selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat); + res = 0; + } + error: +diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt +new file mode 100644 +index 000000000000..2b62d660c0d1 +--- /dev/null ++++ b/pi-util/BUILD.txt +@@ -0,0 +1,67 @@ ++Building Pi FFmpeg ++================== ++ ++Currently only building on a Pi is supported. ++This builds ffmpeg the way I've tested it ++ ++Get all dependencies - the current package dependencies are good enough ++ ++$ sudo apt-get build-dep ffmpeg ++ ++Configure using the pi-util/conf_native.sh script ++------------------------------------------------- ++ ++This sets the normal release options and creates an output dir to build into ++The directory name will depend on system and options but will be under out/ ++ ++There are a few choices here ++ --mmal build including the legacy mmal-based decoders and zero-copy code ++ this requires appropriate libraries which currently will exist for ++ armv7 but not arm64 ++ --noshared ++ Build a static image rather than a shared library one. 
Static is ++ easier for testing as there is no need to worry about library ++ paths being confused and therefore running the wrong code, Shared ++ is what is needed, in most cases, when building for use by other ++ programs. ++ --usr Set install dir to /usr (i.e. system default) rather than in ++ /install ++ ++So for a static build ++--------------------- ++ ++$ pi-util/conf_native.sh --noshared ++ ++$ make -j8 -C out/ ++ ++You can now run ffmpeg directly from where it was built ++ ++For a shared build ++------------------ ++ ++There are two choices here ++ ++$ pi-util/conf_native.sh ++$ make -j8 -C out/ install ++ ++This sets the install prefix to /install and is probably what you ++want if you don't want to overwrite the system files. ++ ++You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was ++built. You can copy the contents of /install to /usr and that mostly ++works. The only downside is that paths in pkgconfig end up being set to the ++install directory in your build directory which may be less than ideal when ++building other packages. ++ ++The alternative if you just want to replace the system libs is: ++ ++$ pi-util/conf_native.sh --usr ++$ make -j8 -C out/ ++$ sudo pi-util/clean_usr_libs.sh ++$ sudo make -j8 -C out/ install ++ ++The clean_usr_libs.sh step wipes any existing libs & includes (for all ++architectures) from the system which helps avoid confusion when running other ++progs as you can be sure you're not running old code which is unfortunately ++easy to do otherwise. ++ +diff --git a/pi-util/NOTES.txt b/pi-util/NOTES.txt +new file mode 100644 +index 000000000000..fcce72226a32 +--- /dev/null ++++ b/pi-util/NOTES.txt +@@ -0,0 +1,69 @@ ++Notes on the hevc_rpi decoder & associated support code ++------------------------------------------------------- ++ ++There are 3 main parts to the existing code: ++ ++1) The decoder - this is all in libavcodec as rpi_hevc*. 
++ ++2) A few filters to deal with Sand frames and a small patch to ++automatically select the sand->i420 converter when required. ++ ++3) A kludge in ffmpeg.c to display the decoded video. This could & should ++be converted into a proper ffmpeg display module. ++ ++ ++Decoder ++------- ++ ++The decoder is a modified version of the existing ffmpeg hevc decoder. ++Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder. ++More complex bitstreams can be up to ~200% faster but particularly easy ++streams can cut its advantage down to ~50%. This means that a Pi3+ can ++display nearly all 8-bit 1080p30 streams and with some overclocking it can ++display most lower bitrate 10-bit 1080p30 streams - this latter case is ++not helped by the requirement to downsample to 8-bit before display on a ++Pi. ++ ++It has had co-processor offload added for inter-pred and large block ++residual transform. Various parts have had optimized ARM NEON assembler ++added and the existing ARM asm sections have been profiled and ++re-optimized for A53. The main C code has been substantially reworked at ++its lower levels in an attempt to optimize it and minimize memory ++bandwidth. To some extent code paths that deal with frame types that it ++doesn't support have been pruned. ++ ++It outputs frames in Broadcom Sand format. This is a somewhat annoying ++layout that doesn't fit into ffmpegs standard frame descriptions. It has ++vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for ++the stripe followed by interleaved U & V, that is then followed by the Y ++for the next stripe, etc. The final stripe is always padded to ++stripe-width. This is used in an attempt to help with cache locality and ++cut down on the number of dram bank switches. 
It is annoying to use for ++inter-pred with conventional processing but the way the Pi QPU (which is ++used for inter-pred) works means that it has negligible downsides here and ++the improved memory performance exceeds the overhead of the increased ++complexity in the rest of the code. ++ ++Frames must be allocated out of GPU memory (as otherwise they can't be ++accessed by the co-processors). Utility functions (in rpi_zc.c) have been ++written to make this easier. As the frames are already in GPU memory they ++can be displayed by the Pi h/w without any further copying. ++ ++ ++Known non-features ++------------------ ++ ++Frame allocation should probably be done in some other way in order to fit ++into the standard framework better. ++ ++Sand frames are currently declared as software frames, there is an ++argument that they should be hardware frames but they aren't really. ++ ++There must be a better way of auto-selecting the hevc_rpi decoder over the ++normal s/w hevc decoder, but I became confused by the existing h/w ++acceleration framework and what I wanted to do didn't seem to fit in ++neatly. 
++ ++Display should be a proper device rather than a kludge in ffmpeg.c ++ ++ +diff --git a/pi-util/TESTMESA.txt b/pi-util/TESTMESA.txt +new file mode 100644 +index 000000000000..92bc13a3dfa1 +--- /dev/null ++++ b/pi-util/TESTMESA.txt +@@ -0,0 +1,82 @@ ++# Setup & Build instructions for testing Argon30 mesa support (on Pi4) ++ ++# These assume that the drm_mmal test for Sand8 has been built on this Pi ++# as build relies on many of the same files ++ ++# 1st get everything required to build ffmpeg ++# If sources aren't already enabled on your Pi then enable them ++sudo su ++sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list ++sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list ++mv /tmp/sources.list /etc/apt/ ++mv /tmp/raspi.list /etc/apt/sources.list.d/ ++apt update ++ ++# Get dependencies ++sudo apt build-dep ffmpeg ++ ++sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev ++ ++# Enable H265 V4L2 request decoder ++sudo su ++echo dtoverlay=rpivid-v4l2 >> /boot/config.txt ++# You may also want to add more CMA if you are going to try 4k videos ++# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read ++# dtoverlay=vc4-fkms-v3d,cma-512 ++reboot ++# Check it has turned up ++ls -la /dev/video* ++# This should include video19 ++# crw-rw----+ 1 root video 81, 7 Aug 4 17:25 /dev/video19 ++ ++# Currently on the Pi the linux headers from the debian distro don't match ++# the kernel that we ship and we need to update them - hopefully this step ++# will be unneeded in the future ++sudo apt install git bc bison flex libssl-dev make ++git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y ++cd linux ++KERNEL=kernel7l ++make bcm2711_defconfig ++make headers_install ++sudo cp -r usr/include/linux /usr/include ++cd .. 
++ ++# Config - this builds a statically linked ffmpeg which is easier for testing ++pi-util/conf_native.sh --noshared ++ ++# Build (this is a bit dull) ++# If you want to poke the source the libavdevice/egl_vout.c contains the ++# output code - ++cd out/armv7-static-rel ++ ++# Check that you have actually configured V4L2 request ++grep HEVC_V4L2REQUEST config.h ++# You are hoping for ++# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1 ++# if you get 0 then the config has failed ++ ++make -j6 ++ ++# Grab test streams ++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv ++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv ++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv ++ ++# Test i420 output (works currently) ++./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl - ++ ++# Test Sand8 output - doesn't currently work but should once you have ++# Sand8 working in drm_mmal. I can't guarantee that this will work as ++# I can't test this path with a known working format, but the debug looks ++# good. 
If this doesn't work & drm_mmal does with sand8 then come back to me ++# The "show_all 1" forces vout to display every frame otherwise it drops any ++# frame that would cause it to block ++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl - ++ ++# Test Sand30 - doesn't currently work ++# (Beware that when FFmpeg errors out it often leaves your terminal window ++# in a state where you need to reset it) ++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl - ++ ++ ++ +diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh +new file mode 100755 +index 000000000000..01bd6a6a2254 +--- /dev/null ++++ b/pi-util/clean_usr_libs.sh +@@ -0,0 +1,42 @@ ++set -e ++U=/usr/include/arm-linux-gnueabihf ++rm -rf $U/libavcodec ++rm -rf $U/libavdevice ++rm -rf $U/libavfilter ++rm -rf $U/libavformat ++rm -rf $U/libavutil ++rm -rf $U/libswresample ++rm -rf $U/libswscale ++U=/usr/include/aarch64-linux-gnu ++rm -rf $U/libavcodec ++rm -rf $U/libavdevice ++rm -rf $U/libavfilter ++rm -rf $U/libavformat ++rm -rf $U/libavutil ++rm -rf $U/libswresample ++rm -rf $U/libswscale ++U=/usr/lib/arm-linux-gnueabihf ++rm -f $U/libavcodec.* ++rm -f $U/libavdevice.* ++rm -f $U/libavfilter.* ++rm -f $U/libavformat.* ++rm -f $U/libavutil.* ++rm -f $U/libswresample.* ++rm -f $U/libswscale.* ++U=/usr/lib/arm-linux-gnueabihf/neon/vfp ++rm -f $U/libavcodec.* ++rm -f $U/libavdevice.* ++rm -f $U/libavfilter.* ++rm -f $U/libavformat.* ++rm -f $U/libavutil.* ++rm -f $U/libswresample.* ++rm -f $U/libswscale.* ++U=/usr/lib/aarch64-linux-gnu ++rm -f $U/libavcodec.* ++rm -f $U/libavdevice.* ++rm -f $U/libavfilter.* ++rm -f $U/libavformat.* ++rm -f $U/libavutil.* ++rm -f $U/libswresample.* ++rm -f $U/libswscale.* ++ +diff --git a/pi-util/conf_arm64_native.sh b/pi-util/conf_arm64_native.sh +new file mode 100644 +index 000000000000..9e3bbfa1908a +--- /dev/null ++++ b/pi-util/conf_arm64_native.sh +@@ -0,0 +1,45 @@ 
++echo "Configure for ARM64 native build" ++ ++#RPI_KEEPS="-save-temps=obj" ++ ++SHARED_LIBS="--enable-shared" ++if [ "$1" == "--noshared" ]; then ++ SHARED_LIBS="--disable-shared" ++ echo Static libs ++ OUT=out/arm64-static-rel ++else ++ echo Shared libs ++ OUT=out/arm64-shared-rel ++fi ++ ++mkdir -p $OUT ++cd $OUT ++ ++A=aarch64-linux-gnu ++USR_PREFIX=`pwd`/install ++LIB_PREFIX=$USR_PREFIX/lib/$A ++INC_PREFIX=$USR_PREFIX/include/$A ++ ++../../configure \ ++ --prefix=$USR_PREFIX\ ++ --libdir=$LIB_PREFIX\ ++ --incdir=$INC_PREFIX\ ++ --disable-stripping\ ++ --disable-thumb\ ++ --disable-mmal\ ++ --enable-sand\ ++ --enable-v4l2-request\ ++ --enable-libdrm\ ++ --enable-epoxy\ ++ --enable-libudev\ ++ --enable-vout-drm\ ++ --enable-vout-egl\ ++ $SHARED_LIBS\ ++ --extra-cflags="-ggdb" ++ ++# --enable-decoder=hevc_rpi\ ++# --enable-extra-warnings\ ++# --arch=armv71\ ++ ++# gcc option for getting asm listing ++# -Wa,-ahls +diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv +new file mode 100644 +index 000000000000..177f1c8111fd +--- /dev/null ++++ b/pi-util/conf_h265.2016.csv +@@ -0,0 +1,195 @@ ++1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8 ++1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8 ++1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8 ++1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8 ++1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8 ++1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8 ++1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8 ++1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8 ++1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8 ++1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8 ++1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8 ++1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8 
++1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8 ++1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8 ++1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8 ++1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8 ++1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8 ++1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8 ++1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8 ++1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8 ++1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8 ++1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10 ++1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8 ++1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8 ++1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8 ++1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8 ++1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8 ++1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8 ++1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8 ++1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8 ++1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8 ++1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8 ++1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8 ++1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8 ++1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8 ++1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8 ++1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8 ++1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8 ++1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8 
++1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8 ++1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8 ++1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8 ++1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10 ++1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8 ++1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8 ++1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8 ++1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8 ++1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8 ++1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8 ++1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8 ++1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8 ++1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8 ++1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8 ++1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8 ++1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8 ++1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8 ++1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8 ++1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8 ++1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8 ++1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8 ++1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8 ++1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8 ++1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8 ++1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8 ++1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8 ++1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8 ++1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8 
++1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8 ++1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8 ++1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8 ++1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8 ++1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8 ++1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8 ++1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8 ++1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8 ++1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8 ++1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8 ++1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8 ++1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8 ++1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8 ++1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8 ++1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8 ++1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8 ++1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8 ++1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8 ++1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8 ++1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8 ++1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8 ++1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8 ++1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8 ++1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8 ++1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8 ++1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8 
++1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8 ++1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8 ++1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8 ++1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8 ++1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8 ++1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8 ++1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8 ++1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8 ++1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8 ++1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8 ++1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8 ++1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8 ++1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8 ++1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8 ++1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8 ++1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8 ++1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8 ++1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8 ++1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8 ++1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8 ++1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8 ++1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8 ++1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8 ++1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8 ++1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8 ++1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8 ++1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8 ++1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8 
++1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8 ++1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8 ++1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8 ++1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8 ++1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8 ++1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8 ++3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10 ++1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8 ++1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8 ++2,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8 ++1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10 ++1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8 ++1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8 ++1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10 ++1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10 ++1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8 ++1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10 ++1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8 ++1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10 ++1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8 ++1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10 ++1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8 
++1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10 ++1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8 ++1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10 ++1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8 ++1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0 ++0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8 ++0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8 ++0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10 ++0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8 ++0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8 ++1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0 ++0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8 ++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 ++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 ++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 ++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 ++0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10 
++0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8 ++0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8 ++0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8 ++1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10 ++1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0 ++1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0 ++1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0 ++1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0 ++1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0 ++1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0 ++0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0 ++0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8 ++0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8 ++1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0 ++1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8 ++1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0 ++1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0 ++1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0 
++1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0 ++1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0 ++1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0 ++1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0 ++0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8 ++0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10 ++0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10 ++0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8 ++0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8 ++0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8 ++0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8 ++0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8 ++1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8 ++1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8 ++1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8 ++1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8 ++1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8 +diff --git a/pi-util/conf_h265.2016_HEVC_v1.csv b/pi-util/conf_h265.2016_HEVC_v1.csv +new file mode 100644 +index 000000000000..60826412715c +--- /dev/null ++++ b/pi-util/conf_h265.2016_HEVC_v1.csv +@@ -0,0 +1,147 @@ ++1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5 ++1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5 
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 ++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 ++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 ++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 ++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 ++1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5 ++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 ++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 ++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 ++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 ++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 ++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 ++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 ++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 ++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 ++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 ++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 ++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 ++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 ++1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5 ++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 ++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 ++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 ++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 ++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 ++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 ++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 ++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 ++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 ++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 ++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 ++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 ++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 ++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 ++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 ++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 ++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 ++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 ++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 ++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 ++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 ++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 ++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 ++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 ++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 ++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 ++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 ++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 ++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 ++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 ++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 ++1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5 ++1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5 ++1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5 ++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 ++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 ++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 ++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 ++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 ++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 ++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 ++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 ++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 ++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 ++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 ++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 ++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 ++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 ++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 ++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 ++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 ++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 ++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 ++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 ++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 ++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 ++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 ++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 ++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 ++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 ++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 ++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 ++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 ++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 ++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 ++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 ++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 ++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 ++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 ++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 ++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 ++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 ++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 ++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 ++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 ++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 ++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 ++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 ++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 ++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 ++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 ++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 ++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 ++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 ++1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5 ++2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt ++2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt ++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 ++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 ++1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5 ++1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5 ++1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5 ++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 ++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 ++1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5 ++1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5 ++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 ++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 ++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 ++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 ++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 ++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 ++3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth 
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 ++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 ++3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ??? ++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 ++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 ++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 ++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 ++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 ++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 ++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 ++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 ++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 ++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 ++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 ++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 ++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 ++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 ++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 ++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 +diff --git a/pi-util/conf_h265.csv b/pi-util/conf_h265.csv +new file mode 100644 +index 000000000000..fc14f2a3c2bb +--- /dev/null ++++ b/pi-util/conf_h265.csv +@@ -0,0 +1,144 @@ ++1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5 ++1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5 ++1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5 
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5 ++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5 ++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5 ++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5 ++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5 ++1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5 ++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5 ++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5 ++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5 ++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5 ++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5 ++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5 ++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5 ++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5 ++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5 ++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5 ++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5 ++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5 ++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5 ++1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5 ++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5 ++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5 ++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5 ++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5 ++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5 ++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5 ++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5 ++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5 ++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5 ++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5 ++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5 
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5 ++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5 ++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5 ++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5 ++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5 ++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5 ++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5 ++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5 ++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5 ++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5 ++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5 ++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5 ++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5 ++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5 ++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5 ++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5 ++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5 ++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5 ++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5 ++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5 ++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5 ++1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5 ++1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5 ++1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5 ++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5 ++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5 ++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5 ++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5 ++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5 ++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5 ++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5 ++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5 ++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5 
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5 ++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5 ++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5 ++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5 ++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5 ++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5 ++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5 ++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5 ++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5 ++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5 ++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5 ++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5 ++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5 ++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5 ++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5 ++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5 ++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5 ++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5 ++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5 ++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5 ++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5 ++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5 ++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5 ++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5 ++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5 ++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5 ++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5 ++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5 ++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5 ++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5 ++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5 
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5 ++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5 ++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5 ++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5 ++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5 ++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5 ++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5 ++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5 ++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5 ++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5 ++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5 ++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5 ++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5 ++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5 ++1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5 ++1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5 ++1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5 ++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5 ++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5 ++1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5 ++1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5 ++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5 ++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5 ++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5 ++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5 ++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5 ++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5 ++0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched ++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5 ++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5 ++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5 ++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5 
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5 ++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5 ++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5 ++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5 ++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5 ++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5 ++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5 ++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5 ++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5 ++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5 ++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5 ++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5 ++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5 ++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5 +diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh +new file mode 100755 +index 000000000000..0dbaa53e97e0 +--- /dev/null ++++ b/pi-util/conf_native.sh +@@ -0,0 +1,135 @@ ++echo "Configure for native build" ++ ++FFSRC=`pwd` ++MC=`dpkg --print-architecture` ++BUILDBASE=$FFSRC/out ++ ++#RPI_KEEPS="-save-temps=obj" ++RPI_KEEPS="" ++ ++NOSHARED= ++MMAL= ++USR_PREFIX= ++TOOLCHAIN= ++R=rel ++ ++while [ "$1" != "" ] ; do ++ case $1 in ++ --noshared) ++ NOSHARED=1 ++ ;; ++ --mmal) ++ MMAL=1 ++ ;; ++ --usr) ++ USR_PREFIX=/usr ++ ;; ++ --tsan) ++ TOOLCHAIN="--toolchain=gcc-tsan" ++ R=tsan ++ ;; ++ *) ++ echo "Usage $0: [--noshared] [--mmal] [--usr]" ++ echo " noshared Build static libs and executable - good for testing" ++ echo " mmal Build mmal decoders" ++ echo " usr Set install prefix to /usr [default=/install]" ++ exit 1 ++ ;; ++ 
esac ++ shift ++done ++ ++ ++MCOPTS= ++RPI_INCLUDES= ++RPI_LIBDIRS= ++RPI_DEFINES= ++RPI_EXTRALIBS= ++ ++# uname -m gives kernel type which may not have the same ++# 32/64bitness as userspace :-( getconf shoudl provide the answer ++# but use uname to check we are on the right processor ++MC=`uname -m` ++LB=`getconf LONG_BIT` ++if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then ++ if [ "$LB" == "32" ]; then ++ echo "M/C armv7" ++ A=arm-linux-gnueabihf ++ B=armv7 ++ MCOPTS="--arch=armv6t2 --cpu=cortex-a7" ++ RPI_DEFINES=-mfpu=neon-vfpv4 ++ elif [ "$LB" == "64" ]; then ++ echo "M/C aarch64" ++ A=aarch64-linux-gnu ++ B=arm64 ++ else ++ echo "Unknown LONG_BIT name: $LB" ++ exit 1 ++ fi ++else ++ echo "Unknown machine name: $MC" ++ exit 1 ++fi ++ ++if [ $MMAL ]; then ++ RPI_OPT_VC=/opt/vc ++ RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" ++ RPI_LIBDIRS="-L$RPI_OPT_VC/lib" ++ RPI_DEFINES="$RPI_DEFINES -D__VCCOREVER__=0x4000000" ++ RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group" ++ RPIOPTS="--enable-mmal" ++else ++ RPIOPTS="--disable-mmal" ++fi ++ ++C=`lsb_release -sc` ++V=`cat RELEASE` ++ ++SHARED_LIBS="--enable-shared" ++if [ $NOSHARED ]; then ++ SHARED_LIBS="--disable-shared" ++ OUT=$BUILDBASE/$B-$C-$V-static-$R ++ echo Static libs ++else ++ echo Shared libs ++ OUT=$BUILDBASE/$B-$C-$V-shared-$R ++fi ++ ++if [ ! $USR_PREFIX ]; then ++ USR_PREFIX=$OUT/install ++fi ++LIB_PREFIX=$USR_PREFIX/lib/$A ++INC_PREFIX=$USR_PREFIX/include/$A ++ ++echo Destination directory: $OUT ++mkdir -p $OUT ++# Nothing under here need worry git - including this .gitignore! 
++echo "**" > $BUILDBASE/.gitignore ++cd $OUT ++ ++$FFSRC/configure \ ++ --prefix=$USR_PREFIX\ ++ --libdir=$LIB_PREFIX\ ++ --incdir=$INC_PREFIX\ ++ $MCOPTS\ ++ $TOOLCHAIN\ ++ --disable-stripping\ ++ --disable-thumb\ ++ --enable-sand\ ++ --enable-v4l2-request\ ++ --enable-libdrm\ ++ --enable-vout-egl\ ++ --enable-vout-drm\ ++ --enable-gpl\ ++ $SHARED_LIBS\ ++ $RPIOPTS\ ++ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\ ++ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\ ++ --extra-ldflags="$RPI_LIBDIRS"\ ++ --extra-libs="$RPI_EXTRALIBS"\ ++ --extra-version="rpi" ++ ++echo "Configured into $OUT" ++ ++# gcc option for getting asm listing ++# -Wa,-ahls +diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py +new file mode 100755 +index 000000000000..573f1e03c0c0 +--- /dev/null ++++ b/pi-util/ffconf.py +@@ -0,0 +1,276 @@ ++#!/usr/bin/env python3 ++ ++import string ++import os ++import subprocess ++import re ++import argparse ++import sys ++import csv ++from stat import * ++ ++class DecodeType: ++ def __init__(self, textname, hwaccel): ++ self.textname = textname ++ self.hwaccel = hwaccel ++ ++hwaccel_rpi = DecodeType("RPI Test/Legacy", "rpi") ++hwaccel_sw = DecodeType("Software", None) ++hwaccel_drm = DecodeType("DRM Prime", "drm") ++hwaccel_vaapi = DecodeType("VAAPI", "vaapi") ++ ++def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, args): ++ ffmpeg_exec = args.ffmpeg ++ gen_yuv = args.gen_yuv ++ valgrind = args.valgrind ++ rv = 0 ++ ++ pix_fmt = [] ++ if pix == "8": ++ pix_fmt = ["-pix_fmt", "yuv420p"] ++ elif pix == "10": ++ pix_fmt = ["-pix_fmt", "yuv420p10le"] ++ elif pix == "12": ++ pix_fmt = ["-pix_fmt", "yuv420p12le"] ++ ++ tmp_root = "/tmp" ++ ++ names = srcname.split('/') ++ while len(names) > 1: ++ tmp_root = os.path.join(tmp_root, names[0]) ++ del names[0] ++ name = names[0] ++ ++ if not os.path.exists(tmp_root): ++ os.makedirs(tmp_root) ++ ++ dec_file = os.path.join(tmp_root, name + ".dec.md5") ++ try: ++ os.remove(dec_file) ++ 
except: ++ pass ++ ++ yuv_file = os.path.join(tmp_root, name + ".dec.yuv") ++ try: ++ os.remove(yuv_file) ++ except: ++ pass ++ ++ flog = open(os.path.join(tmp_root, name + ".log"), "w+t") ++ ++ ffargs = [ffmpeg_exec, "-flags", "unaligned"] +\ ++ (["-hwaccel", dectype.hwaccel] if dectype.hwaccel else []) +\ ++ ["-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] +\ ++ pix_fmt +\ ++ ([yuv_file] if gen_yuv else ["-f", "md5", dec_file]) ++ ++ if valgrind: ++ ffargs = ['valgrind', '--leak-check=full'] + ffargs ++ ++ # Unaligned needed for cropping conformance ++ rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) ++ ++ if gen_yuv: ++ with open(dec_file, 'wt') as f: ++ subprocess.call(["md5sum", yuv_file], stdout=f, stderr=subprocess.STDOUT) ++ ++ try: ++ m1 = None ++ m2 = None ++ with open(os.path.join(fileroot, md5_file)) as f: ++ for line in f: ++ m1 = re.search("[0-9a-f]{32}", line.lower()) ++ if m1: ++ break ++ ++ with open(dec_file) as f: ++ m2 = re.search("[0-9a-f]{32}", f.readline()) ++ except: ++ pass ++ ++ if valgrind: ++ flog.seek(0) ++ leak = True ++ valerr = True ++ ++ for line in flog: ++ if re.search("^==[0-9]+== All heap blocks were freed", line): ++ leak = False ++ if re.search("^==[0-9]+== ERROR SUMMARY: 0 errors", line): ++ valerr = False ++ if leak or valerr: ++ rv = 4 ++ ++ if m1 and m2 and m1.group() == m2.group(): ++ print("Match: " + m1.group(), file=flog) ++ elif not m1: ++ print("****** Cannot find m1", file=flog) ++ rv = 3 ++ elif not m2: ++ print("****** Cannot find m2", file=flog) ++ rv = 2 ++ else: ++ print("****** Mismatch: " + m1.group() + " != " + m2.group(), file=flog) ++ rv = 1 ++ flog.close() ++ return rv ++ ++def scandir(root): ++ aconf = [] ++ ents = os.listdir(root) ++ ents.sort(key=str.lower) ++ for name in ents: ++ test_path = os.path.join(root, name) ++ if S_ISDIR(os.stat(test_path).st_mode): ++ files = os.listdir(test_path) ++ es_file = "?" ++ md5_file = "?" 
++ for f in files: ++ (base, ext) = os.path.splitext(f) ++ if base[0] == '.': ++ pass ++ elif ext == ".bit" or ext == ".bin": ++ es_file = f ++ elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")): ++ if md5_file == "?": ++ md5_file = f ++ elif base[-3:] == "yuv": ++ md5_file = f ++ aconf.append((1, name, es_file, md5_file)) ++ return aconf ++ ++def runtest(name, tests): ++ if not tests: ++ return True ++ for t in tests: ++ if name[0:len(t)] == t or name.find("/" + t) != -1: ++ return True ++ return False ++ ++def doconf(csva, tests, test_root, vcodec, dectype, args): ++ unx_failures = [] ++ unx_success = [] ++ failures = 0 ++ successes = 0 ++ for a in csva: ++ exp_test = int(a[0]) ++ if (exp_test and runtest(a[1], tests)): ++ name = a[1] ++ print ("==== ", name, end="") ++ sys.stdout.flush() ++ ++ rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, args=args) ++ if (rv == 0): ++ successes += 1 ++ else: ++ failures += 1 ++ ++ if (rv == 0): ++ if exp_test == 2: ++ print(": * OK *") ++ unx_success.append(name) ++ else: ++ print(": ok") ++ elif exp_test == 2 and rv == 1: ++ print(": fail") ++ elif exp_test == 3 and rv == 2: ++ # Call an expected "crash" an abort ++ print(": abort") ++ else: ++ unx_failures.append(name) ++ if rv == 1: ++ print(": * FAIL *") ++ elif (rv == 2) : ++ print(": * CRASH *") ++ elif (rv == 3) : ++ print(": * MD5 MISSING *") ++ elif (rv == 4) : ++ print(": * VALGRIND *") ++ else : ++ print(": * BANG *") ++ ++ print() ++ print("Tested using decode type:", dectype.textname) ++ if unx_failures or unx_success: ++ print("Unexpected Failures:", unx_failures) ++ print("Unexpected Success: ", unx_success) ++ else: ++ print("All tests normal:", successes, "ok,", failures, "failed") ++ ++ return unx_failures + unx_success ++ ++ ++class ConfCSVDialect(csv.Dialect): ++ delimiter = ',' ++ doublequote = True ++ lineterminator = '\n' ++ quotechar='"' ++ quoting = 
csv.QUOTE_MINIMAL ++ skipinitialspace = True ++ strict = True ++ ++ ++ ++if __name__ == '__main__': ++ ++ argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester") ++ argp.add_argument("tests", nargs='*') ++ argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line") ++ argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line") ++ argp.add_argument("--sw", action='store_true', help="Use software decode") ++ argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line") ++ argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test") ++ argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir") ++ argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") ++ argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") ++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use /ffmpeg") ++ argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests") ++ argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)") ++ argp.add_argument("--loop", default=0, type=int, help="Loop n times, or until unexpected result") ++ args = argp.parse_args() ++ ++ if not os.path.isdir(args.test_root): ++ print("Test root dir '%s' not found" % args.test_root) ++ exit(1) ++ ++ if args.csvgen: ++ csv.writer(sys.stdout).writerows(scandir(args.test_root)) ++ exit(0) ++ ++ with open(args.csv, 'rt') as csvfile: ++ csva = [a for a in csv.reader(csvfile, ConfCSVDialect())] ++ ++ dectype = None ++ if os.path.exists("/dev/rpivid-hevcmem"): ++ dectype = hwaccel_rpi ++ if os.path.exists("/sys/module/rpivid_hevc"): ++ dectype = hwaccel_drm ++ ++ if args.pi4: ++ dectype = hwaccel_rpi ++ elif args.drm: ++ dectype = hwaccel_drm ++ elif args.vaapi: ++ dectype = hwaccel_vaapi ++ elif args.sw: ++ dectype = hwaccel_sw ++ ++ if 
os.path.isdir(args.ffmpeg): ++ args.ffmpeg = os.path.join(args.ffmpeg, "ffmpeg") ++ if not os.path.isfile(args.ffmpeg): ++ print("FFmpeg file '%s' not found" % args.ffmpeg) ++ exit(1) ++ ++ if not dectype: ++ print("No decode type selected and no h/w detected") ++ exit(1) ++ print("Running test using decode:", dectype.textname) ++ ++ i = 0 ++ while True: ++ i = i + 1 ++ if args.loop: ++ print("== Loop ", i) ++ if doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args) or (args.loop >= 0 and i > args.loop): ++ break ++ +diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py +new file mode 100755 +index 000000000000..767efe2de2fa +--- /dev/null ++++ b/pi-util/ffperf.py +@@ -0,0 +1,140 @@ ++#!/usr/bin/env python3 ++ ++import shlex ++import time ++import string ++import os ++import tempfile ++import subprocess ++import re ++import argparse ++import sys ++import csv ++from stat import * ++ ++class tstats: ++ close_threshold = 0.01 ++ ++ def __init__(self, stats_dict=None): ++ if stats_dict != None: ++ self.name = stats_dict["name"] ++ self.elapsed = float(stats_dict["elapsed"]) ++ self.user = float(stats_dict["user"]) ++ self.sys = float(stats_dict["sys"]) ++ ++ def times_str(self): ++ ctime = self.sys + self.user ++ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) ++ ++ def dict(self): ++ return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys} ++ ++ def is_close(self, other): ++ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold ++ ++ def __lt__(self, other): ++ return self.elapsed < other.elapsed ++ def __gt__(self, other): ++ return self.elapsed > other.elapsed ++ ++ def time_file(name, prefix, args): ++ cmdargs = [args.ffmpeg] ++ for x in args.args : ++ if x == '{INPUT}': ++ cmdargs.append(prefix + name) ++ elif x == '{NULL}': ++ cmdargs.append(os.devnull) ++ else: ++ cmdargs.append(x) ++ ++ stats = tstats() ++ stats.name = name ++ start_time = 
time.clock_gettime(time.CLOCK_MONOTONIC); ++ cproc = subprocess.Popen(cmdargs, bufsize=-1, stdout=flog, stderr=flog); ++ pinfo = os.wait4(cproc.pid, 0) ++ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); ++ stats.elapsed = end_time - start_time ++ stats.user = pinfo[2].ru_utime ++ stats.sys = pinfo[2].ru_stime ++ return stats ++ ++ ++def common_prefix(s1, s2): ++ for i in range(min(len(s1),len(s2))): ++ if s1[i] != s2[i]: ++ return s1[:i] ++ return s1[:i+1] ++ ++def main(): ++ global flog ++ ++ argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog=""" ++To blank the screen before starting use "xdg-screensaver activate" ++(For some reason this doesn't seem to work from within python). ++""") ++ ++ argp.add_argument("streams", nargs='*') ++ argp.add_argument("--args", default='-t 30 -i {INPUT} -f null {NULL}', help=""" ++ffmpeg arguments, default='-t 30 -i {INPUT} -f null {NULL}'; ++ {INPUT} is replaced by current inputfile path; ++ {NULL} is replaced by the system null device""") ++ argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename") ++ argp.add_argument("--csv_in", help="CSV input filename") ++ argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") ++ argp.add_argument("--repeat", default=3, type=int, help="Run repeat count") ++ argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") ++ ++ args = argp.parse_args() ++ args.args = shlex.split(args.args) ++ ++ csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"]) ++ csv_out.writeheader() ++ ++ stats_in = {} ++ if args.csv_in != None: ++ with open(args.csv_in, 'r', newline='') as f_in: ++ stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} ++ ++ flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt") ++ ++ streams = args.streams ++ if not streams: ++ if not stats_in: ++ print ("No source streams specified") ++ return 1 ++ prefix = "" 
if args.prefix == None else args.prefix ++ streams = [k for k in stats_in] ++ elif args.prefix != None: ++ prefix = args.prefix ++ else: ++ prefix = streams[0] ++ for f in streams[1:]: ++ prefix = common_prefix(prefix, f) ++ pp = prefix.rpartition(os.sep) ++ prefix = pp[0] + pp[1] ++ streams = [s[len(prefix):] for s in streams] ++ ++ for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()): ++ print ("====", f) ++ ++ t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) ++ for i in range(args.repeat): ++ t = tstats.time_file(f, prefix, args) ++ print ("...", t.times_str()) ++ if t0 > t: ++ t0 = t ++ ++ if t0.name in stats_in: ++ pstat = stats_in[t0.name] ++ print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str()) ++ ++ csv_out.writerow(t0.dict()) ++ ++ print () ++ ++ return 0 ++ ++ ++if __name__ == '__main__': ++ exit(main()) ++ +diff --git a/pi-util/genpatch.sh b/pi-util/genpatch.sh +new file mode 100755 +index 000000000000..0948a68a7ad7 +--- /dev/null ++++ b/pi-util/genpatch.sh +@@ -0,0 +1,35 @@ ++set -e ++ ++NOPATCH= ++if [ "$1" == "--notag" ]; then ++ shift ++ NOPATCH=1 ++fi ++ ++if [ "$1" == "" ]; then ++ echo Usage: $0 [--notag] \ ++ echo e.g.: $0 mmal_4 ++ exit 1 ++fi ++ ++VERSION=`cat RELEASE` ++if [ "$VERSION" == "" ]; then ++ echo Can\'t find version RELEASE ++ exit 1 ++fi ++ ++PATCHFILE=../ffmpeg-$VERSION-$1.patch ++ ++if [ $NOPATCH ]; then ++ echo Not tagged ++else ++ # Only continue if we are all comitted ++ git diff --name-status --exit-code ++ ++ PATCHTAG=pi/$VERSION/$1 ++ echo Tagging: $PATCHTAG ++ ++ git tag $PATCHTAG ++fi ++echo Generating patch: $PATCHFILE ++git diff n$VERSION -- > $PATCHFILE +diff --git a/pi-util/make_array.py b/pi-util/make_array.py +new file mode 100755 +index 000000000000..67b22d2d517f +--- /dev/null ++++ b/pi-util/make_array.py +@@ -0,0 +1,23 @@ ++#!/usr/bin/env python ++ ++# Usage ++# make_array file.bin ++# Produces file.h with array of bytes. 
++# ++import sys ++for file in sys.argv[1:]: ++ prefix,suffix = file.split('.') ++ assert suffix=='bin' ++ name=prefix.split('/')[-1] ++ print 'Converting',file ++ with open(prefix+'.h','wb') as out: ++ print >>out, 'static const unsigned char',name,'[] = {' ++ with open(file,'rb') as fd: ++ i = 0 ++ for byte in fd.read(): ++ print >>out, '0x%02x, ' % ord(byte), ++ i = i + 1 ++ if i % 8 == 0: ++ print >>out, ' // %04x' % (i - 8) ++ print >>out,'};' ++ +diff --git a/pi-util/mkinst.sh b/pi-util/mkinst.sh +new file mode 100755 +index 000000000000..271a39e8460a +--- /dev/null ++++ b/pi-util/mkinst.sh +@@ -0,0 +1,5 @@ ++set -e ++ ++make install ++ ++cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr +diff --git a/pi-util/patkodi.sh b/pi-util/patkodi.sh +new file mode 100644 +index 000000000000..dcd05a606e85 +--- /dev/null ++++ b/pi-util/patkodi.sh +@@ -0,0 +1,9 @@ ++set -e ++KODIBASE=/home/jc/rpi/kodi/xbmc ++JOBS=-j20 ++make $JOBS ++git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch ++make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS ++make -C $KODIBASE/build install ++ ++ +diff --git a/pi-util/perfcmp.py b/pi-util/perfcmp.py +new file mode 100755 +index 000000000000..e44cfa0c3c4d +--- /dev/null ++++ b/pi-util/perfcmp.py +@@ -0,0 +1,101 @@ ++#!/usr/bin/env python3 ++ ++import time ++import string ++import os ++import tempfile ++import subprocess ++import re ++import argparse ++import sys ++import csv ++from stat import * ++ ++class tstats: ++ close_threshold = 0.01 ++ ++ def __init__(self, stats_dict=None): ++ if stats_dict != None: ++ self.name = stats_dict["name"] ++ self.elapsed = float(stats_dict["elapsed"]) ++ self.user = float(stats_dict["user"]) ++ self.sys = float(stats_dict["sys"]) ++ ++ def times_str(self): ++ ctime = self.sys + self.user ++ return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed) ++ ++ def dict(self): ++ return {"name":self.name, 
"elapsed":self.elapsed, "user":self.user, "sys":self.sys} ++ ++ def is_close(self, other): ++ return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold ++ ++ def __lt__(self, other): ++ return self.elapsed < other.elapsed ++ def __gt__(self, other): ++ return self.elapsed > other.elapsed ++ ++ def time_file(name, prefix): ++ stats = tstats() ++ stats.name = name ++ start_time = time.clock_gettime(time.CLOCK_MONOTONIC); ++ cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name, ++ "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog); ++ pinfo = os.wait4(cproc.pid, 0) ++ end_time = time.clock_gettime(time.CLOCK_MONOTONIC); ++ stats.elapsed = end_time - start_time ++ stats.user = pinfo[2].ru_utime ++ stats.sys = pinfo[2].ru_stime ++ return stats ++ ++ ++def common_prefix(s1, s2): ++ for i in range(min(len(s1),len(s2))): ++ if s1[i] != s2[i]: ++ return s1[:i] ++ return s1[:i+1] ++ ++def main(): ++ argp = argparse.ArgumentParser(description="FFmpeg performance compare") ++ ++ argp.add_argument("stream0", help="CSV to compare") ++ argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare") ++ ++ args = argp.parse_args() ++ ++ with open(args.stream0, 'r', newline='') as f_in: ++ stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} ++ with open(args.stream1, 'r', newline='') as f_in: ++ stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)} ++ ++ print (args.stream0, "<<-->>", args.stream1) ++ print () ++ ++ for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()): ++ if not (f in stats0) : ++ print (" XX :", f) ++ continue ++ if not (f in stats1) : ++ print (" XX :", f) ++ continue ++ ++ s0 = stats0[f] ++ s1 = stats1[f] ++ ++ pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0 ++ thresh = 0.3 ++ tc = 6 ++ ++ nchar = min(tc - 1, int(abs(pcent) / thresh)) ++ cc = " -- " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " 
" * (tc - nchar) + ">" * nchar ++ ++ print ("%6.2f %s%6.2f (%+5.2f) : %s" % ++ (s0.elapsed, cc, s1.elapsed, pcent, f)) ++ ++ return 0 ++ ++ ++if __name__ == '__main__': ++ exit(main()) ++ +diff --git a/pi-util/qem.sh b/pi-util/qem.sh +new file mode 100755 +index 000000000000..a4dbb6eacd18 +--- /dev/null ++++ b/pi-util/qem.sh +@@ -0,0 +1,9 @@ ++TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex ++QASM=python\ ../local/bin/qasm.py ++SRC_FILE=libavcodec/rpi_hevc_shader.qasm ++DST_BASE=shader ++ ++cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR ++$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c ++$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h ++ diff --git a/pi-util/testfilt.py b/pi-util/testfilt.py new file mode 100755 index 000000000000..b322dac0c22d @@ -31903,3730 +23289,148 @@ index 000000000000..b322dac0c22d +# "/home/jc/rpi/streams/jellyfish-3-mbps-hd-h264.mkv", + "-c:v", "h264_v4l2m2m", "-b:v", "2M"], ".mkv", + [valid_regex(r'Output stream #0:0 \(video\): 900 frames encoded; 900 packets muxed')]) - -From 2ac054adfa1e9ebece8a9594ac37b61ccff7e440 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 5 Jan 2023 14:39:30 +0000 -Subject: [PATCH 111/186] pixfmt: Add a #define to indicate presence of SAND - formats - ---- - libavutil/pixfmt.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h -index 22f70007c3df..5cc780e7d56c 100644 ---- a/libavutil/pixfmt.h -+++ b/libavutil/pixfmt.h -@@ -378,6 +378,8 @@ enum AVPixelFormat { - AV_PIX_FMT_Y210BE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian - AV_PIX_FMT_Y210LE, ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian - // RPI - not on ifdef so can be got at by calling progs -+// #define so code that uses this can know it is there -+#define AVUTIL_HAVE_PIX_FMT_SAND 1 - AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y 
stripe, 64x*UV stripe, then next x stripe, mysterious padding - AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding - AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding - -From 426d93c7bd910d9222a5cbeb011ede5d9890dcbf Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 11 Jan 2023 16:30:37 +0000 -Subject: [PATCH 112/186] v4l2_m2m_dec: Fix initial pkt send if no extradata - ---- - libavcodec/v4l2_m2m_dec.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 4d170572980e..9daf05adfe74 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -240,7 +240,7 @@ copy_extradata(AVCodecContext * const avctx, - else - len = src_len < 0 ? AVERROR(EINVAL) : src_len; - -- // Zero length is OK but we swant to stop - -ve is error val -+ // Zero length is OK but we want to stop - -ve is error val - if (len <= 0) - return len; - -@@ -525,7 +525,7 @@ static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const - - if (s->extdata_sent) - ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); -- else if (s->extdata_data) -+ else - ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); - - if (ret == AVERROR(EAGAIN)) { - -From da6cd7985ffa515607e68116aa923fda23a40beb Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 16 Jan 2023 16:05:09 +0000 -Subject: [PATCH 113/186] v4l2m2m_dec: Make capture timeout long once pending - count > 31 - -For some applications (ffmpeg command line) the current heuristic of adding -a short timeout and preferring DQ over Q once we think we have buffers -Qed in V4L2 is insufficient to prevent arbitrary buffer growth. 
-Unfortunately the current method of guessing the number of Qed buffers isn't -reliable enough to allow for a long timeout with only a few few buffers -believed pending so only do so once the number of buffers believed pending -exceeds plausible inaccuracies caused by buffer reordering. - -The limit could be optimised by codec or apparent latency but a simple -number should reduce the unexpected consequences. ---- - libavcodec/v4l2_m2m.h | 3 ++- - libavcodec/v4l2_m2m_dec.c | 18 ++++++++++++++---- - 2 files changed, 16 insertions(+), 5 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 0f41f94694d3..ded1478a49da 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -66,7 +66,7 @@ typedef struct pts_stats_s - - typedef struct xlat_track_s { - unsigned int track_no; -- int64_t last_pts; -+ int64_t last_pts; // Last valid PTS decoded - int64_t last_opaque; - V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; - } xlat_track_t; -@@ -88,6 +88,7 @@ typedef struct V4L2m2mContext { - - /* null frame/packet received */ - int draining; -+ int running; - AVPacket buf_pkt; - - /* Reference to a frame. 
Only used during encoding */ -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 9daf05adfe74..c8ab883d7ef2 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -582,7 +582,7 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - - do { - const int pending = xlat_pending(&s->xlat); -- const int prefer_dq = (pending > 3); -+ const int prefer_dq = (pending > 4); - const int last_src_rv = src_rv; - - av_log(avctx, AV_LOG_TRACE, "Pending=%d, src_rv=%d, req_pkt=%d\n", pending, src_rv, s->req_pkt); -@@ -611,10 +611,14 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - // (b) enqueue returned a status indicating that decode should be attempted - if (dst_rv != 0 && TRY_DQ(src_rv)) { - // Pick a timeout depending on state -+ // The pending count isn't completely reliable so it is good enough -+ // hint that we want a frame but not good enough to require it in -+ // all cases; however if it has got > 31 that exceeds its margin of -+ // error so require a frame to prevent ridiculous levels of latency - const int t = - src_rv == NQ_Q_FULL ? -1 : - src_rv == NQ_DRAINING ? 300 : -- prefer_dq ? 5 : 0; -+ prefer_dq ? (s->running && pending > 31 ? 
100 : 5) : 0; - - // Dequeue frame will unref any previous contents of frame - // if it returns success so we don't need an explicit unref -@@ -631,8 +635,13 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } - } - -- if (dst_rv == 0) -+ if (dst_rv == 0) { - set_best_effort_pts(avctx, &s->pts_stat, frame); -+ if (!s->running) { -+ s->running = 1; -+ av_log(avctx, AV_LOG_VERBOSE, "Decode running\n"); -+ } -+ } - - if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { - av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); -@@ -998,7 +1007,8 @@ static void v4l2_decode_flush(AVCodecContext *avctx) - - // resend extradata - s->extdata_sent = 0; -- // clear EOS status vars -+ // clear status vars -+ s->running = 0; - s->draining = 0; - output->done = 0; - capture->done = 0; - -From 58854764f365ac020b2d353f1db6b4d7ffa099a4 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 6 Feb 2023 19:23:16 +0000 -Subject: [PATCH 114/186] Initial buffersink alloc callback code - -(cherry picked from commit dde8d3c8f3cc279b9b92ed4f10a2e3990f4aadeb) ---- - libavfilter/buffersink.c | 44 ++++++++++++++++++++++++++++++++++++++++ - libavfilter/buffersink.h | 3 +++ - 2 files changed, 47 insertions(+) - -diff --git a/libavfilter/buffersink.c b/libavfilter/buffersink.c -index 9ab83696ce1b..837579946d65 100644 ---- a/libavfilter/buffersink.c -+++ b/libavfilter/buffersink.c -@@ -62,6 +62,11 @@ typedef struct BufferSinkContext { - int sample_rates_size; - - AVFrame *peeked_frame; +diff --git a/pi-util/v3dusage.py b/pi-util/v3dusage.py +new file mode 100755 +index 000000000000..5935a11ca553 +--- /dev/null ++++ b/pi-util/v3dusage.py +@@ -0,0 +1,128 @@ ++#!/usr/bin/env python ++ ++import sys ++import argparse ++import re ++ ++def do_logparse(logname): ++ ++ rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ') ++ rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing 
vertex/coordinate shading +([0-9]+)$') ++ rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$') ++ rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$') ++ ++ ttotal = {'idle':0.0} ++ tstart = {} ++ qctotal = {} ++ qtstotal = {} ++ l2hits = {} ++ l2total = {} ++ time0 = None ++ idle_start = None ++ qpu_op_no = 0 ++ op_count = 0 ++ ++ with open(logname, "rt") as infile: ++ for line in infile: ++ match = rmatch.match(line) ++ if match: ++# print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":" ++ time = float(match.group(1)) ++ unit = match.group(3) ++ opstart = not match.group(2) ++ optype = match.group(7) ++ hascb = match.group(8) != "0" ++ ++ if unit == 'qpu1': ++ unit = unit + "." + str(qpu_op_no) ++ if not opstart: ++ if hascb or optype == 'EXECUTE_SYNC': ++ qpu_op_no = 0 ++ else: ++ qpu_op_no += 1 ++ ++ # Ignore sync type ++ if optype == 'EXECUTE_SYNC': ++ continue ++ ++ if not time0: ++ time0 = time ++ ++ if opstart: ++ tstart[unit] = time; ++ elif unit in tstart: ++ op_count += 1 ++ if not unit in ttotal: ++ ttotal[unit] = 0.0 ++ ttotal[unit] += time - tstart[unit] ++ del tstart[unit] ++ ++ if not idle_start and not tstart: ++ idle_start = time ++ elif idle_start and tstart: ++ ttotal['idle'] += time - idle_start ++ idle_start = None ++ ++ match = rqcycle.match(line) ++ if match: ++ unit = "qpu1." + str(qpu_op_no) ++ if not unit in qctotal: ++ qctotal[unit] = 0 ++ qctotal[unit] += int(match.group(2)) ++ ++ match = rqtscycle.match(line) ++ if match: ++ unit = "qpu1." + str(qpu_op_no) ++ if not unit in qtstotal: ++ qtstotal[unit] = 0 ++ qtstotal[unit] += int(match.group(2)) ++ ++ match = rl2hits.match(line) ++ if match: ++ unit = "qpu1." 
+ str(qpu_op_no) ++ if not unit in l2total: ++ l2total[unit] = 0 ++ l2hits[unit] = 0 ++ l2total[unit] += int(match.group(3)) ++ if match.group(2) == "hits": ++ l2hits[unit] += int(match.group(3)) ++ ++ ++ if not time0: ++ print "No v3d profile records found" ++ else: ++ tlogged = time - time0 ++ ++ print "Logged time:", tlogged, " Op count:", op_count ++ for unit in sorted(ttotal): ++ print b'%6s: %10.3f %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged) ++ print ++ for unit in sorted(qctotal): ++ if not unit in qtstotal: ++ qtstotal[unit] = 0; ++ print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit]) ++ if unit in l2total: ++ print b' L2Total: %10d, hits: %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit]) ++ ++ ++ ++if __name__ == '__main__': ++ argp = argparse.ArgumentParser( ++ formatter_class=argparse.RawDescriptionHelpFormatter, ++ description="QPU/VPU perf summary from VC logging", ++ epilog = """ ++Will also summarise TMU stalls if logging requests set in qpu noflush param ++in the profiled code. ++ ++Example use: ++ vcgencmd set_logging level=0xc0 ++ ++ sudo vcdbg log msg >& t.log ++ v3dusage.py t.log ++""") ++ ++ argp.add_argument("logfile") ++ args = argp.parse_args() ++ ++ do_logparse(args.logfile) + -+ union { -+ av_buffersink_alloc_video_frame * video; -+ } alloc_cb; -+ void * alloc_v; - } BufferSinkContext; - - #define NB_ITEMS(list) (list ## _size / sizeof(*list)) -@@ -154,6 +159,44 @@ int attribute_align_arg av_buffersink_get_samples(AVFilterContext *ctx, - return get_frame_internal(ctx, frame, 0, nb_samples); - } - -+static AVFrame * alloc_video_buffer(AVFilterLink *link, int w, int h) -+{ -+ AVFilterContext * const ctx = link->dst; -+ BufferSinkContext * const bs = ctx->priv; -+ return bs->alloc_cb.video ? 
bs->alloc_cb.video(ctx, bs->alloc_v, w, h) : -+ ff_default_get_video_buffer(link, w, h); -+} -+ -+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v) -+{ -+ BufferSinkContext * const bs = ctx->priv; -+ bs->alloc_cb.video = cb; -+ bs->alloc_v = v; -+ return 0; -+} -+ -+#if FF_API_BUFFERSINK_ALLOC -+AVBufferSinkParams *av_buffersink_params_alloc(void) -+{ -+ static const int pixel_fmts[] = { AV_PIX_FMT_NONE }; -+ AVBufferSinkParams *params = av_malloc(sizeof(AVBufferSinkParams)); -+ if (!params) -+ return NULL; -+ -+ params->pixel_fmts = pixel_fmts; -+ return params; -+} -+ -+AVABufferSinkParams *av_abuffersink_params_alloc(void) -+{ -+ AVABufferSinkParams *params = av_mallocz(sizeof(AVABufferSinkParams)); -+ -+ if (!params) -+ return NULL; -+ return params; -+} -+#endif -+ - static av_cold int common_init(AVFilterContext *ctx) - { - BufferSinkContext *buf = ctx->priv; -@@ -381,6 +424,7 @@ static const AVFilterPad avfilter_vsink_buffer_inputs[] = { - { - .name = "default", - .type = AVMEDIA_TYPE_VIDEO, -+ .get_buffer = {.video = alloc_video_buffer}, - }, - }; - -diff --git a/libavfilter/buffersink.h b/libavfilter/buffersink.h -index 64e08de53ee5..09737d322fb7 100644 ---- a/libavfilter/buffersink.h -+++ b/libavfilter/buffersink.h -@@ -166,6 +166,9 @@ int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame); - */ - int av_buffersink_get_samples(AVFilterContext *ctx, AVFrame *frame, int nb_samples); - -+typedef AVFrame * av_buffersink_alloc_video_frame(AVFilterContext * ctx, void * v, int w, int h); -+int av_buffersink_set_alloc_video_frame(AVFilterContext *ctx, av_buffersink_alloc_video_frame * cb, void * v); -+ - /** - * @} - */ - -From d6e844180b9f50a33b837ec5cc897d5387edf2be Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 30 Jan 2023 17:23:12 +0000 -Subject: [PATCH 115/186] v4l2_m2m_dec: Add a profile check - -Check the profile in avctx aginst what the v4l2 driver advertises. 
If -the driver doesn't support the check then just accept anything. - -(cherry picked from commit 6dd83dead9ebce419fdea152db0c9f5e9a94e9ef) ---- - libavcodec/v4l2_m2m_dec.c | 125 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 125 insertions(+) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index c8ab883d7ef2..098adf4821eb 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -715,6 +715,127 @@ static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - } - #endif - -+static uint32_t -+avprofile_to_v4l2(const enum AVCodecID codec_id, const int avprofile) -+{ -+ switch (codec_id) { -+ case AV_CODEC_ID_H264: -+ switch (avprofile) { -+ case FF_PROFILE_H264_BASELINE: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE; -+ case FF_PROFILE_H264_CONSTRAINED_BASELINE: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE; -+ case FF_PROFILE_H264_MAIN: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_MAIN; -+ case FF_PROFILE_H264_EXTENDED: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED; -+ case FF_PROFILE_H264_HIGH: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH; -+ case FF_PROFILE_H264_HIGH_10: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10; -+ case FF_PROFILE_H264_HIGH_10_INTRA: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA; -+ case FF_PROFILE_H264_MULTIVIEW_HIGH: -+ case FF_PROFILE_H264_HIGH_422: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422; -+ case FF_PROFILE_H264_HIGH_422_INTRA: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA; -+ case FF_PROFILE_H264_STEREO_HIGH: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH; -+ case FF_PROFILE_H264_HIGH_444_PREDICTIVE: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE; -+ case FF_PROFILE_H264_HIGH_444_INTRA: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA; -+ case FF_PROFILE_H264_CAVLC_444: -+ return V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA; -+ case FF_PROFILE_H264_HIGH_444: -+ default: -+ break; -+// 
V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE = 12, -+// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH = 13, -+// V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA = 14, -+// V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH = 16, -+// V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_HIGH = 17, -+ } -+ break; -+ case AV_CODEC_ID_MPEG2VIDEO: -+ case AV_CODEC_ID_MPEG4: -+ case AV_CODEC_ID_VC1: -+ case AV_CODEC_ID_VP8: -+ case AV_CODEC_ID_VP9: -+ case AV_CODEC_ID_AV1: -+ // Most profiles are a simple number that matches the V4L2 enum -+ return avprofile; -+ default: -+ break; -+ } -+ return ~(uint32_t)0; -+} -+ -+// This check mirrors Chrome's profile check by testing to see if the profile -+// exists as a possible value for the V4L2 profile control -+static int -+check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) -+{ -+ struct v4l2_queryctrl query_ctrl; -+ struct v4l2_querymenu query_menu; -+ uint32_t profile_id; -+ -+ // An unset profile is almost certainly zero or -99 - do not reject -+ if (avctx->profile <= 0) { -+ av_log(avctx, AV_LOG_VERBOSE, "Profile <= 0 - check skipped\n"); -+ return 0; -+ } -+ -+ memset(&query_ctrl, 0, sizeof(query_ctrl)); -+ switch (avctx->codec_id) { -+ case AV_CODEC_ID_MPEG2VIDEO: -+ profile_id = V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE; -+ break; -+ case AV_CODEC_ID_MPEG4: -+ profile_id = V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE; -+ break; -+ case AV_CODEC_ID_H264: -+ profile_id = V4L2_CID_MPEG_VIDEO_H264_PROFILE; -+ break; -+ case AV_CODEC_ID_VP8: -+ profile_id = V4L2_CID_MPEG_VIDEO_VP8_PROFILE; -+ break; -+ case AV_CODEC_ID_VP9: -+ profile_id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE; -+ break; -+#ifdef V4L2_CID_MPEG_VIDEO_AV1_PROFILE -+ case AV_CODEC_ID_AV1: -+ profile_id = V4L2_CID_MPEG_VIDEO_AV1_PROFILE; -+ break; -+#endif -+ default: -+ av_log(avctx, AV_LOG_VERBOSE, "Can't map profile for codec id %d; profile check skipped\n", avctx->codec_id); -+ return 0; -+ } -+ -+ query_ctrl = (struct v4l2_queryctrl){.id = profile_id}; -+ if (ioctl(s->fd, 
VIDIOC_QUERYCTRL, &query_ctrl) != 0) { -+ av_log(avctx, AV_LOG_VERBOSE, "Query profile ctrl (%#x) not supported: assume OK\n", query_ctrl.id); -+ } -+ else { -+ av_log(avctx, AV_LOG_DEBUG, "%s: Control supported: %#x\n", __func__, query_ctrl.id); -+ -+ query_menu = (struct v4l2_querymenu){ -+ .id = query_ctrl.id, -+ .index = avprofile_to_v4l2(avctx->codec_id, avctx->profile), -+ }; -+ -+ if (query_menu.index > query_ctrl.maximum || -+ query_menu.index < query_ctrl.minimum || -+ ioctl(s->fd, VIDIOC_QUERYMENU, &query_menu) != 0) { -+ return AVERROR(ENOENT); -+ } -+ } -+ -+ return 0; -+}; -+ - static int - check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) - { -@@ -955,6 +1076,10 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - if ((ret = check_size(avctx, s)) != 0) - return ret; - -+ if ((ret = check_profile(avctx, s)) != 0) { -+ av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); -+ return ret; -+ } - return 0; - } - - -From d0992e458d9017cbb0383961b92589a015337aa3 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 1 Feb 2023 17:24:39 +0000 -Subject: [PATCH 116/186] v4l2_m2m_dec: Add extradata parse for h264 & hevc - -If we have extradata we can extract profile & level and potentailly -other useful info from it. Use the codec parser to get it if the decoder -is configured. 
- -(cherry picked from commit 6d431e79adeb246c2ed8cebce9011d81175a3906) ---- - libavcodec/v4l2_m2m_dec.c | 84 ++++++++++++++++++++++++++++++++++++++- - 1 file changed, 83 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 098adf4821eb..e64bc707d3c6 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -21,6 +21,8 @@ - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -+#include "config.h" -+ - #include - #include - -@@ -43,6 +45,13 @@ - #include "v4l2_fmt.h" - #include "v4l2_req_dmabufs.h" - -+#if CONFIG_H264_DECODER -+#include "h264_parse.h" -+#endif -+#if CONFIG_HEVC_DECODER -+#include "hevc_parse.h" -+#endif -+ - // Pick 64 for max last count - that is >1sec at 60fps - #define STATS_LAST_COUNT_MAX 64 - #define STATS_INTERVAL_MAX (1 << 30) -@@ -956,6 +965,78 @@ static uint32_t max_coded_size(const AVCodecContext * const avctx) - return size + (1 << 16); - } - -+static void -+parse_extradata(AVCodecContext *avctx) -+{ -+ if (!avctx->extradata || !avctx->extradata_size) -+ return; -+ -+ switch (avctx->codec_id) { -+#if CONFIG_H264_DECODER -+ case AV_CODEC_ID_H264: -+ { -+ H264ParamSets ps = {{NULL}}; -+ int is_avc = 0; -+ int nal_length_size = 0; -+ int ret; -+ -+ ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, -+ &ps, &is_avc, &nal_length_size, -+ avctx->err_recognition, avctx); -+ if (ret > 0) { -+ const SPS * sps = NULL; -+ unsigned int i; -+ for (i = 0; i != MAX_SPS_COUNT; ++i) { -+ if (ps.sps_list[i]) { -+ sps = (const SPS *)ps.sps_list[i]->data; -+ break; -+ } -+ } -+ if (sps) { -+ avctx->profile = ff_h264_get_profile(sps); -+ avctx->level = sps->level_idc; -+ } -+ } -+ ff_h264_ps_uninit(&ps); -+ break; -+ } -+#endif -+#if CONFIG_HEVC_DECODER -+ case AV_CODEC_ID_HEVC: -+ { -+ HEVCParamSets ps = {{NULL}}; -+ HEVCSEI sei = {{{{0}}}}; -+ int is_nalff = 0; -+ int nal_length_size = 0; -+ int ret; -+ -+ ret = 
ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, -+ &ps, &sei, &is_nalff, &nal_length_size, -+ avctx->err_recognition, 0, avctx); -+ if (ret > 0) { -+ const HEVCSPS * sps = NULL; -+ unsigned int i; -+ for (i = 0; i != HEVC_MAX_SPS_COUNT; ++i) { -+ if (ps.sps_list[i]) { -+ sps = (const HEVCSPS *)ps.sps_list[i]->data; -+ break; -+ } -+ } -+ if (sps) { -+ avctx->profile = sps->ptl.general_ptl.profile_idc; -+ avctx->level = sps->ptl.general_ptl.level_idc; -+ } -+ } -+ ff_hevc_ps_uninit(&ps); -+ ff_hevc_reset_sei(&sei); -+ break; -+ } -+#endif -+ default: -+ break; -+ } -+} -+ - static av_cold int v4l2_decode_init(AVCodecContext *avctx) - { - V4L2Context *capture, *output; -@@ -976,7 +1057,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - avctx->ticks_per_frame = 2; - } - -- av_log(avctx, AV_LOG_INFO, "level=%d\n", avctx->level); -+ parse_extradata(avctx); -+ - ret = ff_v4l2_m2m_create_context(priv, &s); - if (ret < 0) - return ret; - -From 7753c3a64821de5104f46068e9753d7ac86b8a5d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 20 Mar 2023 18:12:51 +0000 -Subject: [PATCH 117/186] clean_usr_libs: Now wipes the include files too - -When swapping ffmpeg versions obsolete makefiles could confuse -configure utilities. 
---- - pi-util/clean_usr_libs.sh | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/pi-util/clean_usr_libs.sh b/pi-util/clean_usr_libs.sh -index b3b2d5509de0..01bd6a6a2254 100755 ---- a/pi-util/clean_usr_libs.sh -+++ b/pi-util/clean_usr_libs.sh -@@ -1,4 +1,20 @@ - set -e -+U=/usr/include/arm-linux-gnueabihf -+rm -rf $U/libavcodec -+rm -rf $U/libavdevice -+rm -rf $U/libavfilter -+rm -rf $U/libavformat -+rm -rf $U/libavutil -+rm -rf $U/libswresample -+rm -rf $U/libswscale -+U=/usr/include/aarch64-linux-gnu -+rm -rf $U/libavcodec -+rm -rf $U/libavdevice -+rm -rf $U/libavfilter -+rm -rf $U/libavformat -+rm -rf $U/libavutil -+rm -rf $U/libswresample -+rm -rf $U/libswscale - U=/usr/lib/arm-linux-gnueabihf - rm -f $U/libavcodec.* - rm -f $U/libavdevice.* - -From 4116d51b48e57cfbe9a7986d38aa6818cb65bfbb Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 20 Mar 2023 18:15:08 +0000 -Subject: [PATCH 118/186] vulkan: Add missing decode extension defines - -When building on bookworm the video decode extension names -were missing. This adds them. I expect this patch will be -obsolete shortly but it solves a current problem. 
---- - libavutil/hwcontext_vulkan.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c -index ffd4f5dec4ec..d59f9409dd7c 100644 ---- a/libavutil/hwcontext_vulkan.c -+++ b/libavutil/hwcontext_vulkan.c -@@ -57,6 +57,14 @@ - #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x) - #endif - -+// Sometimes missing definitions -+#ifndef VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME -+#define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" -+#endif -+#ifndef VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME -+#define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" -+#endif -+ - typedef struct VulkanQueueCtx { - VkFence fence; - VkQueue queue; - -From cebdcff3a25a64706c9a863e543260923c823237 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 21 Mar 2023 14:20:05 +0000 -Subject: [PATCH 119/186] v4l2_m2m_dec: Fix config file for finding if decoder - enabled - -Fixes parsing of extradata for profile testing. 5.x changed where that -info is defined. 
---- - libavcodec/v4l2_m2m_dec.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index e64bc707d3c6..91136f03da80 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -21,7 +21,7 @@ - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - --#include "config.h" -+#include "config_components.h" - - #include - #include - -From d091812c90f3fc6bca97e6efbb1fd30ebc112043 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 21 Mar 2023 14:23:20 +0000 -Subject: [PATCH 120/186] v4l2_m2m_dec: Display profile given if skipped in - debug - ---- - libavcodec/v4l2_m2m_dec.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 91136f03da80..d124c7b1fc43 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -792,7 +792,7 @@ check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) - - // An unset profile is almost certainly zero or -99 - do not reject - if (avctx->profile <= 0) { -- av_log(avctx, AV_LOG_VERBOSE, "Profile <= 0 - check skipped\n"); -+ av_log(avctx, AV_LOG_VERBOSE, "Profile %d <= 0 - check skipped\n", avctx->profile); - return 0; - } - - -From cbc083d57efdadef70b0b218cc252b24ed501596 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 22 Mar 2023 16:08:08 +0000 -Subject: [PATCH 121/186] conf_native: Fix for 64-bit kernel with 32-bit - userspace - -(cherry picked from commit 5bb1e09cea95b4215c6904b9b1a726e83bc5d327) ---- - pi-util/conf_native.sh | 32 +++++++++++++++++++++----------- - 1 file changed, 21 insertions(+), 11 deletions(-) - -diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh -index 082d9b58320e..0a7d230f1b70 100755 ---- a/pi-util/conf_native.sh -+++ b/pi-util/conf_native.sh -@@ -33,18 +33,28 @@ RPI_LIBDIRS= - RPI_DEFINES= - RPI_EXTRALIBS= - --if [ "$MC" == "arm64" ]; then -- echo "M/C aarch64" -- 
A=aarch64-linux-gnu -- B=arm64 --elif [ "$MC" == "armhf" ]; then -- echo "M/C armv7" -- A=arm-linux-gnueabihf -- B=armv7 -- MCOPTS="--arch=armv6t2 --cpu=cortex-a7" -- RPI_DEFINES=-mfpu=neon-vfpv4 -+# uname -m gives kernel type which may not have the same -+# 32/64bitness as userspace :-( getconf shoudl provide the answer -+# but use uname to check we are on the right processor -+MC=`uname -m` -+LB=`getconf LONG_BIT` -+if [ "$MC" == "armv7l" ] || [ "$MC" == "aarch64" ]; then -+ if [ "$LB" == "32" ]; then -+ echo "M/C armv7" -+ A=arm-linux-gnueabihf -+ B=armv7 -+ MCOPTS="--arch=armv6t2 --cpu=cortex-a7" -+ RPI_DEFINES=-mfpu=neon-vfpv4 -+ elif [ "$LB" == "64" ]; then -+ echo "M/C aarch64" -+ A=aarch64-linux-gnu -+ B=arm64 -+ else -+ echo "Unknown LONG_BIT name: $LB" -+ exit 1 -+ fi - else -- echo Unexpected architecture $MC -+ echo "Unknown machine name: $MC" - exit 1 - fi - - -From 17d3d7a9750f8cb7954d0f396ea68eb408411c7e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 20 Apr 2023 11:48:25 +0000 -Subject: [PATCH 122/186] conf_native: Add install prefix variation - -(cherry picked from commit 73c3019b534cb8f4b4e4c21995653f6ce440086d) ---- - pi-util/BUILD.txt | 32 ++++++++++++++++++++------------ - pi-util/conf_native.sh | 14 ++++++++++++-- - 2 files changed, 32 insertions(+), 14 deletions(-) - -diff --git a/pi-util/BUILD.txt b/pi-util/BUILD.txt -index b050971f63c5..2b62d660c0d1 100644 ---- a/pi-util/BUILD.txt -+++ b/pi-util/BUILD.txt -@@ -24,6 +24,8 @@ There are a few choices here - paths being confused and therefore running the wrong code, Shared - is what is needed, in most cases, when building for use by other - programs. -+ --usr Set install dir to /usr (i.e. system default) rather than in -+ /install - - So for a static build - --------------------- -@@ -37,23 +39,29 @@ You can now run ffmpeg directly from where it was built - For a shared build - ------------------ - --$ pi-util/conf_native.sh -- --You will normally want an install target if shared. 
Note that the script has --set this up to be generated in out//install, you don't have to worry --about overwriting your system libs. -+There are two choices here - -+$ pi-util/conf_native.sh - $ make -j8 -C out/ install - --You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was --built or install the image on the system - you have to be careful to get rid --of all other ffmpeg libs or confusion may result. There is a little script --that wipes all other versions - obviously use with care! -+This sets the install prefix to /install and is probably what you -+want if you don't want to overwrite the system files. - --$ sudo pi-util/clean_usr_libs.sh -+You can now set LD_LIBRARY_PATH appropriately and run ffmpeg from where it was -+built. You can copy the contents of /install to /usr and that mostly -+works. The only downside is that paths in pkgconfig end up being set to the -+install directory in your build directory which may be less than ideal when -+building other packages. - --Then simply copying from the install to /usr works -+The alternative if you just want to replace the system libs is: - --$ sudo cp -r out//install/* /usr -+$ pi-util/conf_native.sh --usr -+$ make -j8 -C out/ -+$ sudo pi-util/clean_usr_libs.sh -+$ sudo make -j8 -C out/ install - -+The clean_usr_libs.sh step wipes any existing libs & includes (for all -+architectures) from the system which helps avoid confusion when running other -+progs as you can be sure you're not running old code which is unfortunately -+easy to do otherwise. 
- -diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh -index 0a7d230f1b70..f0ed1595948b 100755 ---- a/pi-util/conf_native.sh -+++ b/pi-util/conf_native.sh -@@ -9,6 +9,7 @@ RPI_KEEPS="" - - NOSHARED= - MMAL= -+USR_PREFIX= - - while [ "$1" != "" ] ; do - case $1 in -@@ -18,8 +19,14 @@ while [ "$1" != "" ] ; do - --mmal) - MMAL=1 - ;; -+ --usr) -+ USR_PREFIX=/usr -+ ;; - *) -- echo "Usage $0: [--noshared] [--mmal]" -+ echo "Usage $0: [--noshared] [--mmal] [--usr]" -+ echo " noshared Build static libs and executable - good for testing" -+ echo " mmal Build mmal decoders" -+ echo " usr Set install prefix to /usr [default=/install]" - exit 1 - ;; - esac -@@ -82,7 +89,9 @@ else - OUT=$BUILDBASE/$B-$C-$V-shared-rel - fi - --USR_PREFIX=$OUT/install -+if [ ! $USR_PREFIX ]; then -+ USR_PREFIX=$OUT/install -+fi - LIB_PREFIX=$USR_PREFIX/lib/$A - INC_PREFIX=$USR_PREFIX/include/$A - -@@ -113,6 +122,7 @@ $FFSRC/configure \ - --extra-libs="$RPI_EXTRALIBS"\ - --extra-version="rpi" - -+echo "Configured into $OUT" - - # gcc option for getting asm listing - # -Wa,-ahls - -From 5c43e72236ef247a0a68e5ca9417496ee2e179a5 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 19 Apr 2023 10:47:58 +0000 -Subject: [PATCH 123/186] swcale: Add explicit bgr24->yv12 conversion - -(cherry picked from commit 9a22d429f46a038321c66a0cd54737177641b434) ---- - libswscale/rgb2rgb.c | 5 +++++ - libswscale/rgb2rgb.h | 7 +++++++ - libswscale/rgb2rgb_template.c | 36 ++++++++++++++++++++++++++++++----- - libswscale/swscale_unscaled.c | 22 +++++++++++++++++++++ - 4 files changed, 65 insertions(+), 5 deletions(-) - -diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c -index e98fdac8ead6..84bb56e60e94 100644 ---- a/libswscale/rgb2rgb.c -+++ b/libswscale/rgb2rgb.c -@@ -83,6 +83,11 @@ void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, - int width, int height, - int lumStride, int chromStride, int srcStride, - int32_t *rgb2yuv); -+void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, -+ 
uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); - void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, - int srcStride, int dstStride); - void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, -diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h -index f3951d523ef7..0028ab345fc9 100644 ---- a/libswscale/rgb2rgb.h -+++ b/libswscale/rgb2rgb.h -@@ -79,6 +79,9 @@ void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); - void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, int32_t *rgb2yuv); -+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv); - - /** - * Height should be a multiple of 2 and width should be a multiple of 16. -@@ -128,6 +131,10 @@ extern void (*ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - int width, int height, - int lumStride, int chromStride, int srcStride, - int32_t *rgb2yuv); -+extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); - extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, - int srcStride, int dstStride); - -diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c -index 42c69801ba40..e2437826dd41 100644 ---- a/libswscale/rgb2rgb_template.c -+++ b/libswscale/rgb2rgb_template.c -@@ -646,13 +646,14 @@ static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst, - * others are ignored in the C version. - * FIXME: Write HQ version. 
- */ --void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, -- int chromStride, int srcStride, int32_t *rgb2yuv) -+ int chromStride, int srcStride, int32_t *rgb2yuv, -+ const uint8_t x[9]) - { -- int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX]; -- int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX]; -- int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX]; -+ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; -+ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; -+ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; - int y; - const int chromWidth = width >> 1; - -@@ -707,6 +708,30 @@ void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - } - } - -+void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv) -+{ -+ static const uint8_t x[9] = { -+ RY_IDX, GY_IDX, BY_IDX, -+ RU_IDX, GU_IDX, BU_IDX, -+ RV_IDX, GV_IDX, BV_IDX, -+ }; -+ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x); -+} -+ -+void ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv) -+{ -+ static const uint8_t x[9] = { -+ BY_IDX, GY_IDX, RY_IDX, -+ BU_IDX, GU_IDX, RU_IDX, -+ BV_IDX, GV_IDX, RV_IDX, -+ }; -+ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x); -+} -+ - static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, - uint8_t *dest, int width, int height, - int src1Stride, int src2Stride, int dstStride) -@@ -980,6 +1005,7 @@ static av_cold void 
rgb2rgb_init_c(void) - yuy2toyv12 = yuy2toyv12_c; - planar2x = planar2x_c; - ff_rgb24toyv12 = ff_rgb24toyv12_c; -+ ff_bgr24toyv12 = ff_bgr24toyv12_c; - interleaveBytes = interleaveBytes_c; - deinterleaveBytes = deinterleaveBytes_c; - vu9_to_vu12 = vu9_to_vu12_c; -diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c -index 9af2e7ecc30d..9047030ae426 100644 ---- a/libswscale/swscale_unscaled.c -+++ b/libswscale/swscale_unscaled.c -@@ -1654,6 +1654,23 @@ static int bgr24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], - return srcSliceH; - } - -+static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], -+ int srcStride[], int srcSliceY, int srcSliceH, -+ uint8_t *dst[], int dstStride[]) -+{ -+ ff_bgr24toyv12( -+ src[0], -+ dst[0] + srcSliceY * dstStride[0], -+ dst[1] + (srcSliceY >> 1) * dstStride[1], -+ dst[2] + (srcSliceY >> 1) * dstStride[2], -+ c->srcW, srcSliceH, -+ dstStride[0], dstStride[1], srcStride[0], -+ c->input_rgb2yuv_table); -+ if (dst[3]) -+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); -+ return srcSliceH; -+} -+ - static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -@@ -2037,6 +2054,11 @@ void ff_get_unscaled_swscale(SwsContext *c) - (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && - !(flags & SWS_ACCURATE_RND) && !(dstW&1)) - c->convert_unscaled = bgr24ToYv12Wrapper; -+ /* rgb24toYV12 */ -+ if (srcFormat == AV_PIX_FMT_RGB24 && -+ (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && -+ !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ c->convert_unscaled = rgb24ToYv12Wrapper; - - /* RGB/BGR -> RGB/BGR (no dither needed forms) */ - if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) - -From 9161e42a5f914181fa97d86f20498632e9827556 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 20 Apr 2023 11:26:10 +0000 -Subject: [PATCH 124/186] swscale: Add 
unscaled XRGB->YUV420P functions - -(cherry picked from commit 04cc32ee3f390de513ad8c6156c0c66b2c60abc8) ---- - libswscale/rgb2rgb.c | 20 ++++++ - libswscale/rgb2rgb.h | 16 +++++ - libswscale/rgb2rgb_template.c | 123 ++++++++++++++++++++++++++++++---- - libswscale/swscale_unscaled.c | 89 ++++++++++++++++++++++++ - 4 files changed, 236 insertions(+), 12 deletions(-) - -diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c -index 84bb56e60e94..c3b9079d2b3e 100644 ---- a/libswscale/rgb2rgb.c -+++ b/libswscale/rgb2rgb.c -@@ -88,6 +88,26 @@ void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, - int width, int height, - int lumStride, int chromStride, int srcStride, - int32_t *rgb2yuv); -+void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, -+ uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); -+void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, -+ uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); -+void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, -+ uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); -+void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, -+ uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); - void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, - int srcStride, int dstStride); - void (*interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, -diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h -index 0028ab345fc9..a0dd3ffb79ab 100644 ---- a/libswscale/rgb2rgb.h -+++ b/libswscale/rgb2rgb.h -@@ -135,6 +135,22 @@ extern void (*ff_bgr24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - int width, int height, - int lumStride, int chromStride, int srcStride, - int32_t 
*rgb2yuv); -+extern void (*ff_rgbxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); -+extern void (*ff_bgrxtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); -+extern void (*ff_xrgbtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); -+extern void (*ff_xbgrtoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, -+ int width, int height, -+ int lumStride, int chromStride, int srcStride, -+ int32_t *rgb2yuv); - extern void (*planar2x)(const uint8_t *src, uint8_t *dst, int width, int height, - int srcStride, int dstStride); - -diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c -index e2437826dd41..703de90690d3 100644 ---- a/libswscale/rgb2rgb_template.c -+++ b/libswscale/rgb2rgb_template.c -@@ -708,30 +708,125 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - } - } - -+static const uint8_t x_rgb[9] = { -+ RY_IDX, GY_IDX, BY_IDX, -+ RU_IDX, GU_IDX, BU_IDX, -+ RV_IDX, GV_IDX, BV_IDX, -+}; -+ -+static const uint8_t x_bgr[9] = { -+ BY_IDX, GY_IDX, RY_IDX, -+ BU_IDX, GU_IDX, RU_IDX, -+ BV_IDX, GV_IDX, RV_IDX, -+}; -+ - void ff_rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, int32_t *rgb2yuv) - { -- static const uint8_t x[9] = { -- RY_IDX, GY_IDX, BY_IDX, -- RU_IDX, GU_IDX, BU_IDX, -- RV_IDX, GV_IDX, BV_IDX, -- }; -- rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x); -+ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); - } - - void 
ff_bgr24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, int32_t *rgb2yuv) - { -- static const uint8_t x[9] = { -- BY_IDX, GY_IDX, RY_IDX, -- BU_IDX, GU_IDX, RU_IDX, -- BV_IDX, GV_IDX, RV_IDX, -- }; -- rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x); -+ rgb24toyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); - } - -+static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv, -+ const uint8_t x[9]) -+{ -+ int32_t ry = rgb2yuv[x[0]], gy = rgb2yuv[x[1]], by = rgb2yuv[x[2]]; -+ int32_t ru = rgb2yuv[x[3]], gu = rgb2yuv[x[4]], bu = rgb2yuv[x[5]]; -+ int32_t rv = rgb2yuv[x[6]], gv = rgb2yuv[x[7]], bv = rgb2yuv[x[8]]; -+ int y; -+ const int chromWidth = width >> 1; -+ -+ for (y = 0; y < height; y += 2) { -+ int i; -+ for (i = 0; i < chromWidth; i++) { -+ unsigned int b = src[8 * i + 2]; -+ unsigned int g = src[8 * i + 1]; -+ unsigned int r = src[8 * i + 0]; -+ -+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; -+ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; -+ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; -+ -+ udst[i] = U; -+ vdst[i] = V; -+ ydst[2 * i] = Y; -+ -+ b = src[8 * i + 6]; -+ g = src[8 * i + 5]; -+ r = src[8 * i + 4]; -+ -+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; -+ ydst[2 * i + 1] = Y; -+ } -+ ydst += lumStride; -+ src += srcStride; -+ -+ if (y+1 == height) -+ break; -+ -+ for (i = 0; i < chromWidth; i++) { -+ unsigned int b = src[8 * i + 2]; -+ unsigned int g = src[8 * i + 1]; -+ unsigned int r = src[8 * i + 0]; -+ -+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; -+ -+ ydst[2 * i] = Y; -+ -+ b = src[8 * i + 6]; -+ g = src[8 * i + 5]; -+ 
r = src[8 * i + 4]; -+ -+ Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; -+ ydst[2 * i + 1] = Y; -+ } -+ udst += chromStride; -+ vdst += chromStride; -+ ydst += lumStride; -+ src += srcStride; -+ } -+} -+ -+static void ff_rgbxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv) -+{ -+ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); -+} -+ -+static void ff_bgrxtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv) -+{ -+ rgbxtoyv12_x(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); -+} -+ -+// As the general code does no SIMD-like ops simply adding 1 to the src address -+// will fix the ignored alpha position -+static void ff_xrgbtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv) -+{ -+ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_rgb); -+} -+ -+static void ff_xbgrtoyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv) -+{ -+ rgbxtoyv12_x(src + 1, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv, x_bgr); -+} -+ -+ - static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2, - uint8_t *dest, int width, int height, - int src1Stride, int src2Stride, int dstStride) -@@ -1006,6 +1101,10 @@ static av_cold void rgb2rgb_init_c(void) - planar2x = planar2x_c; - ff_rgb24toyv12 = ff_rgb24toyv12_c; - ff_bgr24toyv12 = ff_bgr24toyv12_c; -+ ff_rgbxtoyv12 = ff_rgbxtoyv12_c; -+ ff_bgrxtoyv12 = ff_bgrxtoyv12_c; -+ ff_xrgbtoyv12 = 
ff_xrgbtoyv12_c; -+ ff_xbgrtoyv12 = ff_xbgrtoyv12_c; - interleaveBytes = interleaveBytes_c; - deinterleaveBytes = deinterleaveBytes_c; - vu9_to_vu12 = vu9_to_vu12_c; -diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c -index 9047030ae426..053c06adf5d1 100644 ---- a/libswscale/swscale_unscaled.c -+++ b/libswscale/swscale_unscaled.c -@@ -1671,6 +1671,74 @@ static int rgb24ToYv12Wrapper(SwsContext *c, const uint8_t *src[], - return srcSliceH; - } - -+static int bgrxToYv12Wrapper(SwsContext *c, const uint8_t *src[], -+ int srcStride[], int srcSliceY, int srcSliceH, -+ uint8_t *dst[], int dstStride[]) -+{ -+ ff_bgrxtoyv12( -+ src[0], -+ dst[0] + srcSliceY * dstStride[0], -+ dst[1] + (srcSliceY >> 1) * dstStride[1], -+ dst[2] + (srcSliceY >> 1) * dstStride[2], -+ c->srcW, srcSliceH, -+ dstStride[0], dstStride[1], srcStride[0], -+ c->input_rgb2yuv_table); -+ if (dst[3]) -+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); -+ return srcSliceH; -+} -+ -+static int rgbxToYv12Wrapper(SwsContext *c, const uint8_t *src[], -+ int srcStride[], int srcSliceY, int srcSliceH, -+ uint8_t *dst[], int dstStride[]) -+{ -+ ff_rgbxtoyv12( -+ src[0], -+ dst[0] + srcSliceY * dstStride[0], -+ dst[1] + (srcSliceY >> 1) * dstStride[1], -+ dst[2] + (srcSliceY >> 1) * dstStride[2], -+ c->srcW, srcSliceH, -+ dstStride[0], dstStride[1], srcStride[0], -+ c->input_rgb2yuv_table); -+ if (dst[3]) -+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); -+ return srcSliceH; -+} -+ -+static int xbgrToYv12Wrapper(SwsContext *c, const uint8_t *src[], -+ int srcStride[], int srcSliceY, int srcSliceH, -+ uint8_t *dst[], int dstStride[]) -+{ -+ ff_xbgrtoyv12( -+ src[0], -+ dst[0] + srcSliceY * dstStride[0], -+ dst[1] + (srcSliceY >> 1) * dstStride[1], -+ dst[2] + (srcSliceY >> 1) * dstStride[2], -+ c->srcW, srcSliceH, -+ dstStride[0], dstStride[1], srcStride[0], -+ c->input_rgb2yuv_table); -+ if (dst[3]) -+ fillPlane(dst[3], dstStride[3], c->srcW, 
srcSliceH, srcSliceY, 255); -+ return srcSliceH; -+} -+ -+static int xrgbToYv12Wrapper(SwsContext *c, const uint8_t *src[], -+ int srcStride[], int srcSliceY, int srcSliceH, -+ uint8_t *dst[], int dstStride[]) -+{ -+ ff_xrgbtoyv12( -+ src[0], -+ dst[0] + srcSliceY * dstStride[0], -+ dst[1] + (srcSliceY >> 1) * dstStride[1], -+ dst[2] + (srcSliceY >> 1) * dstStride[2], -+ c->srcW, srcSliceH, -+ dstStride[0], dstStride[1], srcStride[0], -+ c->input_rgb2yuv_table); -+ if (dst[3]) -+ fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255); -+ return srcSliceH; -+} -+ - static int yvu9ToYv12Wrapper(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -@@ -2060,6 +2128,27 @@ void ff_get_unscaled_swscale(SwsContext *c) - !(flags & SWS_ACCURATE_RND) && !(dstW&1)) - c->convert_unscaled = rgb24ToYv12Wrapper; - -+ /* bgrxtoYV12 */ -+ if (((srcFormat == AV_PIX_FMT_BGRA && dstFormat == AV_PIX_FMT_YUV420P) || -+ (srcFormat == AV_PIX_FMT_BGR0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && -+ !(flags & SWS_ACCURATE_RND)) -+ c->convert_unscaled = bgrxToYv12Wrapper; -+ /* rgbx24toYV12 */ -+ if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || -+ (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && -+ !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ c->convert_unscaled = rgbxToYv12Wrapper; -+ /* xbgrtoYV12 */ -+ if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) || -+ (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && -+ !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ c->convert_unscaled = xbgrToYv12Wrapper; -+ /* xrgb24toYV12 */ -+ if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || -+ (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && -+ !(flags & 
SWS_ACCURATE_RND) && !(dstW&1)) -+ c->convert_unscaled = xrgbToYv12Wrapper; -+ - /* RGB/BGR -> RGB/BGR (no dither needed forms) */ - if (isAnyRGB(srcFormat) && isAnyRGB(dstFormat) && findRgbConvFn(c) - && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) - -From 2452146f774e85a1f0fe1c8c2551811cd98adc87 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 20 Apr 2023 11:35:44 +0000 -Subject: [PATCH 125/186] swscale: Add aarch64 unscaled RGB24->YUV420P - -(cherry picked from commit 0cf416312095ce5bea3d2f7e9b14736d4b3ed160) ---- - libswscale/aarch64/rgb2rgb.c | 40 +++++++ - libswscale/aarch64/rgb2rgb_neon.S | 181 ++++++++++++++++++++++++++++++ - 2 files changed, 221 insertions(+) - -diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c -index a9bf6ff9e0a8..6d3e0000dc9a 100644 ---- a/libswscale/aarch64/rgb2rgb.c -+++ b/libswscale/aarch64/rgb2rgb.c -@@ -30,6 +30,44 @@ - void ff_interleave_bytes_neon(const uint8_t *src1, const uint8_t *src2, - uint8_t *dest, int width, int height, - int src1Stride, int src2Stride, int dstStride); -+void ff_bgr24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv); -+void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv); -+ -+// RGB to YUV asm fns process 16 pixels at once so ensure that the output -+// will fit into the stride. 
ARM64 should cope with unaligned SIMD r/w so -+// don't test for that -+// Fall back to C if we cannot use asm -+ -+static inline int chkw(const int width, const int lumStride, const int chromStride) -+{ -+ const int aw = FFALIGN(width, 16); -+ return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2; -+} -+ -+static void rgb24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *rgb2yuv) -+{ -+ if (chkw(width, lumStride, chromStride)) -+ ff_rgb24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv); -+ else -+ ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv); -+} -+ -+static void bgr24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -+ uint8_t *vdst, int width, int height, int lumStride, -+ int chromStride, int srcStride, int32_t *bgr2yuv) -+{ -+ if (chkw(width, lumStride, chromStride)) -+ ff_bgr24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv); -+ else -+ ff_bgr24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv); -+} -+ - - av_cold void rgb2rgb_init_aarch64(void) - { -@@ -37,5 +75,7 @@ av_cold void rgb2rgb_init_aarch64(void) - - if (have_neon(cpu_flags)) { - interleaveBytes = ff_interleave_bytes_neon; -+ ff_rgb24toyv12 = rgb24toyv12_check; -+ ff_bgr24toyv12 = bgr24toyv12_check; - } - } -diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S -index d81110ec5714..8cf40b65f520 100644 ---- a/libswscale/aarch64/rgb2rgb_neon.S -+++ b/libswscale/aarch64/rgb2rgb_neon.S -@@ -77,3 +77,184 @@ function ff_interleave_bytes_neon, export=1 - 0: - ret - endfunc -+ -+// void ff_rgb24toyv12_aarch64( -+// const uint8_t *src, // x0 -+// uint8_t *ydst, // x1 -+// uint8_t *udst, // x2 -+// uint8_t *vdst, // x3 -+// int width, // w4 -+// int height, // 
w5 -+// int lumStride, // w6 -+// int chromStride, // w7 -+// int srcStr, // [sp, #0] -+// int32_t *rgb2yuv); // [sp, #8] -+ -+function ff_rgb24toyv12_aarch64, export=1 -+ ldr x15, [sp, #8] -+ ld1 {v3.s}[2], [x15], #4 -+ ld1 {v3.s}[1], [x15], #4 -+ ld1 {v3.s}[0], [x15], #4 -+ ld1 {v4.s}[2], [x15], #4 -+ ld1 {v4.s}[1], [x15], #4 -+ ld1 {v4.s}[0], [x15], #4 -+ ld1 {v5.s}[2], [x15], #4 -+ ld1 {v5.s}[1], [x15], #4 -+ ld1 {v5.s}[0], [x15] -+ b 99f -+endfunc -+ -+// void ff_bgr24toyv12_aarch64( -+// const uint8_t *src, // x0 -+// uint8_t *ydst, // x1 -+// uint8_t *udst, // x2 -+// uint8_t *vdst, // x3 -+// int width, // w4 -+// int height, // w5 -+// int lumStride, // w6 -+// int chromStride, // w7 -+// int srcStr, // [sp, #0] -+// int32_t *rgb2yuv); // [sp, #8] -+ -+function ff_bgr24toyv12_aarch64, export=1 -+ ldr x15, [sp, #8] -+ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 -+ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 -+ ld3 {v3.s, v4.s, v5.s}[2], [x15] -+99: -+ ldr w14, [sp, #0] -+ movi v18.8b, #128 -+ uxtl v17.8h, v18.8b -+ -+ // Even line - YUV -+1: -+ mov x10, x0 -+ mov x11, x1 -+ mov x12, x2 -+ mov x13, x3 -+ mov w9, w4 -+ -+0: -+ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 -+ -+ uxtl2 v20.8h, v0.16b -+ uxtl2 v21.8h, v1.16b -+ uxtl2 v22.8h, v2.16b -+ -+ uxtl v0.8h, v0.8b -+ uxtl v1.8h, v1.8b -+ uxtl v2.8h, v2.8b -+ // Y0 -+ smull v6.4s, v0.4h, v3.h[0] -+ smull2 v7.4s, v0.8h, v3.h[0] -+ smlal v6.4s, v1.4h, v4.h[0] -+ smlal2 v7.4s, v1.8h, v4.h[0] -+ smlal v6.4s, v2.4h, v5.h[0] -+ smlal2 v7.4s, v2.8h, v5.h[0] -+ shrn v6.4h, v6.4s, #12 -+ shrn2 v6.8h, v7.4s, #12 -+ add v6.8h, v6.8h, v17.8h // +128 (>> 3 = 16) -+ uqrshrn v16.8b, v6.8h, #3 -+ // Y1 -+ smull v6.4s, v20.4h, v3.h[0] -+ smull2 v7.4s, v20.8h, v3.h[0] -+ smlal v6.4s, v21.4h, v4.h[0] -+ smlal2 v7.4s, v21.8h, v4.h[0] -+ smlal v6.4s, v22.4h, v5.h[0] -+ smlal2 v7.4s, v22.8h, v5.h[0] -+ shrn v6.4h, v6.4s, #12 -+ shrn2 v6.8h, v7.4s, #12 -+ add v6.8h, v6.8h, v17.8h -+ uqrshrn2 v16.16b, v6.8h, #3 -+ // Y0/Y1 -+ st1 {v16.16b}, 
[x11], #16 -+ -+ uzp1 v0.8h, v0.8h, v20.8h -+ uzp1 v1.8h, v1.8h, v21.8h -+ uzp1 v2.8h, v2.8h, v22.8h -+ -+ // U -+ // Vector subscript *2 as we loaded into S but are only using H -+ smull v6.4s, v0.4h, v3.h[2] -+ smull2 v7.4s, v0.8h, v3.h[2] -+ smlal v6.4s, v1.4h, v4.h[2] -+ smlal2 v7.4s, v1.8h, v4.h[2] -+ smlal v6.4s, v2.4h, v5.h[2] -+ smlal2 v7.4s, v2.8h, v5.h[2] -+ shrn v6.4h, v6.4s, #14 -+ shrn2 v6.8h, v7.4s, #14 -+ sqrshrn v6.8b, v6.8h, #1 -+ add v6.8b, v6.8b, v18.8b // +128 -+ st1 {v6.8b}, [x12], #8 -+ -+ // V -+ smull v6.4s, v0.4h, v3.h[4] -+ smull2 v7.4s, v0.8h, v3.h[4] -+ smlal v6.4s, v1.4h, v4.h[4] -+ smlal2 v7.4s, v1.8h, v4.h[4] -+ smlal v6.4s, v2.4h, v5.h[4] -+ smlal2 v7.4s, v2.8h, v5.h[4] -+ shrn v6.4h, v6.4s, #14 -+ shrn2 v6.8h, v7.4s, #14 -+ sqrshrn v6.8b, v6.8h, #1 -+ add v6.8b, v6.8b, v18.8b // +128 -+ st1 {v6.8b}, [x13], #8 -+ -+ subs w9, w9, #16 -+ b.gt 0b -+ -+ // Odd line - Y only -+ -+ add x0, x0, w14, SXTX -+ add x1, x1, w6, SXTX -+ mov x10, x0 -+ mov x11, x1 -+ mov w9, w4 -+ -+0: -+ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 -+ -+ uxtl2 v20.8h, v0.16b -+ uxtl2 v21.8h, v1.16b -+ uxtl2 v22.8h, v2.16b -+ -+ uxtl v0.8h, v0.8b -+ uxtl v1.8h, v1.8b -+ uxtl v2.8h, v2.8b -+ // Y0 -+ smull v6.4s, v0.4h, v3.h[0] -+ smull2 v7.4s, v0.8h, v3.h[0] -+ smlal v6.4s, v1.4h, v4.h[0] -+ smlal2 v7.4s, v1.8h, v4.h[0] -+ smlal v6.4s, v2.4h, v5.h[0] -+ smlal2 v7.4s, v2.8h, v5.h[0] -+ shrn v6.4h, v6.4s, #12 -+ shrn2 v6.8h, v7.4s, #12 -+ add v6.8h, v6.8h, v17.8h -+ uqrshrn v16.8b, v6.8h, #3 -+ // Y1 -+ smull v6.4s, v20.4h, v3.h[0] -+ smull2 v7.4s, v20.8h, v3.h[0] -+ smlal v6.4s, v21.4h, v4.h[0] -+ smlal2 v7.4s, v21.8h, v4.h[0] -+ smlal v6.4s, v22.4h, v5.h[0] -+ smlal2 v7.4s, v22.8h, v5.h[0] -+ shrn v6.4h, v6.4s, #12 -+ shrn2 v6.8h, v7.4s, #12 -+ add v6.8h, v6.8h, v17.8h -+ uqrshrn2 v16.16b, v6.8h, #3 -+ // Y0/Y1 -+ st1 {v16.16b}, [x11], #16 -+ -+ subs w9, w9, #16 -+ b.gt 0b -+ -+ add x0, x0, w14, SXTX -+ add x1, x1, w6, SXTX -+ add x2, x2, w7, SXTX -+ add x3, x3, w7, 
SXTX -+ subs w5, w5, #2 -+ b.gt 1b -+ -+ ret -+endfunc - -From 95900ef928a5254db60ce7182f4903ad6d27a181 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 27 Apr 2023 13:03:52 +0000 -Subject: [PATCH 126/186] rgb2rgb: Fix rgb24->yuv420p with arbitrary wxh - -(cherry picked from commit 58771fdf0218dc670d8a343824f540e2f6e8785d) ---- - libswscale/aarch64/rgb2rgb.c | 5 +- - libswscale/aarch64/rgb2rgb_neon.S | 440 ++++++++++++++++++++++++------ - 2 files changed, 355 insertions(+), 90 deletions(-) - -diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c -index 6d3e0000dc9a..f10c4ef2ded9 100644 ---- a/libswscale/aarch64/rgb2rgb.c -+++ b/libswscale/aarch64/rgb2rgb.c -@@ -44,8 +44,9 @@ void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - - static inline int chkw(const int width, const int lumStride, const int chromStride) - { -- const int aw = FFALIGN(width, 16); -- return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2; -+// const int aw = FFALIGN(width, 16); -+// return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2; -+ return 1; - } - - static void rgb24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S -index 8cf40b65f520..978ab443ea52 100644 ---- a/libswscale/aarch64/rgb2rgb_neon.S -+++ b/libswscale/aarch64/rgb2rgb_neon.S -@@ -116,6 +116,25 @@ endfunc - // int srcStr, // [sp, #0] - // int32_t *rgb2yuv); // [sp, #8] - -+// regs -+// v0-2 Src bytes - reused as chroma src -+// v3-5 Coeffs (packed very inefficiently - could be squashed) -+// v6 128b -+// v7 128h -+// v8-15 Reserved -+// v16-18 Lo Src expanded as H -+// v19 - -+// v20-22 Hi Src expanded as H -+// v23 - -+// v24 U out -+// v25 U tmp -+// v26 Y out -+// v27-29 Y tmp -+// v30 V out -+// v31 V tmp -+ -+// Assumes Little Endian in tail stores & conversion matrix -+ - function ff_bgr24toyv12_aarch64, export=1 - ldr x15, [sp, #8] - ld3 {v3.s, v4.s, v5.s}[0], 
[x15], #12 -@@ -123,138 +142,383 @@ function ff_bgr24toyv12_aarch64, export=1 - ld3 {v3.s, v4.s, v5.s}[2], [x15] - 99: - ldr w14, [sp, #0] -- movi v18.8b, #128 -- uxtl v17.8h, v18.8b -- -- // Even line - YUV -+ movi v7.8b, #128 -+ uxtl v6.8h, v7.8b -+ // Ensure if nothing to do then we do nothing -+ cmp w4, #0 -+ b.le 90f -+ cmp w5, #0 -+ b.le 90f -+ // If w % 16 != 0 then -16 so we do main loop 1 fewer times with -+ // the remainder done in the tail -+ tst w4, #15 -+ b.eq 1f -+ sub w4, w4, #16 - 1: -+ -+// -------------------- Even line body - YUV -+11: -+ subs w9, w4, #0 - mov x10, x0 - mov x11, x1 - mov x12, x2 - mov x13, x3 -- mov w9, w4 -+ b.lt 12f - --0: - ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 -+ subs w9, w9, #16 -+ b.le 13f -+ -+10: -+ uxtl v16.8h, v0.8b -+ uxtl v17.8h, v1.8b -+ uxtl v18.8h, v2.8b - - uxtl2 v20.8h, v0.16b - uxtl2 v21.8h, v1.16b - uxtl2 v22.8h, v2.16b - -- uxtl v0.8h, v0.8b -- uxtl v1.8h, v1.8b -- uxtl v2.8h, v2.8b -+ bic v0.8h, #0xff, LSL #8 -+ bic v1.8h, #0xff, LSL #8 -+ bic v2.8h, #0xff, LSL #8 -+ -+ // Testing shows it is faster to stack the smull/smlal ops together -+ // rather than interleave them between channels and indeed even the -+ // shift/add sections seem happier not interleaved -+ - // Y0 -- smull v6.4s, v0.4h, v3.h[0] -- smull2 v7.4s, v0.8h, v3.h[0] -- smlal v6.4s, v1.4h, v4.h[0] -- smlal2 v7.4s, v1.8h, v4.h[0] -- smlal v6.4s, v2.4h, v5.h[0] -- smlal2 v7.4s, v2.8h, v5.h[0] -- shrn v6.4h, v6.4s, #12 -- shrn2 v6.8h, v7.4s, #12 -- add v6.8h, v6.8h, v17.8h // +128 (>> 3 = 16) -- uqrshrn v16.8b, v6.8h, #3 -+ smull v26.4s, v16.4h, v3.h[0] -+ smlal v26.4s, v17.4h, v4.h[0] -+ smlal v26.4s, v18.4h, v5.h[0] -+ smull2 v27.4s, v16.8h, v3.h[0] -+ smlal2 v27.4s, v17.8h, v4.h[0] -+ smlal2 v27.4s, v18.8h, v5.h[0] - // Y1 -- smull v6.4s, v20.4h, v3.h[0] -- smull2 v7.4s, v20.8h, v3.h[0] -- smlal v6.4s, v21.4h, v4.h[0] -- smlal2 v7.4s, v21.8h, v4.h[0] -- smlal v6.4s, v22.4h, v5.h[0] -- smlal2 v7.4s, v22.8h, v5.h[0] -- shrn v6.4h, v6.4s, #12 
-- shrn2 v6.8h, v7.4s, #12 -- add v6.8h, v6.8h, v17.8h -- uqrshrn2 v16.16b, v6.8h, #3 -+ smull v28.4s, v20.4h, v3.h[0] -+ smlal v28.4s, v21.4h, v4.h[0] -+ smlal v28.4s, v22.4h, v5.h[0] -+ smull2 v29.4s, v20.8h, v3.h[0] -+ smlal2 v29.4s, v21.8h, v4.h[0] -+ smlal2 v29.4s, v22.8h, v5.h[0] -+ shrn v26.4h, v26.4s, #12 -+ shrn2 v26.8h, v27.4s, #12 -+ add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -+ uqrshrn v26.8b, v26.8h, #3 -+ shrn v28.4h, v28.4s, #12 -+ shrn2 v28.8h, v29.4s, #12 -+ add v28.8h, v28.8h, v6.8h -+ uqrshrn2 v26.16b, v28.8h, #3 - // Y0/Y1 -- st1 {v16.16b}, [x11], #16 -- -- uzp1 v0.8h, v0.8h, v20.8h -- uzp1 v1.8h, v1.8h, v21.8h -- uzp1 v2.8h, v2.8h, v22.8h - - // U - // Vector subscript *2 as we loaded into S but are only using H -- smull v6.4s, v0.4h, v3.h[2] -- smull2 v7.4s, v0.8h, v3.h[2] -- smlal v6.4s, v1.4h, v4.h[2] -- smlal2 v7.4s, v1.8h, v4.h[2] -- smlal v6.4s, v2.4h, v5.h[2] -- smlal2 v7.4s, v2.8h, v5.h[2] -- shrn v6.4h, v6.4s, #14 -- shrn2 v6.8h, v7.4s, #14 -- sqrshrn v6.8b, v6.8h, #1 -- add v6.8b, v6.8b, v18.8b // +128 -- st1 {v6.8b}, [x12], #8 -+ smull v24.4s, v0.4h, v3.h[2] -+ smlal v24.4s, v1.4h, v4.h[2] -+ smlal v24.4s, v2.4h, v5.h[2] -+ smull2 v25.4s, v0.8h, v3.h[2] -+ smlal2 v25.4s, v1.8h, v4.h[2] -+ smlal2 v25.4s, v2.8h, v5.h[2] - - // V -- smull v6.4s, v0.4h, v3.h[4] -- smull2 v7.4s, v0.8h, v3.h[4] -- smlal v6.4s, v1.4h, v4.h[4] -- smlal2 v7.4s, v1.8h, v4.h[4] -- smlal v6.4s, v2.4h, v5.h[4] -- smlal2 v7.4s, v2.8h, v5.h[4] -- shrn v6.4h, v6.4s, #14 -- shrn2 v6.8h, v7.4s, #14 -- sqrshrn v6.8b, v6.8h, #1 -- add v6.8b, v6.8b, v18.8b // +128 -- st1 {v6.8b}, [x13], #8 -+ smull v30.4s, v0.4h, v3.h[4] -+ smlal v30.4s, v1.4h, v4.h[4] -+ smlal v30.4s, v2.4h, v5.h[4] -+ smull2 v31.4s, v0.8h, v3.h[4] -+ smlal2 v31.4s, v1.8h, v4.h[4] -+ smlal2 v31.4s, v2.8h, v5.h[4] -+ -+ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 -+ -+ shrn v24.4h, v24.4s, #14 -+ shrn2 v24.8h, v25.4s, #14 -+ sqrshrn v24.8b, v24.8h, #1 -+ add v24.8b, v24.8b, v7.8b // +128 -+ shrn v30.4h, 
v30.4s, #14 -+ shrn2 v30.8h, v31.4s, #14 -+ sqrshrn v30.8b, v30.8h, #1 -+ add v30.8b, v30.8b, v7.8b // +128 - - subs w9, w9, #16 -- b.gt 0b - -- // Odd line - Y only -+ st1 {v26.16b}, [x11], #16 -+ st1 {v24.8b}, [x12], #8 -+ st1 {v30.8b}, [x13], #8 -+ -+ b.gt 10b -+ -+// -------------------- Even line tail - YUV -+// If width % 16 == 0 then simply runs once with preloaded RGB -+// If other then deals with preload & then does remaining tail -+ -+13: -+ // Body is simple copy of main loop body minus preload -+ -+ uxtl v16.8h, v0.8b -+ uxtl v17.8h, v1.8b -+ uxtl v18.8h, v2.8b -+ -+ uxtl2 v20.8h, v0.16b -+ uxtl2 v21.8h, v1.16b -+ uxtl2 v22.8h, v2.16b -+ -+ bic v0.8h, #0xff, LSL #8 -+ bic v1.8h, #0xff, LSL #8 -+ bic v2.8h, #0xff, LSL #8 -+ -+ // Y0 -+ smull v26.4s, v16.4h, v3.h[0] -+ smlal v26.4s, v17.4h, v4.h[0] -+ smlal v26.4s, v18.4h, v5.h[0] -+ smull2 v27.4s, v16.8h, v3.h[0] -+ smlal2 v27.4s, v17.8h, v4.h[0] -+ smlal2 v27.4s, v18.8h, v5.h[0] -+ // Y1 -+ smull v28.4s, v20.4h, v3.h[0] -+ smlal v28.4s, v21.4h, v4.h[0] -+ smlal v28.4s, v22.4h, v5.h[0] -+ smull2 v29.4s, v20.8h, v3.h[0] -+ smlal2 v29.4s, v21.8h, v4.h[0] -+ smlal2 v29.4s, v22.8h, v5.h[0] -+ shrn v26.4h, v26.4s, #12 -+ shrn2 v26.8h, v27.4s, #12 -+ add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -+ uqrshrn v26.8b, v26.8h, #3 -+ shrn v28.4h, v28.4s, #12 -+ shrn2 v28.8h, v29.4s, #12 -+ add v28.8h, v28.8h, v6.8h -+ uqrshrn2 v26.16b, v28.8h, #3 -+ // Y0/Y1 -+ -+ // U -+ // Vector subscript *2 as we loaded into S but are only using H -+ smull v24.4s, v0.4h, v3.h[2] -+ smlal v24.4s, v1.4h, v4.h[2] -+ smlal v24.4s, v2.4h, v5.h[2] -+ smull2 v25.4s, v0.8h, v3.h[2] -+ smlal2 v25.4s, v1.8h, v4.h[2] -+ smlal2 v25.4s, v2.8h, v5.h[2] - -+ // V -+ smull v30.4s, v0.4h, v3.h[4] -+ smlal v30.4s, v1.4h, v4.h[4] -+ smlal v30.4s, v2.4h, v5.h[4] -+ smull2 v31.4s, v0.8h, v3.h[4] -+ smlal2 v31.4s, v1.8h, v4.h[4] -+ smlal2 v31.4s, v2.8h, v5.h[4] -+ -+ cmp w9, #-16 -+ -+ shrn v24.4h, v24.4s, #14 -+ shrn2 v24.8h, v25.4s, #14 -+ sqrshrn 
v24.8b, v24.8h, #1 -+ add v24.8b, v24.8b, v7.8b // +128 -+ shrn v30.4h, v30.4s, #14 -+ shrn2 v30.8h, v31.4s, #14 -+ sqrshrn v30.8b, v30.8h, #1 -+ add v30.8b, v30.8b, v7.8b // +128 -+ -+ // Here: -+ // w9 == 0 width % 16 == 0, tail done -+ // w9 > -16 1st tail done (16 pels), remainder still to go -+ // w9 == -16 shouldn't happen -+ // w9 > -32 2nd tail done -+ // w9 <= -32 shouldn't happen -+ -+ b.lt 2f -+ st1 {v26.16b}, [x11], #16 -+ st1 {v24.8b}, [x12], #8 -+ st1 {v30.8b}, [x13], #8 -+ cbz w9, 3f -+ -+12: -+ sub w9, w9, #16 -+ -+ tbz w9, #3, 1f -+ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 -+1: tbz w9, #2, 1f -+ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 -+1: tbz w9, #1, 1f -+ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 -+1: tbz w9, #0, 13b -+ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 -+ b 13b -+ -+2: -+ tbz w9, #3, 1f -+ st1 {v26.8b}, [x11], #8 -+ st1 {v24.s}[0], [x12], #4 -+ st1 {v30.s}[0], [x13], #4 -+1: tbz w9, #2, 1f -+ st1 {v26.s}[2], [x11], #4 -+ st1 {v24.h}[2], [x12], #2 -+ st1 {v30.h}[2], [x13], #2 -+1: tbz w9, #1, 1f -+ st1 {v26.h}[6], [x11], #2 -+ st1 {v24.b}[6], [x12], #1 -+ st1 {v30.b}[6], [x13], #1 -+1: tbz w9, #0, 1f -+ st1 {v26.b}[14], [x11] -+ st1 {v24.b}[7], [x12] -+ st1 {v30.b}[7], [x13] -+1: -+3: -+ -+// -------------------- Odd line body - Y only -+ -+ subs w5, w5, #1 -+ b.eq 90f -+ -+ subs w9, w4, #0 - add x0, x0, w14, SXTX - add x1, x1, w6, SXTX - mov x10, x0 - mov x11, x1 -- mov w9, w4 -+ b.lt 12f - --0: - ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 -+ subs w9, w9, #16 -+ b.le 13f -+ -+10: -+ uxtl v16.8h, v0.8b -+ uxtl v17.8h, v1.8b -+ uxtl v18.8h, v2.8b - - uxtl2 v20.8h, v0.16b - uxtl2 v21.8h, v1.16b - uxtl2 v22.8h, v2.16b - -- uxtl v0.8h, v0.8b -- uxtl v1.8h, v1.8b -- uxtl v2.8h, v2.8b -+ // Testing shows it is faster to stack the smull/smlal ops together -+ // rather than interleave them between 
channels and indeed even the -+ // shift/add sections seem happier not interleaved -+ - // Y0 -- smull v6.4s, v0.4h, v3.h[0] -- smull2 v7.4s, v0.8h, v3.h[0] -- smlal v6.4s, v1.4h, v4.h[0] -- smlal2 v7.4s, v1.8h, v4.h[0] -- smlal v6.4s, v2.4h, v5.h[0] -- smlal2 v7.4s, v2.8h, v5.h[0] -- shrn v6.4h, v6.4s, #12 -- shrn2 v6.8h, v7.4s, #12 -- add v6.8h, v6.8h, v17.8h -- uqrshrn v16.8b, v6.8h, #3 -+ smull v26.4s, v16.4h, v3.h[0] -+ smlal v26.4s, v17.4h, v4.h[0] -+ smlal v26.4s, v18.4h, v5.h[0] -+ smull2 v27.4s, v16.8h, v3.h[0] -+ smlal2 v27.4s, v17.8h, v4.h[0] -+ smlal2 v27.4s, v18.8h, v5.h[0] - // Y1 -- smull v6.4s, v20.4h, v3.h[0] -- smull2 v7.4s, v20.8h, v3.h[0] -- smlal v6.4s, v21.4h, v4.h[0] -- smlal2 v7.4s, v21.8h, v4.h[0] -- smlal v6.4s, v22.4h, v5.h[0] -- smlal2 v7.4s, v22.8h, v5.h[0] -- shrn v6.4h, v6.4s, #12 -- shrn2 v6.8h, v7.4s, #12 -- add v6.8h, v6.8h, v17.8h -- uqrshrn2 v16.16b, v6.8h, #3 -+ smull v28.4s, v20.4h, v3.h[0] -+ smlal v28.4s, v21.4h, v4.h[0] -+ smlal v28.4s, v22.4h, v5.h[0] -+ smull2 v29.4s, v20.8h, v3.h[0] -+ smlal2 v29.4s, v21.8h, v4.h[0] -+ smlal2 v29.4s, v22.8h, v5.h[0] -+ -+ ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 -+ -+ shrn v26.4h, v26.4s, #12 -+ shrn2 v26.8h, v27.4s, #12 -+ add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -+ uqrshrn v26.8b, v26.8h, #3 -+ shrn v28.4h, v28.4s, #12 -+ shrn2 v28.8h, v29.4s, #12 -+ add v28.8h, v28.8h, v6.8h -+ uqrshrn2 v26.16b, v28.8h, #3 - // Y0/Y1 -- st1 {v16.16b}, [x11], #16 - - subs w9, w9, #16 -- b.gt 0b -+ -+ st1 {v26.16b}, [x11], #16 -+ -+ b.gt 10b -+ -+// -------------------- Odd line tail - Y -+// If width % 16 == 0 then simply runs once with preloaded RGB -+// If other then deals with preload & then does remaining tail -+ -+13: -+ // Body is simple copy of main loop body minus preload -+ -+ uxtl v16.8h, v0.8b -+ uxtl v17.8h, v1.8b -+ uxtl v18.8h, v2.8b -+ -+ uxtl2 v20.8h, v0.16b -+ uxtl2 v21.8h, v1.16b -+ uxtl2 v22.8h, v2.16b -+ -+ // Y0 -+ smull v26.4s, v16.4h, v3.h[0] -+ smlal v26.4s, v17.4h, v4.h[0] 
-+ smlal v26.4s, v18.4h, v5.h[0] -+ smull2 v27.4s, v16.8h, v3.h[0] -+ smlal2 v27.4s, v17.8h, v4.h[0] -+ smlal2 v27.4s, v18.8h, v5.h[0] -+ // Y1 -+ smull v28.4s, v20.4h, v3.h[0] -+ smlal v28.4s, v21.4h, v4.h[0] -+ smlal v28.4s, v22.4h, v5.h[0] -+ smull2 v29.4s, v20.8h, v3.h[0] -+ smlal2 v29.4s, v21.8h, v4.h[0] -+ smlal2 v29.4s, v22.8h, v5.h[0] -+ -+ cmp w9, #-16 -+ -+ shrn v26.4h, v26.4s, #12 -+ shrn2 v26.8h, v27.4s, #12 -+ add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -+ uqrshrn v26.8b, v26.8h, #3 -+ shrn v28.4h, v28.4s, #12 -+ shrn2 v28.8h, v29.4s, #12 -+ add v28.8h, v28.8h, v6.8h -+ uqrshrn2 v26.16b, v28.8h, #3 -+ // Y0/Y1 -+ -+ // Here: -+ // w9 == 0 width % 16 == 0, tail done -+ // w9 > -16 1st tail done (16 pels), remainder still to go -+ // w9 == -16 shouldn't happen -+ // w9 > -32 2nd tail done -+ // w9 <= -32 shouldn't happen -+ -+ b.lt 2f -+ st1 {v26.16b}, [x11], #16 -+ cbz w9, 3f -+ -+12: -+ sub w9, w9, #16 -+ -+ tbz w9, #3, 1f -+ ld3 {v0.8b, v1.8b, v2.8b}, [x10], #24 -+1: tbz w9, #2, 1f -+ ld3 {v0.b, v1.b, v2.b}[8], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[9], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[10], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[11], [x10], #3 -+1: tbz w9, #1, 1f -+ ld3 {v0.b, v1.b, v2.b}[12], [x10], #3 -+ ld3 {v0.b, v1.b, v2.b}[13], [x10], #3 -+1: tbz w9, #0, 13b -+ ld3 {v0.b, v1.b, v2.b}[14], [x10], #3 -+ b 13b -+ -+2: -+ tbz w9, #3, 1f -+ st1 {v26.8b}, [x11], #8 -+1: tbz w9, #2, 1f -+ st1 {v26.s}[2], [x11], #4 -+1: tbz w9, #1, 1f -+ st1 {v26.h}[6], [x11], #2 -+1: tbz w9, #0, 1f -+ st1 {v26.b}[14], [x11] -+1: -+3: -+ -+// ------------------- Loop to start - - add x0, x0, w14, SXTX - add x1, x1, w6, SXTX - add x2, x2, w7, SXTX - add x3, x3, w7, SXTX -- subs w5, w5, #2 -- b.gt 1b -- -+ subs w5, w5, #1 -+ b.gt 11b -+90: - ret - endfunc - -From d33e534ad9a45463b0433767a28256d737827b8c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 26 Apr 2023 15:36:07 +0000 -Subject: [PATCH 127/186] rgb2rgb: Use asm unconditionally - -(cherry picked from commit 
7c216c0804836b31c0ea093bb1dde5ab387724b1) ---- - libswscale/aarch64/rgb2rgb.c | 37 ++---------------------------------- - 1 file changed, 2 insertions(+), 35 deletions(-) - -diff --git a/libswscale/aarch64/rgb2rgb.c b/libswscale/aarch64/rgb2rgb.c -index f10c4ef2ded9..6a0e2dcc09f8 100644 ---- a/libswscale/aarch64/rgb2rgb.c -+++ b/libswscale/aarch64/rgb2rgb.c -@@ -37,46 +37,13 @@ void ff_rgb24toyv12_aarch64(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, int lumStride, - int chromStride, int srcStride, int32_t *rgb2yuv); - --// RGB to YUV asm fns process 16 pixels at once so ensure that the output --// will fit into the stride. ARM64 should cope with unaligned SIMD r/w so --// don't test for that --// Fall back to C if we cannot use asm -- --static inline int chkw(const int width, const int lumStride, const int chromStride) --{ --// const int aw = FFALIGN(width, 16); --// return aw <= FFABS(lumStride) && aw <= FFABS(chromStride) * 2; -- return 1; --} -- --static void rgb24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -- uint8_t *vdst, int width, int height, int lumStride, -- int chromStride, int srcStride, int32_t *rgb2yuv) --{ -- if (chkw(width, lumStride, chromStride)) -- ff_rgb24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv); -- else -- ff_rgb24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, rgb2yuv); --} -- --static void bgr24toyv12_check(const uint8_t *src, uint8_t *ydst, uint8_t *udst, -- uint8_t *vdst, int width, int height, int lumStride, -- int chromStride, int srcStride, int32_t *bgr2yuv) --{ -- if (chkw(width, lumStride, chromStride)) -- ff_bgr24toyv12_aarch64(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv); -- else -- ff_bgr24toyv12_c(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride, bgr2yuv); --} -- -- - av_cold void rgb2rgb_init_aarch64(void) - { - int 
cpu_flags = av_get_cpu_flags(); - - if (have_neon(cpu_flags)) { - interleaveBytes = ff_interleave_bytes_neon; -- ff_rgb24toyv12 = rgb24toyv12_check; -- ff_bgr24toyv12 = bgr24toyv12_check; -+ ff_rgb24toyv12 = ff_rgb24toyv12_aarch64; -+ ff_bgr24toyv12 = ff_bgr24toyv12_aarch64; - } - } - -From 79640085d62275d96b2c53f18776cfd773d9fde4 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 27 Apr 2023 13:01:43 +0000 -Subject: [PATCH 128/186] tests/swscale: Add options for width and height on - the command line - -(cherry picked from commit eb8a09779688fc05bf204fdfcd063b04cda07271) ---- - libswscale/tests/swscale.c | 84 ++++++++++++++++++++++++++------------ - 1 file changed, 59 insertions(+), 25 deletions(-) - -diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c -index 6c38041ddb81..4cf41d9f64a4 100644 ---- a/libswscale/tests/swscale.c -+++ b/libswscale/tests/swscale.c -@@ -355,56 +355,71 @@ static int fileTest(const uint8_t * const ref[4], int refStride[4], - return 0; - } - --#define W 96 --#define H 96 -- - int main(int argc, char **argv) - { -+ unsigned int W = 96; -+ unsigned int H = 96; -+ unsigned int W2; -+ unsigned int H2; -+ unsigned int S; - enum AVPixelFormat srcFormat = AV_PIX_FMT_NONE; - enum AVPixelFormat dstFormat = AV_PIX_FMT_NONE; -- uint8_t *rgb_data = av_malloc(W * H * 4); -- const uint8_t * const rgb_src[4] = { rgb_data, NULL, NULL, NULL }; -- int rgb_stride[4] = { 4 * W, 0, 0, 0 }; -- uint8_t *data = av_malloc(4 * W * H); -- const uint8_t * const src[4] = { data, data + W * H, data + W * H * 2, data + W * H * 3 }; -- int stride[4] = { W, W, W, W }; - int x, y; - struct SwsContext *sws; - AVLFG rand; - int res = -1; - int i; - FILE *fp = NULL; -- -- if (!rgb_data || !data) -- return -1; -+ uint8_t *rgb_data; -+ uint8_t * rgb_src[4] = { NULL }; -+ int rgb_stride[4] = { 0 }; -+ uint8_t *data; -+ uint8_t * src[4] = { NULL }; -+ int stride[4] = { 0 }; - - for (i = 1; i < argc; i += 2) { -+ const char * const arg2 = argv[i+1]; -+ - if 
(argv[i][0] != '-' || i + 1 == argc) - goto bad_option; - if (!strcmp(argv[i], "-ref")) { -- fp = fopen(argv[i + 1], "r"); -+ fp = fopen(arg2, "r"); - if (!fp) { -- fprintf(stderr, "could not open '%s'\n", argv[i + 1]); -+ fprintf(stderr, "could not open '%s'\n", arg2); - goto error; - } - } else if (!strcmp(argv[i], "-cpuflags")) { - unsigned flags = av_get_cpu_flags(); -- int ret = av_parse_cpu_caps(&flags, argv[i + 1]); -+ int ret = av_parse_cpu_caps(&flags, arg2); - if (ret < 0) { -- fprintf(stderr, "invalid cpu flags %s\n", argv[i + 1]); -+ fprintf(stderr, "invalid cpu flags %s\n", arg2); - return ret; - } - av_force_cpu_flags(flags); - } else if (!strcmp(argv[i], "-src")) { -- srcFormat = av_get_pix_fmt(argv[i + 1]); -+ srcFormat = av_get_pix_fmt(arg2); - if (srcFormat == AV_PIX_FMT_NONE) { -- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); -+ fprintf(stderr, "invalid pixel format %s\n", arg2); - return -1; - } - } else if (!strcmp(argv[i], "-dst")) { -- dstFormat = av_get_pix_fmt(argv[i + 1]); -+ dstFormat = av_get_pix_fmt(arg2); - if (dstFormat == AV_PIX_FMT_NONE) { -- fprintf(stderr, "invalid pixel format %s\n", argv[i + 1]); -+ fprintf(stderr, "invalid pixel format %s\n", arg2); -+ return -1; -+ } -+ } else if (!strcmp(argv[i], "-w")) { -+ char * p = NULL; -+ W = strtoul(arg2, &p, 0); -+ if (!W || *p) { -+ fprintf(stderr, "bad width %s\n", arg2); -+ return -1; -+ } -+ } else if (!strcmp(argv[i], "-h")) { -+ char * p = NULL; -+ H = strtoul(arg2, &p, 0); -+ if (!H || *p) { -+ fprintf(stderr, "bad height '%s' (H=%d, *p=%d)\n", arg2, H, *p); - return -1; - } - } else { -@@ -414,15 +429,34 @@ bad_option: - } - } - -- sws = sws_getContext(W / 12, H / 12, AV_PIX_FMT_RGB32, W, H, -+ S = (W + 15) & ~15; -+ rgb_data = av_mallocz(S * H * 4); -+ rgb_src[0] = rgb_data; -+ rgb_stride[0] = 4 * S; -+ data = av_mallocz(4 * S * H); -+ src[0] = data; -+ src[1] = data + S * H; -+ src[2] = data + S * H * 2; -+ src[3] = data + S * H * 3; -+ stride[0] = S; -+ 
stride[1] = S; -+ stride[2] = S; -+ stride[3] = S; -+ H2 = H < 96 ? 8 : H / 12; -+ W2 = W < 96 ? 8 : W / 12; -+ -+ if (!rgb_data || !data) -+ return -1; -+ -+ sws = sws_getContext(W2, H2, AV_PIX_FMT_RGB32, W, H, - AV_PIX_FMT_YUVA420P, SWS_BILINEAR, NULL, NULL, NULL); - - av_lfg_init(&rand, 1); - - for (y = 0; y < H; y++) - for (x = 0; x < W * 4; x++) -- rgb_data[ x + y * 4 * W] = av_lfg_get(&rand); -- res = sws_scale(sws, rgb_src, rgb_stride, 0, H / 12, (uint8_t * const *) src, stride); -+ rgb_data[ x + y * 4 * S] = av_lfg_get(&rand); -+ res = sws_scale(sws, (const uint8_t * const *)rgb_src, rgb_stride, 0, H2, (uint8_t * const *) src, stride); - if (res < 0 || res != H) { - res = -1; - goto error; -@@ -431,10 +465,10 @@ bad_option: - av_free(rgb_data); - - if(fp) { -- res = fileTest(src, stride, W, H, fp, srcFormat, dstFormat); -+ res = fileTest((const uint8_t * const *)src, stride, W, H, fp, srcFormat, dstFormat); - fclose(fp); - } else { -- selfTest(src, stride, W, H, srcFormat, dstFormat); -+ selfTest((const uint8_t * const *)src, stride, W, H, srcFormat, dstFormat); - res = 0; - } - error: - -From 7fcd6aa72879fad5f0a4d2144b65c37c7a50ecfc Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 26 Apr 2023 16:31:23 +0000 -Subject: [PATCH 129/186] tests/swscale: Add a timing option - --t Where n is the number of time to loop the scale op. 
- Often useful to do it 10 times or so for better resolution - -(cherry picked from commit 50cd60a23a66254f911376602d07b30fcafbde96) ---- - libswscale/tests/swscale.c | 32 ++++++++++++++++++++++++++++++-- - 1 file changed, 30 insertions(+), 2 deletions(-) - -diff --git a/libswscale/tests/swscale.c b/libswscale/tests/swscale.c -index 4cf41d9f64a4..12776ffec7ae 100644 ---- a/libswscale/tests/swscale.c -+++ b/libswscale/tests/swscale.c -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - - #undef HAVE_AV_CONFIG_H - #include "libavutil/cpu.h" -@@ -78,6 +79,15 @@ struct Results { - uint32_t crc; - }; - -+static int time_rep = 0; -+ -+static uint64_t utime(void) -+{ -+ struct timespec ts; -+ clock_gettime(CLOCK_MONOTONIC, &ts); -+ return ts.tv_nsec / 1000 + (uint64_t)ts.tv_sec * 1000000; -+} -+ - // test by ref -> src -> dst -> out & compare out against ref - // ref & out are YV12 - static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, -@@ -174,7 +184,7 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, - goto end; - } - -- printf(" %s %dx%d -> %s %3dx%3d flags=%2d", -+ printf(" %s %4dx%4d -> %s %4dx%4d flags=%2d", - desc_src->name, srcW, srcH, - desc_dst->name, dstW, dstH, - flags); -@@ -182,6 +192,17 @@ static int doTest(const uint8_t * const ref[4], int refStride[4], int w, int h, - - sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); - -+ if (time_rep != 0) -+ { -+ const uint64_t now = utime(); -+ uint64_t done; -+ for (i = 1; i != time_rep; ++i) { -+ sws_scale(dstContext, (const uint8_t * const*)src, srcStride, 0, srcH, dst, dstStride); -+ } -+ done = utime(); -+ printf(" T=%7"PRId64"us ", done-now); -+ } -+ - for (i = 0; i < 4 && dstStride[i]; i++) - crc = av_crc(av_crc_get_table(AV_CRC_32_IEEE), crc, dst[i], - dstStride[i] * dstH); -@@ -419,7 +440,14 @@ int main(int argc, char **argv) - char * p = NULL; - H = strtoul(arg2, &p, 0); - if (!H || *p) { -- 
fprintf(stderr, "bad height '%s' (H=%d, *p=%d)\n", arg2, H, *p); -+ fprintf(stderr, "bad height '%s'\n", arg2); -+ return -1; -+ } -+ } else if (!strcmp(argv[i], "-t")) { -+ char * p = NULL; -+ time_rep = (int)strtol(arg2, &p, 0); -+ if (*p) { -+ fprintf(stderr, "bad time repetitions '%s'\n", arg2); - return -1; - } - } else { - -From fabd4e3e197737f27684a17e817c186a350213d4 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 20 Apr 2023 13:40:36 +0000 -Subject: [PATCH 130/186] swscale: RGB->YUV420 fix C template to allow odd - widths - -(cherry picked from commit 08b2023e7b5292df0adc6593e4d20087f9cef5c8) ---- - libswscale/rgb2rgb_template.c | 44 +++++++++++++++++++++++++++++++++++ - libswscale/swscale_unscaled.c | 11 ++++----- - 2 files changed, 49 insertions(+), 6 deletions(-) - -diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c -index 703de90690d3..e711589e1e1a 100644 ---- a/libswscale/rgb2rgb_template.c -+++ b/libswscale/rgb2rgb_template.c -@@ -679,6 +679,19 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - ydst[2 * i + 1] = Y; - } -+ if ((width & 1) != 0) { -+ unsigned int b = src[6 * i + 0]; -+ unsigned int g = src[6 * i + 1]; -+ unsigned int r = src[6 * i + 2]; -+ -+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; -+ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; -+ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; -+ -+ udst[i] = U; -+ vdst[i] = V; -+ ydst[2 * i] = Y; -+ } - ydst += lumStride; - src += srcStride; - -@@ -701,6 +714,15 @@ static void rgb24toyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - ydst[2 * i + 1] = Y; - } -+ if ((width & 1) != 0) { -+ unsigned int b = src[6 * i + 0]; -+ unsigned int g = src[6 * i + 1]; -+ unsigned int r = src[6 * i + 2]; -+ -+ unsigned int Y = ((ry * r + gy * g + by * b) >> 
RGB2YUV_SHIFT) + 16; -+ -+ ydst[2 * i] = Y; -+ } - udst += chromStride; - vdst += chromStride; - ydst += lumStride; -@@ -767,6 +789,19 @@ static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - ydst[2 * i + 1] = Y; - } -+ if ((width & 1) != 0) { -+ unsigned int b = src[8 * i + 2]; -+ unsigned int g = src[8 * i + 1]; -+ unsigned int r = src[8 * i + 0]; -+ -+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; -+ unsigned int V = ((rv * r + gv * g + bv * b) >> RGB2YUV_SHIFT) + 128; -+ unsigned int U = ((ru * r + gu * g + bu * b) >> RGB2YUV_SHIFT) + 128; -+ -+ udst[i] = U; -+ vdst[i] = V; -+ ydst[2 * i] = Y; -+ } - ydst += lumStride; - src += srcStride; - -@@ -789,6 +824,15 @@ static void rgbxtoyv12_x(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; - ydst[2 * i + 1] = Y; - } -+ if ((width & 1) != 0) { -+ unsigned int b = src[8 * i + 2]; -+ unsigned int g = src[8 * i + 1]; -+ unsigned int r = src[8 * i + 0]; -+ -+ unsigned int Y = ((ry * r + gy * g + by * b) >> RGB2YUV_SHIFT) + 16; -+ -+ ydst[2 * i] = Y; -+ } - udst += chromStride; - vdst += chromStride; - ydst += lumStride; -diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c -index 053c06adf5d1..52469b2e4a7b 100644 ---- a/libswscale/swscale_unscaled.c -+++ b/libswscale/swscale_unscaled.c -@@ -2062,7 +2062,6 @@ void ff_get_unscaled_swscale(SwsContext *c) - const enum AVPixelFormat dstFormat = c->dstFormat; - const int flags = c->flags; - const int dstH = c->dstH; -- const int dstW = c->dstW; - int needsDither; - - needsDither = isAnyRGB(dstFormat) && -@@ -2120,12 +2119,12 @@ void ff_get_unscaled_swscale(SwsContext *c) - /* bgr24toYV12 */ - if (srcFormat == AV_PIX_FMT_BGR24 && - (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && -- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ !(flags & SWS_ACCURATE_RND)) - 
c->convert_unscaled = bgr24ToYv12Wrapper; - /* rgb24toYV12 */ - if (srcFormat == AV_PIX_FMT_RGB24 && - (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P) && -- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ !(flags & SWS_ACCURATE_RND)) - c->convert_unscaled = rgb24ToYv12Wrapper; - - /* bgrxtoYV12 */ -@@ -2136,17 +2135,17 @@ void ff_get_unscaled_swscale(SwsContext *c) - /* rgbx24toYV12 */ - if (((srcFormat == AV_PIX_FMT_RGBA && dstFormat == AV_PIX_FMT_YUV420P) || - (srcFormat == AV_PIX_FMT_RGB0 && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && -- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ !(flags & SWS_ACCURATE_RND)) - c->convert_unscaled = rgbxToYv12Wrapper; - /* xbgrtoYV12 */ - if (((srcFormat == AV_PIX_FMT_ABGR && dstFormat == AV_PIX_FMT_YUV420P) || - (srcFormat == AV_PIX_FMT_0BGR && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && -- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ !(flags & SWS_ACCURATE_RND)) - c->convert_unscaled = xbgrToYv12Wrapper; - /* xrgb24toYV12 */ - if (((srcFormat == AV_PIX_FMT_ARGB && dstFormat == AV_PIX_FMT_YUV420P) || - (srcFormat == AV_PIX_FMT_0RGB && (dstFormat == AV_PIX_FMT_YUV420P || dstFormat == AV_PIX_FMT_YUVA420P))) && -- !(flags & SWS_ACCURATE_RND) && !(dstW&1)) -+ !(flags & SWS_ACCURATE_RND)) - c->convert_unscaled = xrgbToYv12Wrapper; - - /* RGB/BGR -> RGB/BGR (no dither needed forms) */ - -From 7d6f3a7ede0f4bf03a410bc2a8a8f38a47ac15a9 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 4 May 2023 14:26:14 +0000 -Subject: [PATCH 131/186] rtpenc: Add code to send H264 new extradata in - sidedata - -Fixes issue with pi V4L2 H264 encode which cannot create extradata -at init time. 
- -(cherry picked from commit 4f852b4b093f841b64b4934a6f1720e98e4e0f2c) ---- - libavformat/rtpenc.c | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c -index a8d296a1542f..f67dc2a15ae1 100644 ---- a/libavformat/rtpenc.c -+++ b/libavformat/rtpenc.c -@@ -19,6 +19,7 @@ - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -+#include "avc.h" - #include "avformat.h" - #include "mpegts.h" - #include "internal.h" -@@ -585,8 +586,25 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) - ff_rtp_send_vc2hq(s1, pkt->data, size, st->codecpar->field_order != AV_FIELD_PROGRESSIVE ? 1 : 0); - break; - case AV_CODEC_ID_H264: -+ { -+ uint8_t *side_data; -+ int side_data_size = 0; -+ -+ side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, -+ &side_data_size); -+ -+ if (side_data_size != 0) { -+ int ps_size = side_data_size; -+ uint8_t * ps_buf = NULL; -+ -+ ff_avc_write_annexb_extradata(side_data, &ps_buf, &ps_size); -+ av_log(s1, AV_LOG_TRACE, "H264: write side data=%d\n", ps_size); -+ ff_rtp_send_h264_hevc(s1, ps_buf ? 
ps_buf : side_data, ps_size); -+ av_free(ps_buf); -+ } - ff_rtp_send_h264_hevc(s1, pkt->data, size); - break; -+ } - case AV_CODEC_ID_H261: - ff_rtp_send_h261(s1, pkt->data, size); - break; - -From 7ba7eb37305530b4eef1637c87088da8d93911aa Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 5 Jun 2023 08:34:38 +0000 -Subject: [PATCH 132/186] rgb2rgb: Fix luma narrow+saturation instruction - -(cherry picked from commit 9cdac1c08ad5c0aea28907d1d3fd0bdda387955a) ---- - libswscale/aarch64/rgb2rgb_neon.S | 16 ++++++++-------- - 1 file changed, 8 insertions(+), 8 deletions(-) - -diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S -index 978ab443ea52..476ca723a0ef 100644 ---- a/libswscale/aarch64/rgb2rgb_neon.S -+++ b/libswscale/aarch64/rgb2rgb_neon.S -@@ -203,11 +203,11 @@ function ff_bgr24toyv12_aarch64, export=1 - shrn v26.4h, v26.4s, #12 - shrn2 v26.8h, v27.4s, #12 - add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- uqrshrn v26.8b, v26.8h, #3 -+ sqrshrun v26.8b, v26.8h, #3 - shrn v28.4h, v28.4s, #12 - shrn2 v28.8h, v29.4s, #12 - add v28.8h, v28.8h, v6.8h -- uqrshrn2 v26.16b, v28.8h, #3 -+ sqrshrun2 v26.16b, v28.8h, #3 - // Y0/Y1 - - // U -@@ -282,11 +282,11 @@ function ff_bgr24toyv12_aarch64, export=1 - shrn v26.4h, v26.4s, #12 - shrn2 v26.8h, v27.4s, #12 - add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- uqrshrn v26.8b, v26.8h, #3 -+ sqrshrun v26.8b, v26.8h, #3 - shrn v28.4h, v28.4s, #12 - shrn2 v28.8h, v29.4s, #12 - add v28.8h, v28.8h, v6.8h -- uqrshrn2 v26.16b, v28.8h, #3 -+ sqrshrun2 v26.16b, v28.8h, #3 - // Y0/Y1 - - // U -@@ -416,11 +416,11 @@ function ff_bgr24toyv12_aarch64, export=1 - shrn v26.4h, v26.4s, #12 - shrn2 v26.8h, v27.4s, #12 - add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- uqrshrn v26.8b, v26.8h, #3 -+ sqrshrun v26.8b, v26.8h, #3 - shrn v28.4h, v28.4s, #12 - shrn2 v28.8h, v29.4s, #12 - add v28.8h, v28.8h, v6.8h -- uqrshrn2 v26.16b, v28.8h, #3 -+ sqrshrun2 v26.16b, v28.8h, #3 - // Y0/Y1 - - subs w9, w9, #16 -@@ 
-464,11 +464,11 @@ function ff_bgr24toyv12_aarch64, export=1 - shrn v26.4h, v26.4s, #12 - shrn2 v26.8h, v27.4s, #12 - add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- uqrshrn v26.8b, v26.8h, #3 -+ sqrshrun v26.8b, v26.8h, #3 - shrn v28.4h, v28.4s, #12 - shrn2 v28.8h, v29.4s, #12 - add v28.8h, v28.8h, v6.8h -- uqrshrn2 v26.16b, v28.8h, #3 -+ sqrshrun2 v26.16b, v28.8h, #3 - // Y0/Y1 - - // Here: - -From 0d553f498626e936ef1f48505ee260dbe2478d0c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sun, 4 Jun 2023 13:37:59 +0000 -Subject: [PATCH 133/186] v4l2_m2m_dec: Tweak pending count to use dts & - reorder size - -(cherry picked from commit ca438b382c90f9a5f58f4708205e6ac25395db2a) ---- - libavcodec/v4l2_m2m.h | 1 + - libavcodec/v4l2_m2m_dec.c | 53 +++++++++++++++++++++++++++++++-------- - 2 files changed, 43 insertions(+), 11 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index ded1478a49da..a506e69d674b 100644 ---- a/libavcodec/v4l2_m2m.h -+++ b/libavcodec/v4l2_m2m.h -@@ -115,6 +115,7 @@ typedef struct V4L2m2mContext { - - /* req pkt */ - int req_pkt; -+ int reorder_size; - - /* Ext data sent */ - int extdata_sent; -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index d124c7b1fc43..13af62e819bc 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -121,13 +121,18 @@ log_dump(void * logctx, int lvl, const void * const data, const size_t len) - } - #endif - --static int64_t pts_stats_guess(const pts_stats_t * const stats) -+static unsigned int pts_stats_interval(const pts_stats_t * const stats) -+{ -+ return stats->last_interval; -+} -+ -+static int64_t pts_stats_guess(const pts_stats_t * const stats, const int fail_bad_guess) - { - if (stats->last_count <= 1) - return stats->last_pts; - if (stats->last_pts == AV_NOPTS_VALUE || -- stats->last_interval == 0 || -- stats->last_count >= STATS_LAST_COUNT_MAX) -+ fail_bad_guess && (stats->last_interval == 0 || -+ stats->last_count >= 
STATS_LAST_COUNT_MAX)) - return AV_NOPTS_VALUE; - return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; - } -@@ -345,7 +350,7 @@ set_best_effort_pts(AVCodecContext *const avctx, - { - pts_stats_add(ps, frame->pts); - -- frame->best_effort_timestamp = pts_stats_guess(ps); -+ frame->best_effort_timestamp = pts_stats_guess(ps, 1); - // If we can't guess from just PTS - try DTS - if (frame->best_effort_timestamp == AV_NOPTS_VALUE) - frame->best_effort_timestamp = frame->pkt_dts; -@@ -380,15 +385,25 @@ xlat_init(xlat_track_t * const x) - } - - static int --xlat_pending(const xlat_track_t * const x) -+xlat_pending(const V4L2m2mContext * const s) - { -+ const xlat_track_t *const x = &s->xlat; - unsigned int n = x->track_no % FF_V4L2_M2M_TRACK_SIZE; - int i; -- const int64_t now = x->last_pts; -+ const int64_t now = pts_stats_guess(&s->pts_stat, 0); -+ int64_t first_dts = AV_NOPTS_VALUE; -+ int no_dts_count = 0; -+ unsigned int interval = pts_stats_interval(&s->pts_stat); - - for (i = 0; i < FF_V4L2_M2M_TRACK_SIZE; ++i, n = (n - 1) & (FF_V4L2_M2M_TRACK_SIZE - 1)) { - const V4L2m2mTrackEl * const t = x->track_els + n; - -+ if (first_dts == AV_NOPTS_VALUE) -+ if (t->dts == AV_NOPTS_VALUE) -+ ++no_dts_count; -+ else -+ first_dts = t->dts; -+ - // Discard only set on never-set or flushed entries - // So if we get here we've never successfully decoded a frame so allow - // more frames into the buffer before stalling -@@ -408,6 +423,18 @@ xlat_pending(const xlat_track_t * const x) - break; - } - -+ if (first_dts != AV_NOPTS_VALUE && now != AV_NOPTS_VALUE && interval != 0 && s->reorder_size != 0) { -+ const int iframes = (first_dts - now) / (int)interval; -+ const int t = iframes - s->reorder_size + no_dts_count; -+ -+// av_log(s->avctx, AV_LOG_DEBUG, "Last:%"PRId64", Now:%"PRId64", First:%"PRId64", delta=%"PRId64", frames=%d, nodts=%d\n", -+// x->last_dts, now, first_dts, first_dts - now, iframes, no_dts_count); -+ -+ if (iframes > 0 && 
iframes < 64 && t < i) { -+ return t; -+ } -+ } -+ - return i; - } - -@@ -585,12 +612,12 @@ static int qbuf_wait(AVCodecContext * const avctx, V4L2Context * const ctx) - static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) - { - V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context; -- int src_rv = NQ_OK; -+ int src_rv = -1; - int dst_rv = 1; // Non-zero (done), non-negative (error) number - unsigned int i = 0; - - do { -- const int pending = xlat_pending(&s->xlat); -+ const int pending = xlat_pending(s); - const int prefer_dq = (pending > 4); - const int last_src_rv = src_rv; - -@@ -966,8 +993,10 @@ static uint32_t max_coded_size(const AVCodecContext * const avctx) - } - - static void --parse_extradata(AVCodecContext *avctx) -+parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) - { -+ s->reorder_size = 0; -+ - if (!avctx->extradata || !avctx->extradata_size) - return; - -@@ -996,6 +1025,7 @@ parse_extradata(AVCodecContext *avctx) - avctx->profile = ff_h264_get_profile(sps); - avctx->level = sps->level_idc; - } -+ s->reorder_size = sps->num_reorder_frames; - } - ff_h264_ps_uninit(&ps); - break; -@@ -1025,6 +1055,7 @@ parse_extradata(AVCodecContext *avctx) - if (sps) { - avctx->profile = sps->ptl.general_ptl.profile_idc; - avctx->level = sps->ptl.general_ptl.level_idc; -+ s->reorder_size = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering; - } - } - ff_hevc_ps_uninit(&ps); -@@ -1057,12 +1088,12 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - avctx->ticks_per_frame = 2; - } - -- parse_extradata(avctx); -- - ret = ff_v4l2_m2m_create_context(priv, &s); - if (ret < 0) - return ret; - -+ parse_extradata(avctx, s); -+ - xlat_init(&s->xlat); - pts_stats_init(&s->pts_stat, avctx, "decoder"); - - -From 244b56393e0f6f1d63b894d942d148c6dd9a3862 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 7 Jun 2023 11:14:52 +0000 -Subject: [PATCH 134/186] v4l2_m2m: Add encode size check - -Previously an 
out of bounds size would fail whilst trying to copy the -buffer with an unhelpful message. This produces a better error at init -time. - -(cherry picked from commit 0b61c4617e26f043d28d44c8767f7b9fd4882f97) ---- - libavcodec/v4l2_m2m.c | 43 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 43 insertions(+) - -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index f802687b1bb2..28d9ed49887e 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -109,6 +109,44 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) - return AVERROR(EINVAL); - } - -+static int check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) -+{ -+ struct v4l2_format fmt = {.type = s->output.type}; -+ int rv; -+ uint32_t pixfmt = ff_v4l2_format_avfmt_to_v4l2(avctx->pix_fmt); -+ unsigned int w; -+ unsigned int h; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { -+ fmt.fmt.pix_mp.pixelformat = pixfmt; -+ fmt.fmt.pix_mp.width = avctx->width; -+ fmt.fmt.pix_mp.height = avctx->height; -+ } -+ else { -+ fmt.fmt.pix.pixelformat = pixfmt; -+ fmt.fmt.pix.width = avctx->width; -+ fmt.fmt.pix.height = avctx->height; -+ } -+ -+ rv = ioctl(s->fd, VIDIOC_TRY_FMT, &fmt); -+ -+ if (rv != 0) { -+ rv = AVERROR(errno); -+ av_log(avctx, AV_LOG_ERROR, "%s: Tryfmt failed: %s\n", __func__, av_err2str(rv)); -+ return rv; -+ } -+ -+ w = ff_v4l2_get_format_width(&fmt); -+ h = ff_v4l2_get_format_height(&fmt); -+ -+ if (w < avctx->width || h < avctx->height) { -+ av_log(avctx, AV_LOG_WARNING, "%s: Size check failed: asked for %dx%d, got: %dx%d\n", __func__, avctx->width, avctx->height, w, h); -+ return AVERROR(EINVAL); -+ } -+ -+ return 0; -+} -+ - static int v4l2_probe_driver(V4L2m2mContext *s) - { - void *log_ctx = s->avctx; -@@ -128,6 +166,11 @@ static int v4l2_probe_driver(V4L2m2mContext *s) - goto done; - } - -+ // If being given frames (encode) check that V4L2 can cope with the size -+ if (s->output.av_codec_id == AV_CODEC_ID_RAWVIDEO && -+ (ret = 
check_size(s->avctx, s)) != 0) -+ goto done; -+ - ret = ff_v4l2_context_get_format(&s->capture, 1); - if (ret) { - av_log(log_ctx, AV_LOG_DEBUG, "v4l2 capture format not supported\n"); - -From 834a78de7eda652d34ad72ff6d63d4ba86d22fa3 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 9 Jun 2023 10:28:12 +0000 -Subject: [PATCH 135/186] vf_bwdif: Add attributes to ask for vectorization - -(cherry picked from commit 281250290ba5c2dcd8676e9a261050e65c10bcb7) ---- - libavfilter/vf_bwdif.c | 29 +++++++++++++++-------------- - 1 file changed, 15 insertions(+), 14 deletions(-) - -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 65c617ebb335..09e68523bbfa 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -74,10 +74,10 @@ typedef struct ThreadData { - int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] - e)) >> 1; \ - int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e)) >> 1; \ - int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \ -- \ -+ {/*\ - if (!diff) { \ - dst[0] = d; \ -- } else { -+ } else {*/ - - #define SPAT_CHECK() \ - int b = ((prev2[mrefs2] + next2[mrefs2]) >> 1) - c; \ -@@ -89,15 +89,16 @@ typedef struct ThreadData { - diff = FFMAX3(diff, min, -max); - - #define FILTER_LINE() \ -+ int i1, i2; \ - SPAT_CHECK() \ -- if (FFABS(c - e) > temporal_diff0) { \ -- interpol = (((coef_hf[0] * (prev2[0] + next2[0]) \ -+ /*if (FFABS(c - e) > temporal_diff0)*/ { \ -+ i1 = (((coef_hf[0] * (prev2[0] + next2[0]) \ - - coef_hf[1] * (prev2[mrefs2] + next2[mrefs2] + prev2[prefs2] + next2[prefs2]) \ - + coef_hf[2] * (prev2[mrefs4] + next2[mrefs4] + prev2[prefs4] + next2[prefs4])) >> 2) \ - + coef_lf[0] * (c + e) - coef_lf[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ -- } else { \ -- interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ -- } -+ } /*else*/ { \ -+ i2 = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ -+ }interpol = 
FFABS(c - e) > temporal_diff0 ? i1:i2;\ - - #define FILTER_EDGE() \ - if (spat) { \ -@@ -111,7 +112,7 @@ typedef struct ThreadData { - else if (interpol < d - diff) \ - interpol = d - diff; \ - \ -- dst[0] = av_clip(interpol, 0, clip_max); \ -+ dst[0] = !diff ? d : av_clip(interpol, 0, clip_max); \ - } \ - \ - dst++; \ -@@ -122,7 +123,7 @@ typedef struct ThreadData { - next2++; \ - } - --static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, -+static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max) - { - uint8_t *dst = dst1; -@@ -132,7 +133,7 @@ static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, - FILTER_INTRA() - } - --static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, -+static void __attribute__((optimize("tree-vectorize"))) filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int prefs3, int mrefs3, int prefs4, int mrefs4, - int parity, int clip_max) -@@ -150,7 +151,7 @@ static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, - FILTER2() - } - --static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, -+static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int parity, int clip_max, int spat) - { -@@ -167,7 +168,7 @@ static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, - FILTER2() - } - --static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mrefs, -+static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, 
int parity, int clip_max) - { - uint16_t *dst = dst1; -@@ -177,7 +178,7 @@ static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mre - FILTER_INTRA() - } - --static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1, -+static void __attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int prefs3, int mrefs3, int prefs4, int mrefs4, - int parity, int clip_max) -@@ -195,7 +196,7 @@ static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1 - FILTER2() - } - --static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1, -+static void __attribute__((optimize("tree-vectorize"))) filter_edge_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int parity, int clip_max, int spat) - { - -From b4f4f90c9bda485a07a894f2700aecad25ff1781 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 13 Jun 2023 13:07:55 +0000 -Subject: [PATCH 136/186] v4l2m2m_dec: Fix h264 reorder size if no sps - initially - -(cherry picked from commit 8832f7924bf47cbca0de251d7b406917f958ebf4) ---- - libavcodec/v4l2_m2m_dec.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 13af62e819bc..11c83b2d6643 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -1024,8 +1024,8 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) - if (sps) { - avctx->profile = ff_h264_get_profile(sps); - avctx->level = sps->level_idc; -+ s->reorder_size = sps->num_reorder_frames; - } -- s->reorder_size = sps->num_reorder_frames; - } - ff_h264_ps_uninit(&ps); - break; - -From f9124edee3874fc5ac9633f59248a9122f22e9a1 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 30 Jun 
2023 18:03:29 +0000 -Subject: [PATCH 137/186] sand_fns: Add missing uxtw for neon stride - ---- - libavutil/aarch64/rpi_sand_neon.S | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S -index 2f07d9674c9f..19411cf3f19a 100644 ---- a/libavutil/aarch64/rpi_sand_neon.S -+++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -469,6 +469,7 @@ endfunc - function ff_rpi_sand30_lines_to_planar_y16, export=1 - lsl w4, w4, #7 - sub w4, w4, #64 -+ uxtw x4, w4 - sub w1, w1, w7, lsl #1 - uxtw x6, w6 - add x8, x2, x6, lsl #7 -@@ -634,6 +635,7 @@ endfunc - function ff_rpi_sand30_lines_to_planar_y8, export=1 - lsl w4, w4, #7 - sub w4, w4, #64 -+ uxtw x4, w4 - sub w1, w1, w7 - uxtw x6, w6 - add x8, x2, x6, lsl #7 - -From feed60f18216ea49b2ec7d54c71d342cb7c16c6e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 30 Jun 2023 18:12:16 +0000 -Subject: [PATCH 138/186] sand_fns: Rework aarch64 neon - sand30_lines_to_planar_c16 - -Previous version could overflow its write buffer on small buffers -which sometimes crashed WPP_F_ericsson_MAIN10_2. 
- -This version is probably faster too ---- - libavutil/aarch64/rpi_sand_neon.S | 329 ++++++++++++++---------------- - 1 file changed, 151 insertions(+), 178 deletions(-) - -diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S -index 19411cf3f19a..af7e2a88c44b 100644 ---- a/libavutil/aarch64/rpi_sand_neon.S -+++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -248,199 +248,172 @@ incomplete_block_loop_end_c8: - ret - endfunc - --//void ff_rpi_sand30_lines_to_planar_c16( --// uint8_t * dst_u, // [x0] --// unsigned int dst_stride_u, // [w1] == _w*2 --// uint8_t * dst_v, // [x2] --// unsigned int dst_stride_v, // [w3] == _w*2 --// const uint8_t * src, // [x4] --// unsigned int stride1, // [w5] == 128 --// unsigned int stride2, // [w6] --// unsigned int _x, // [w7] == 0 --// unsigned int y, // [sp, #0] == 0 --// unsigned int _w, // [sp, #8] -> w3 --// unsigned int h); // [sp, #16] -> w7 -- --.macro rpi_sand30_lines_to_planar_c16_block_half -- ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x13], #64 -- -- xtn v4.4h, v0.4s -- ushr v0.4s, v0.4s, #10 -- xtn v5.4h, v0.4s -- ushr v0.4s, v0.4s, #10 -- xtn v6.4h, v0.4s -- xtn2 v4.8h, v1.4s -- ushr v1.4s, v1.4s, #10 -- xtn2 v5.8h, v1.4s -- ushr v1.4s, v1.4s, #10 -- xtn2 v6.8h, v1.4s -- and v4.16b, v4.16b, v16.16b -- and v5.16b, v5.16b, v16.16b -- and v6.16b, v6.16b, v16.16b -- st3 { v4.8h, v5.8h, v6.8h }, [sp], #48 -- -- xtn v4.4h, v2.4s -- ushr v2.4s, v2.4s, #10 -- xtn v5.4h, v2.4s -- ushr v2.4s, v2.4s, #10 -- xtn v6.4h, v2.4s -- xtn2 v4.8h, v3.4s -- ushr v3.4s, v3.4s, #10 -- xtn2 v5.8h, v3.4s -- ushr v3.4s, v3.4s, #10 -- xtn2 v6.8h, v3.4s -- and v4.16b, v4.16b, v16.16b -- and v5.16b, v5.16b, v16.16b -- and v6.16b, v6.16b, v16.16b -- st3 { v4.8h, v5.8h, v6.8h }, [sp] -- sub sp, sp, #48 --.endm -- --function ff_rpi_sand30_lines_to_planar_c16, export=1 -- stp x19, x20, [sp, #-48]! 
-- stp x21, x22, [sp, #16] -- stp x23, x24, [sp, #32] -- -- ldr w3, [sp, #48+8] // w3 = width -- ldr w7, [sp, #48+16] // w7 = height -- -- // reserve space on the stack for intermediate results -- sub sp, sp, #256 -+// Unzip chroma -+// -+// On entry: -+// a0 = V0, U2, ... -+// a1 = U0, V1, ... -+// a2 = U1, V2, ... -+// b0 = V8, U10, ... -+// b1 = U8, V9, ... -+// b2 = U9, V10, ... -+// -+// On exit: -+// d0 = U0, U3, ... -+// ... -+// a0 = V0, V3, .. -+// ... -+// -+// Reg order for USAND is a1, a0, a2 (i.e. swap natural order of 1st 2 dest regs) - -- // number of 128byte blocks per row, w8 = width / 48 -- mov w9, #48 -- udiv w8, w3, w9 -+.macro UZPH_C d0, d1, d2, a0, a1, a2, b0, b1, b2 -+ uzp1 \d0\().8h, \a1\().8h, \b1\().8h -+ uzp1 \d1\().8h, \a2\().8h, \b2\().8h -+ uzp2 \d2\().8h, \a0\().8h, \b0\().8h - -- // remaining pixels (rem_pix) per row, w9 = width - w8 * 48 -- mul w9, w8, w9 -- sub w9, w3, w9 -+ uzp1 \a0\().8h, \a0\().8h, \b0\().8h -+ uzp2 \a1\().8h, \a1\().8h, \b1\().8h -+ uzp2 \a2\().8h, \a2\().8h, \b2\().8h -+.endm - -- // row offset, the beginning of the next row to process -- eor w10, w10, w10 -+// SAND30 -> 10bit -+.macro USAND10 d0, d1, d2, a0, a1 -+ shrn \d2\().4h, \a0\().4s, #14 -+ xtn \d0\().4h, \a0\().4s -+ shrn \d1\().4h, \a0\().4s, #10 - -- // offset to the beginning of the next block, w11 = stride2 * 128 - 128 -- lsl w11, w6, #7 -- sub w11, w11, #128 -+ shrn2 \d2\().8h, \a1\().4s, #14 -+ xtn2 \d0\().8h, \a1\().4s -+ shrn2 \d1\().8h, \a1\().4s, #10 - -- // decrease the height by one and in case of remaining pixels increase the block count by one -- sub w7, w7, #1 -- cmp w9, #0 -- cset w19, ne // w19 == 1 iff reamining pixels != 0 -- add w8, w8, w19 -+ ushr \d2\().8h, \d2\().8h, #6 -+ bic \d0\().8h, #0xfc, lsl #8 -+ bic \d1\().8h, #0xfc, lsl #8 -+.endm - -- // bytes we have to move dst back by at the end of every row -- mov w21, #48*2 -- mul w21, w21, w8 -- sub w21, w1, w21 -+// void ff_rpi_sand30_lines_to_planar_c16( -+// uint8_t * dst_u, 
// [x0] -+// unsigned int dst_stride_u, // [w1] -+// uint8_t * dst_v, // [x2] -+// unsigned int dst_stride_v, // [w3] -+// const uint8_t * src, // [x4] -+// unsigned int stride1, // [w5] 128 -+// unsigned int stride2, // [w6] -+// unsigned int _x, // [w7] 0 -+// unsigned int y, // [sp, #0] -+// unsigned int _w, // [sp, #8] w9 -+// unsigned int h); // [sp, #16] w10 - -- mov w20, #0 // w20 = flag, last row processed -+function ff_rpi_sand30_lines_to_planar_c16, export=1 -+ ldr w7, [sp, #0] // y -+ ldr w8, [sp, #8] // _w -+ ldr w10, [sp, #16] // h -+ lsl w6, w6, #7 // Fixup stride2 -+ sub w6, w6, #64 -+ uxtw x6, w6 -+ sub w1, w1, w8, LSL #1 // Fixup chroma strides -+ sub w3, w3, w8, LSL #1 -+ lsl w7, w7, #7 // Add y to src -+ add x4, x4, w7, UXTW -+10: -+ mov w13, #0 -+ mov x5, x4 -+ mov w9, w8 -+1: -+ ld1 {v0.4s-v3.4s}, [x5], #64 -+ ld1 {v4.4s-v7.4s}, [x5], x6 - -- mov x12, #0x03ff03ff03ff03ff -- dup v16.2d, x12 -+ USAND10 v17, v16, v18, v0, v1 -+ USAND10 v20, v19, v21, v2, v3 -+ UZPH_C v0, v1, v2, v16, v17, v18, v19, v20, v21 -+ USAND10 v23, v22, v24, v4, v5 -+ USAND10 v26, v25, v27, v6, v7 -+ UZPH_C v4, v5, v6, v22, v23, v24, v25, v26, v27 - -- // iterate through rows, row counter = w12 = 0 -- eor w12, w12, w12 --row_loop_c16: -- cmp w12, w7 -- bge row_loop_c16_fin -+ subs w9, w9, #48 -+ blt 2f - -- // address of row data = src + row_offset -- mov x13, x4 -- add x13, x13, x10 -+ st3 {v0.8h-v2.8h}, [x0], #48 -+ st3 {v4.8h-v6.8h}, [x0], #48 -+ st3 {v16.8h-v18.8h}, [x2], #48 -+ st3 {v22.8h-v24.8h}, [x2], #48 - -- eor w14, w14, w14 --block_loop_c16: -- cmp w14, w8 -- bge block_loop_c16_fin -- -- rpi_sand30_lines_to_planar_c16_block_half -- -- ld2 { v0.8h, v1.8h }, [sp], #32 -- ld2 { v2.8h, v3.8h }, [sp], #32 -- ld2 { v4.8h, v5.8h }, [sp] -- sub sp, sp, #64 -- -- st1 { v0.8h }, [x0], #16 -- st1 { v2.8h }, [x0], #16 -- st1 { v4.8h }, [x0], #16 -- st1 { v1.8h }, [x2], #16 -- st1 { v3.8h }, [x2], #16 -- st1 { v5.8h }, [x2], #16 -- -- 
rpi_sand30_lines_to_planar_c16_block_half -- -- ld2 { v0.8h, v1.8h }, [sp], #32 -- ld2 { v2.8h, v3.8h }, [sp], #32 -- ld2 { v4.8h, v5.8h }, [sp] -- sub sp, sp, #64 -- -- st1 { v0.8h }, [x0], #16 -- st1 { v2.8h }, [x0], #16 -- st1 { v4.8h }, [x0], #16 -- st1 { v1.8h }, [x2], #16 -- st1 { v3.8h }, [x2], #16 -- st1 { v5.8h }, [x2], #16 -- -- add x13, x13, x11 // offset to next block -- add w14, w14, #1 -- b block_loop_c16 --block_loop_c16_fin: -+ bne 1b -+11: -+ subs w10, w10, #1 -+ add x4, x4, #128 -+ add x0, x0, w1, UXTW -+ add x2, x2, w3, UXTW -+ bne 10b -+99: -+ ret - -- add w10, w10, #128 -- add w12, w12, #1 -- add x0, x0, w21, sxtw // move dst pointers back by x21 -- add x2, x2, w21, sxtw -- b row_loop_c16 --row_loop_c16_fin: -- -- cmp w20, #1 -- beq row_loop_c16_fin2 -- mov w20, #1 -- sub w8, w8, w19 // decrease block count by w19 -- add w7, w7, #1 // increase height -- b row_loop_c16 -- --row_loop_c16_fin2: -- sub x0, x0, w21, sxtw // readd x21 in case of the last row -- sub x2, x2, w21, sxtw // so that we can write out the few remaining pixels -- -- // last incomplete block to be finished -- // read operations are fine, stride2 is more than large enough even if rem_pix is 0 -- rpi_sand30_lines_to_planar_c16_block_half -- ld2 { v0.8h, v1.8h }, [sp], #32 -- ld2 { v2.8h, v3.8h }, [sp], #32 -- ld2 { v4.8h, v5.8h }, [sp], #32 -- rpi_sand30_lines_to_planar_c16_block_half -- ld2 { v0.8h, v1.8h }, [sp], #32 -- ld2 { v2.8h, v3.8h }, [sp], #32 -- ld2 { v4.8h, v5.8h }, [sp] -- sub sp, sp, #160 -- -- mov x4, sp -- eor w20, w20, w20 --rem_pix_c16_loop: -- cmp w20, w9 -- bge rem_pix_c16_fin -- -- ldr w22, [x4], #4 -- str w22, [x0], #2 -- lsr w22, w22, #16 -- str w22, [x2], #2 -- -- add w20, w20, #1 -- b rem_pix_c16_loop --rem_pix_c16_fin: -- -- add sp, sp, #256 -- -- ldp x23, x24, [sp, #32] -- ldp x21, x22, [sp, #16] -- ldp x19, x20, [sp], #48 -- ret -+// Partial final write -+2: -+ cmp w9, #24-48 -+ blt 1f -+ st3 {v0.8h - v2.8h}, [x0], #48 -+ st3 {v16.8h - v18.8h}, [x2], 
#48 -+ beq 11b -+ mov v0.16b, v4.16b -+ mov v1.16b, v5.16b -+ sub w9, w9, #24 -+ mov v2.16b, v6.16b -+ mov v16.16b, v22.16b -+ mov v17.16b, v23.16b -+ mov v18.16b, v24.16b -+1: -+ cmp w9, #12-48 -+ blt 1f -+ st3 {v0.4h - v2.4h}, [x0], #24 -+ st3 {v16.4h - v18.4h}, [x2], #24 -+ beq 11b -+ mov v0.2d[0], v0.2d[1] -+ sub w9, w9, #12 -+ mov v1.2d[0], v1.2d[1] -+ mov v2.2d[0], v2.2d[1] -+ mov v16.2d[0], v16.2d[1] -+ mov v17.2d[0], v17.2d[1] -+ mov v18.2d[0], v18.2d[1] -+1: -+ cmp w9, #6-48 -+ blt 1f -+ st3 {v0.h - v2.h}[0], [x0], #6 -+ st3 {v0.h - v2.h}[1], [x0], #6 -+ st3 {v16.h - v18.h}[0], [x2], #6 -+ st3 {v16.h - v18.h}[1], [x2], #6 -+ beq 11b -+ mov v0.s[0], v0.s[1] -+ sub w9, w9, #6 -+ mov v1.s[0], v1.s[1] -+ mov v2.s[0], v2.s[1] -+ mov v16.s[0], v16.s[1] -+ mov v17.s[0], v17.s[1] -+ mov v18.s[0], v18.s[1] -+1: -+ cmp w9, #3-48 -+ blt 1f -+ st3 {v0.h - v2.h}[0], [x0], #6 -+ st3 {v16.h - v18.h}[0], [x2], #6 -+ beq 11b -+ mov v0.h[0], v0.h[1] -+ sub w9, w9, #3 -+ mov v1.h[0], v1.h[1] -+ mov v16.h[0], v16.h[1] -+ mov v17.h[0], v17.h[1] -+1: -+ cmp w9, #2-48 -+ blt 1f -+ st2 {v0.h - v1.h}[0], [x0], #4 -+ st2 {v16.h - v17.h}[0], [x2], #4 -+ b 11b -+1: -+ st1 {v0.h}[0], [x0], #2 -+ st1 {v16.h}[0], [x2], #2 -+ b 11b - endfunc - - -- - //void ff_rpi_sand30_lines_to_planar_p010( - // uint8_t * dest, - // unsigned int dst_stride, - -From 7f8e8bb693607117f5f7bff2ee7ac7f841f3f726 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 30 Jun 2023 19:41:06 +0000 -Subject: [PATCH 139/186] sand_fns: Minor optimisations to aarch64 neon - ---- - libavutil/aarch64/rpi_sand_neon.S | 140 ++++++------------------------ - 1 file changed, 28 insertions(+), 112 deletions(-) - -diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S -index af7e2a88c44b..11658de0c8c2 100644 ---- a/libavutil/aarch64/rpi_sand_neon.S -+++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -279,18 +279,37 @@ endfunc - // SAND30 -> 10bit - .macro USAND10 d0, d1, d2, a0, a1 - shrn \d2\().4h, 
\a0\().4s, #14 -- xtn \d0\().4h, \a0\().4s - shrn \d1\().4h, \a0\().4s, #10 - - shrn2 \d2\().8h, \a1\().4s, #14 -- xtn2 \d0\().8h, \a1\().4s - shrn2 \d1\().8h, \a1\().4s, #10 -+ uzp1 \d0\().8h, \a0\().8h, \a1\().8h - - ushr \d2\().8h, \d2\().8h, #6 - bic \d0\().8h, #0xfc, lsl #8 - bic \d1\().8h, #0xfc, lsl #8 - .endm - -+// SAND30 -> 8bit -+.macro USAND8 d0, d1, d2, a0, a1, a2, a3, t0, t1, t2 -+ shrn \d1\().4h, \a0\().4s, #12 -+ shrn2 \d1\().8h, \a1\().4s, #12 -+ uzp1 \d0\().8h, \a0\().8h, \a1\().8h -+ uzp2 \d2\().8h, \a0\().8h, \a1\().8h -+ -+ shrn \t1\().4h, \a2\().4s, #12 -+ shrn2 \t1\().8h, \a3\().4s, #12 -+ uzp1 \t0\().8h, \a2\().8h, \a3\().8h -+ uzp2 \t2\().8h, \a2\().8h, \a3\().8h -+ -+ shrn \d0\().8b, \d0\().8h, #2 -+ shrn2 \d0\().16b, \t0\().8h, #2 -+ shrn \d2\().8b, \d2\().8h, #6 -+ shrn2 \d2\().16b, \t2\().8h, #6 -+ uzp1 \d1\().16b, \d1\().16b, \t1\().16b -+.endm -+ -+ - // void ff_rpi_sand30_lines_to_planar_c16( - // uint8_t * dst_u, // [x0] - // unsigned int dst_stride_u, // [w1] -@@ -322,6 +341,7 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1 - 1: - ld1 {v0.4s-v3.4s}, [x5], #64 - ld1 {v4.4s-v7.4s}, [x5], x6 -+ subs w9, w9, #48 - - USAND10 v17, v16, v18, v0, v1 - USAND10 v20, v19, v21, v2, v3 -@@ -330,7 +350,6 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1 - USAND10 v26, v25, v27, v6, v7 - UZPH_C v4, v5, v6, v22, v23, v24, v25, v26, v27 - -- subs w9, w9, #48 - blt 2f - - st3 {v0.8h-v2.8h}, [x0], #48 -@@ -457,61 +476,10 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1 - - subs w5, w5, #96 - -- // v0, v1 -- -- shrn v18.4h, v0.4s, #14 -- xtn v16.4h, v0.4s -- shrn v17.4h, v0.4s, #10 -- -- shrn2 v18.8h, v1.4s, #14 -- xtn2 v16.8h, v1.4s -- shrn2 v17.8h, v1.4s, #10 -- -- ushr v18.8h, v18.8h, #6 -- bic v16.8h, #0xfc, lsl #8 -- bic v17.8h, #0xfc, lsl #8 -- -- // v2, v3 -- -- shrn v21.4h, v2.4s, #14 -- xtn v19.4h, v2.4s -- shrn v20.4h, v2.4s, #10 -- -- shrn2 v21.8h, v3.4s, #14 -- xtn2 v19.8h, v3.4s -- shrn2 v20.8h, v3.4s, #10 -- -- ushr 
v21.8h, v21.8h, #6 -- bic v19.8h, #0xfc, lsl #8 -- bic v20.8h, #0xfc, lsl #8 -- -- // v4, v5 -- -- shrn v24.4h, v4.4s, #14 -- xtn v22.4h, v4.4s -- shrn v23.4h, v4.4s, #10 -- -- shrn2 v24.8h, v5.4s, #14 -- xtn2 v22.8h, v5.4s -- shrn2 v23.8h, v5.4s, #10 -- -- ushr v24.8h, v24.8h, #6 -- bic v22.8h, #0xfc, lsl #8 -- bic v23.8h, #0xfc, lsl #8 -- -- // v6, v7 -- -- shrn v27.4h, v6.4s, #14 -- xtn v25.4h, v6.4s -- shrn v26.4h, v6.4s, #10 -- -- shrn2 v27.8h, v7.4s, #14 -- xtn2 v25.8h, v7.4s -- shrn2 v26.8h, v7.4s, #10 -- -- ushr v27.8h, v27.8h, #6 -- bic v25.8h, #0xfc, lsl #8 -- bic v26.8h, #0xfc, lsl #8 -+ USAND10 v16, v17, v18, v0, v1 -+ USAND10 v19, v20, v21, v2, v3 -+ USAND10 v22, v23, v24, v4, v5 -+ USAND10 v25, v26, v27, v6, v7 - - blt 2f - -@@ -624,60 +592,8 @@ function ff_rpi_sand30_lines_to_planar_y8, export=1 - subs w5, w5, #96 - - // v0, v1 -- -- shrn v18.4h, v0.4s, #16 -- xtn v16.4h, v0.4s -- shrn v17.4h, v0.4s, #12 -- -- shrn2 v18.8h, v1.4s, #16 -- xtn2 v16.8h, v1.4s -- shrn2 v17.8h, v1.4s, #12 -- -- shrn v18.8b, v18.8h, #6 -- shrn v16.8b, v16.8h, #2 -- xtn v17.8b, v17.8h -- -- // v2, v3 -- -- shrn v21.4h, v2.4s, #16 -- xtn v19.4h, v2.4s -- shrn v20.4h, v2.4s, #12 -- -- shrn2 v21.8h, v3.4s, #16 -- xtn2 v19.8h, v3.4s -- shrn2 v20.8h, v3.4s, #12 -- -- shrn2 v18.16b, v21.8h, #6 -- shrn2 v16.16b, v19.8h, #2 -- xtn2 v17.16b, v20.8h -- -- // v4, v5 -- -- shrn v24.4h, v4.4s, #16 -- xtn v22.4h, v4.4s -- shrn v23.4h, v4.4s, #12 -- -- shrn2 v24.8h, v5.4s, #16 -- xtn2 v22.8h, v5.4s -- shrn2 v23.8h, v5.4s, #12 -- -- shrn v21.8b, v24.8h, #6 -- shrn v19.8b, v22.8h, #2 -- xtn v20.8b, v23.8h -- -- // v6, v7 -- -- shrn v27.4h, v6.4s, #16 -- xtn v25.4h, v6.4s -- shrn v26.4h, v6.4s, #12 -- -- shrn2 v27.8h, v7.4s, #16 -- xtn2 v25.8h, v7.4s -- shrn2 v26.8h, v7.4s, #12 -- -- shrn2 v21.16b, v27.8h, #6 -- shrn2 v19.16b, v25.8h, #2 -- xtn2 v20.16b, v26.8h -+ USAND8 v16, v17, v18, v0, v1, v2, v3, v22, v23, v24 -+ USAND8 v19, v20, v21, v4, v5, v6, v7, v22, v23, v24 - - blt 2f - - -From 
700b43043a725509ef9cb6e1d51b28d1b96a6914 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sat, 1 Jul 2023 18:43:32 +0000 -Subject: [PATCH 140/186] sand_fns: Add test for neon to sand30 fns so they can - be tested by checkasm - ---- - libavutil/rpi_sand_fns.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - -diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c -index b6071e2928f7..0626bb06cb1b 100644 ---- a/libavutil/rpi_sand_fns.c -+++ b/libavutil/rpi_sand_fns.c -@@ -35,10 +35,12 @@ Authors: John Cox - #include "frame.h" - - #if ARCH_ARM && HAVE_NEON --#include "arm/rpi_sand_neon.h" -+#include "libavutil/arm/cpu.h" -+#include "libavutil/arm/rpi_sand_neon.h" - #define HAVE_SAND_ASM 1 - #elif ARCH_AARCH64 && HAVE_NEON --#include "aarch64/rpi_sand_neon.h" -+#include "libavutil/aarch64/cpu.h" -+#include "libavutil/aarch64/rpi_sand_neon.h" - #define HAVE_SAND_ASM 1 - #else - #define HAVE_SAND_ASM 0 -@@ -97,7 +99,7 @@ void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride, - const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words - - #if HAVE_SAND_ASM -- if (_x == 0) { -+ if (_x == 0 && have_neon(av_get_cpu_flags())) { - ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); - return; - } -@@ -163,7 +165,7 @@ void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_ - const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words - - #if HAVE_SAND_ASM -- if (_x == 0) { -+ if (_x == 0 && have_neon(av_get_cpu_flags())) { - ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v, - src, stride1, stride2, _x, y, _w, h); - return; - -From a0a5898d3d19aaa5324e5c64e526c6bb7f39f62b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sat, 1 Jul 2023 18:43:57 +0000 -Subject: [PATCH 141/186] checkasm: Add tests for rpi_sand sand30 fns - -Something of a kludge for 
function selection as, at the moment, the -rpi_sand fns don't have a jump table that we could use for selection. ---- - tests/checkasm/Makefile | 3 +- - tests/checkasm/checkasm.c | 3 + - tests/checkasm/checkasm.h | 1 + - tests/checkasm/rpi_sand.c | 118 ++++++++++++++++++++++++++++++++++++++ - tests/fate/checkasm.mak | 1 + - 5 files changed, 125 insertions(+), 1 deletion(-) - create mode 100644 tests/checkasm/rpi_sand.c - diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile -index a6f06c7007c2..66291baf3375 100644 +index ae324ced3f0c..3d1004f934f9 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile -@@ -59,8 +59,9 @@ CHECKASMOBJS-$(CONFIG_SWSCALE) += $(SWSCALEOBJS) - AVUTILOBJS += av_tx.o +@@ -73,8 +73,9 @@ AVUTILOBJS += av_tx.o AVUTILOBJS += fixed_dsp.o AVUTILOBJS += float_dsp.o + AVUTILOBJS += lls.o +AVUTILOBJS-$(CONFIG_SAND) += rpi_sand.o -CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) @@ -35635,12 +23439,12 @@ index a6f06c7007c2..66291baf3375 100644 CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c -index e96d84a7daef..57e0091b806a 100644 +index 73a998ae3a94..ed703c1956bd 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c -@@ -210,6 +210,9 @@ static const struct { - { "fixed_dsp", checkasm_check_fixed_dsp }, +@@ -290,6 +290,9 @@ static const struct { { "float_dsp", checkasm_check_float_dsp }, + { "lls", checkasm_check_lls }, { "av_tx", checkasm_check_av_tx }, + #if CONFIG_SAND + { "rpi_sand", checkasm_check_rpi_sand }, @@ -35649,23 +23453,23 @@ index e96d84a7daef..57e0091b806a 100644 { NULL } }; diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h -index 8744a81218a8..f4a0d20358c0 100644 +index 866eef01e98d..17584bf9c4e1 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h -@@ -73,6 +73,7 @@ void checkasm_check_motion(void); +@@ -114,6 +114,7 @@ void 
checkasm_check_mpegvideoencdsp(void); void checkasm_check_nlmeans(void); void checkasm_check_opusdsp(void); void checkasm_check_pixblockdsp(void); +void checkasm_check_rpi_sand(void); void checkasm_check_sbrdsp(void); - void checkasm_check_synth_filter(void); - void checkasm_check_sw_gbrp(void); + void checkasm_check_rv34dsp(void); + void checkasm_check_rv40dsp(void); diff --git a/tests/checkasm/rpi_sand.c b/tests/checkasm/rpi_sand.c new file mode 100644 -index 000000000000..0888714c4c5c +index 000000000000..dd9c1350bc2a --- /dev/null +++ b/tests/checkasm/rpi_sand.c -@@ -0,0 +1,118 @@ +@@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 John Cox + * @@ -35697,6 +23501,10 @@ index 000000000000..0888714c4c5c +#elif ARCH_AARCH64 +#include "libavutil/aarch64/cpu.h" +#include "libavutil/aarch64/rpi_sand_neon.h" ++#else ++#define have_neon(flags) 0 ++#define ff_rpi_sand30_lines_to_planar_y16 NULL ++#define ff_rpi_sand30_lines_to_planar_c16 NULL +#endif + +static inline uint32_t pack30(unsigned int a, unsigned int b, unsigned int c) @@ -35785,4721 +23593,118 @@ index 000000000000..0888714c4c5c +} + diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak -index a4e95541f56c..6fda6d227e13 100644 +index d1396cb64161..7121d5461ef9 100644 --- a/tests/fate/checkasm.mak +++ b/tests/fate/checkasm.mak -@@ -27,6 +27,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \ - fate-checkasm-motion \ +@@ -36,6 +36,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \ + fate-checkasm-mpegvideoencdsp \ fate-checkasm-opusdsp \ fate-checkasm-pixblockdsp \ + fate-checkasm-rpi_sand \ fate-checkasm-sbrdsp \ - fate-checkasm-synth_filter \ - fate-checkasm-sw_gbrp \ - -From 3d9471725138933bc137fff7ed85d2fcc277f2af Mon Sep 17 00:00:00 2001 -From: James Darnley -Date: Mon, 20 Feb 2023 20:55:08 +0100 -Subject: [PATCH 142/186] avfilter/bwdif: move filter_line init to a dedicated - function - -(cherry picked from commit b503b5a0cf80f38ecf4737c012b621b7e94f242a) ---- - libavfilter/bwdif.h | 3 ++- - 
libavfilter/vf_bwdif.c | 13 +++++++++---- - libavfilter/x86/vf_bwdif_init.c | 4 +--- - 3 files changed, 12 insertions(+), 8 deletions(-) - -diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h -index 889ff772edd8..5749345f784e 100644 ---- a/libavfilter/bwdif.h -+++ b/libavfilter/bwdif.h -@@ -37,6 +37,7 @@ typedef struct BWDIFContext { - int parity, int clip_max, int spat); - } BWDIFContext; - --void ff_bwdif_init_x86(BWDIFContext *bwdif); -+void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth); -+void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth); - - #endif /* AVFILTER_BWDIF_H */ -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 09e68523bbfa..539fabbd4686 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -341,7 +341,14 @@ static int config_props(AVFilterLink *link) - - yadif->csp = av_pix_fmt_desc_get(link->format); - yadif->filter = filter; -- if (yadif->csp->comp[0].depth > 8) { -+ ff_bwdif_init_filter_line(s, yadif->csp->comp[0].depth); -+ -+ return 0; -+} -+ -+av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth) -+{ -+ if (bit_depth > 8) { - s->filter_intra = filter_intra_16bit; - s->filter_line = filter_line_c_16bit; - s->filter_edge = filter_edge_16bit; -@@ -352,10 +359,8 @@ static int config_props(AVFilterLink *link) - } - - #if ARCH_X86 -- ff_bwdif_init_x86(s); -+ ff_bwdif_init_x86(s, bit_depth); - #endif -- -- return 0; - } - - -diff --git a/libavfilter/x86/vf_bwdif_init.c b/libavfilter/x86/vf_bwdif_init.c -index e24e5cd9b1c5..ba7bc40c3d30 100644 ---- a/libavfilter/x86/vf_bwdif_init.c -+++ b/libavfilter/x86/vf_bwdif_init.c -@@ -42,11 +42,9 @@ void ff_bwdif_filter_line_12bit_ssse3(void *dst, void *prev, void *cur, void *ne - int mrefs2, int prefs3, int mrefs3, int prefs4, - int mrefs4, int parity, int clip_max); - --av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif) -+av_cold void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth) - { -- YADIFContext *yadif = 
&bwdif->yadif; - int cpu_flags = av_get_cpu_flags(); -- int bit_depth = (!yadif->csp) ? 8 : yadif->csp->comp[0].depth; - - if (bit_depth <= 8) { - if (EXTERNAL_SSE2(cpu_flags)) - -From be14915902bc8d4a9aff0776dc976a17f0a27215 Mon Sep 17 00:00:00 2001 -From: James Darnley -Date: Mon, 20 Feb 2023 20:55:08 +0100 -Subject: [PATCH 143/186] checkasm: add test for bwdif - -(cherry picked from commit 087faf8cac51e5e20a5f41b36b8d4c2705a10039) ---- - tests/checkasm/Makefile | 1 + - tests/checkasm/checkasm.c | 3 ++ - tests/checkasm/checkasm.h | 1 + - tests/checkasm/vf_bwdif.c | 84 +++++++++++++++++++++++++++++++++++++++ - tests/fate/checkasm.mak | 1 + - 5 files changed, 90 insertions(+) - create mode 100644 tests/checkasm/vf_bwdif.c - -diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile -index 66291baf3375..2c80d8e66116 100644 ---- a/tests/checkasm/Makefile -+++ b/tests/checkasm/Makefile -@@ -40,6 +40,7 @@ CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) - # libavfilter tests - AVFILTEROBJS-$(CONFIG_AFIR_FILTER) += af_afir.o - AVFILTEROBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o -+AVFILTEROBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o - AVFILTEROBJS-$(CONFIG_COLORSPACE_FILTER) += vf_colorspace.o - AVFILTEROBJS-$(CONFIG_EQ_FILTER) += vf_eq.o - AVFILTEROBJS-$(CONFIG_GBLUR_FILTER) += vf_gblur.o -diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c -index 57e0091b806a..4f983d7fbc9a 100644 ---- a/tests/checkasm/checkasm.c -+++ b/tests/checkasm/checkasm.c -@@ -179,6 +179,9 @@ static const struct { - #if CONFIG_BLEND_FILTER - { "vf_blend", checkasm_check_blend }, - #endif -+ #if CONFIG_BWDIF_FILTER -+ { "vf_bwdif", checkasm_check_vf_bwdif }, -+ #endif - #if CONFIG_COLORSPACE_FILTER - { "vf_colorspace", checkasm_check_colorspace }, - #endif -diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h -index f4a0d20358c0..d69bc43999f5 100644 ---- a/tests/checkasm/checkasm.h -+++ b/tests/checkasm/checkasm.h -@@ -83,6 +83,7 @@ void 
checkasm_check_utvideodsp(void); - void checkasm_check_v210dec(void); - void checkasm_check_v210enc(void); - void checkasm_check_vc1dsp(void); -+void checkasm_check_vf_bwdif(void); - void checkasm_check_vf_eq(void); - void checkasm_check_vf_gblur(void); - void checkasm_check_vf_hflip(void); -diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c -new file mode 100644 -index 000000000000..46224bb57572 ---- /dev/null -+++ b/tests/checkasm/vf_bwdif.c -@@ -0,0 +1,84 @@ -+/* -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with FFmpeg; if not, write to the Free Software Foundation, Inc., -+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -+ */ -+ -+#include -+#include "checkasm.h" -+#include "libavcodec/internal.h" -+#include "libavfilter/bwdif.h" -+ -+#define WIDTH 256 -+ -+#define randomize_buffers(buf0, buf1, mask, count) \ -+ for (size_t i = 0; i < count; i++) \ -+ buf0[i] = buf1[i] = rnd() & mask -+ -+#define BODY(type, depth) \ -+ do { \ -+ type prev0[9*WIDTH], prev1[9*WIDTH]; \ -+ type next0[9*WIDTH], next1[9*WIDTH]; \ -+ type cur0[9*WIDTH], cur1[9*WIDTH]; \ -+ type dst0[WIDTH], dst1[WIDTH]; \ -+ const int stride = WIDTH; \ -+ const int mask = (1< -Date: Thu, 6 Jul 2023 13:56:18 +0000 -Subject: [PATCH 144/186] Revert "vf_bwdif: Add attributes to ask for - vectorization" - -This reverts commit 281250290ba5c2dcd8676e9a261050e65c10bcb7. 
-Will be replaced by hand coded asm as on upstream ---- - libavfilter/vf_bwdif.c | 29 ++++++++++++++--------------- - 1 file changed, 14 insertions(+), 15 deletions(-) - -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 539fabbd4686..34e8c5e234ee 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -74,10 +74,10 @@ typedef struct ThreadData { - int temporal_diff1 =(FFABS(prev[mrefs] - c) + FFABS(prev[prefs] - e)) >> 1; \ - int temporal_diff2 =(FFABS(next[mrefs] - c) + FFABS(next[prefs] - e)) >> 1; \ - int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); \ -- {/*\ -+ \ - if (!diff) { \ - dst[0] = d; \ -- } else {*/ -+ } else { - - #define SPAT_CHECK() \ - int b = ((prev2[mrefs2] + next2[mrefs2]) >> 1) - c; \ -@@ -89,16 +89,15 @@ typedef struct ThreadData { - diff = FFMAX3(diff, min, -max); - - #define FILTER_LINE() \ -- int i1, i2; \ - SPAT_CHECK() \ -- /*if (FFABS(c - e) > temporal_diff0)*/ { \ -- i1 = (((coef_hf[0] * (prev2[0] + next2[0]) \ -+ if (FFABS(c - e) > temporal_diff0) { \ -+ interpol = (((coef_hf[0] * (prev2[0] + next2[0]) \ - - coef_hf[1] * (prev2[mrefs2] + next2[mrefs2] + prev2[prefs2] + next2[prefs2]) \ - + coef_hf[2] * (prev2[mrefs4] + next2[mrefs4] + prev2[prefs4] + next2[prefs4])) >> 2) \ - + coef_lf[0] * (c + e) - coef_lf[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ -- } /*else*/ { \ -- i2 = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ -- }interpol = FFABS(c - e) > temporal_diff0 ? i1:i2;\ -+ } else { \ -+ interpol = (coef_sp[0] * (c + e) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; \ -+ } - - #define FILTER_EDGE() \ - if (spat) { \ -@@ -112,7 +111,7 @@ typedef struct ThreadData { - else if (interpol < d - diff) \ - interpol = d - diff; \ - \ -- dst[0] = !diff ? 
d : av_clip(interpol, 0, clip_max); \ -+ dst[0] = av_clip(interpol, 0, clip_max); \ - } \ - \ - dst++; \ -@@ -123,7 +122,7 @@ typedef struct ThreadData { - next2++; \ - } - --static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, -+static void filter_intra(void *dst1, void *cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max) - { - uint8_t *dst = dst1; -@@ -133,7 +132,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_intra(void *restr - FILTER_INTRA() - } - --static void __attribute__((optimize("tree-vectorize"))) filter_line_c(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, -+static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int prefs3, int mrefs3, int prefs4, int mrefs4, - int parity, int clip_max) -@@ -151,7 +150,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_line_c(void *rest - FILTER2() - } - --static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, -+static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int parity, int clip_max, int spat) - { -@@ -168,7 +167,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_edge(void *restri - FILTER2() - } - --static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void *restrict dst1, void *restrict cur1, int w, int prefs, int mrefs, -+static void filter_intra_16bit(void *dst1, void *cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max) - { - uint16_t *dst = dst1; -@@ -178,7 +177,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_intra_16bit(void - FILTER_INTRA() - } - --static void 
__attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, -+static void filter_line_c_16bit(void *dst1, void *prev1, void *cur1, void *next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int prefs3, int mrefs3, int prefs4, int mrefs4, - int parity, int clip_max) -@@ -196,7 +195,7 @@ static void __attribute__((optimize("tree-vectorize"))) filter_line_c_16bit(void - FILTER2() - } - --static void __attribute__((optimize("tree-vectorize"))) filter_edge_16bit(void *restrict dst1, void *restrict prev1, void *restrict cur1, void *restrict next1, -+static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int parity, int clip_max, int spat) - { - -From 6c3566cf92cba9e2ccd87b53ac7f00f0ea431fb2 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jul 2023 14:04:39 +0000 -Subject: [PATCH 145/186] tests/checkasm: Add test for vf_bwdif filter_intra -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: John Cox -Signed-off-by: Martin Storsjö -(cherry picked from commit 7caa8d6b91e738ad2c1ea61746b6c062c470f7d3) ---- - tests/checkasm/vf_bwdif.c | 37 +++++++++++++++++++++++++++++++++++++ - 1 file changed, 37 insertions(+) - -diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c -index 46224bb57572..034bbabb4c5c 100644 ---- a/tests/checkasm/vf_bwdif.c -+++ b/tests/checkasm/vf_bwdif.c -@@ -20,6 +20,7 @@ - #include "checkasm.h" - #include "libavcodec/internal.h" - #include "libavfilter/bwdif.h" -+#include "libavutil/mem_internal.h" - - #define WIDTH 256 - -@@ -81,4 +82,40 @@ void checkasm_check_vf_bwdif(void) - BODY(uint16_t, 10); - report("bwdif10"); - } -+ -+ if (check_func(ctx_8.filter_intra, "bwdif8.intra")) { -+ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); -+ 
LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); -+ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); -+ const int stride = WIDTH; -+ const int mask = (1<<8)-1; -+ -+ declare_func(void, void *dst1, void *cur1, int w, int prefs, int mrefs, -+ int prefs3, int mrefs3, int parity, int clip_max); -+ -+ randomize_buffers( cur0, cur1, mask, 11*WIDTH); -+ memset(dst0, 0xba, WIDTH * 3); -+ memset(dst1, 0xba, WIDTH * 3); -+ -+ call_ref(dst0 + stride, -+ cur0 + stride * 4, WIDTH, -+ stride, -stride, stride * 3, -stride * 3, -+ 0, mask); -+ call_new(dst1 + stride, -+ cur0 + stride * 4, WIDTH, -+ stride, -stride, stride * 3, -stride * 3, -+ 0, mask); -+ -+ if (memcmp(dst0, dst1, WIDTH*3) -+ || memcmp( cur0, cur1, WIDTH*11)) -+ fail(); -+ -+ bench_new(dst1 + stride, -+ cur0 + stride * 4, WIDTH, -+ stride, -stride, stride * 3, -stride * 3, -+ 0, mask); -+ -+ report("bwdif8.intra"); -+ } - } - -From 652e80de533059d24e136242324d4129a545a158 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jul 2023 14:04:40 +0000 -Subject: [PATCH 146/186] avfilter/vf_bwdif: Add neon for filter_intra -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Adds an outline for aarch neon functions -Adds common macros and consts for aarch64 neon -Exports C filter_intra needed for tail fixup of neon code -Adds neon for filter_intra - -Signed-off-by: John Cox -Signed-off-by: Martin Storsjö -(cherry picked from commit 5075cfb4e6a21f6b4da9e62bdb0bad4cb32a4673) ---- - libavfilter/aarch64/Makefile | 2 + - libavfilter/aarch64/vf_bwdif_init_aarch64.c | 56 ++++++++ - libavfilter/aarch64/vf_bwdif_neon.S | 136 ++++++++++++++++++++ - libavfilter/bwdif.h | 4 + - libavfilter/vf_bwdif.c | 8 +- - 5 files changed, 203 insertions(+), 3 deletions(-) - create mode 100644 libavfilter/aarch64/vf_bwdif_init_aarch64.c - create mode 100644 libavfilter/aarch64/vf_bwdif_neon.S - -diff --git a/libavfilter/aarch64/Makefile b/libavfilter/aarch64/Makefile -index b58daa3a3fea..b68209bc94a6 100644 
---- a/libavfilter/aarch64/Makefile -+++ b/libavfilter/aarch64/Makefile -@@ -1,3 +1,5 @@ -+OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_init_aarch64.o - OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_init.o - -+NEON-OBJS-$(CONFIG_BWDIF_FILTER) += aarch64/vf_bwdif_neon.o - NEON-OBJS-$(CONFIG_NLMEANS_FILTER) += aarch64/vf_nlmeans_neon.o -diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -new file mode 100644 -index 000000000000..3ffaa07ab369 ---- /dev/null -+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -@@ -0,0 +1,56 @@ -+/* -+ * bwdif aarch64 NEON optimisations -+ * -+ * Copyright (c) 2023 John Cox -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/common.h" -+#include "libavfilter/bwdif.h" -+#include "libavutil/aarch64/cpu.h" -+ -+void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs, -+ int prefs3, int mrefs3, int parity, int clip_max); -+ -+ -+static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs, -+ int prefs3, int mrefs3, int parity, int clip_max) -+{ -+ const int w0 = clip_max != 255 ? 
0 : w & ~15; -+ -+ ff_bwdif_filter_intra_neon(dst1, cur1, w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max); -+ -+ if (w0 < w) -+ ff_bwdif_filter_intra_c((char *)dst1 + w0, (char *)cur1 + w0, -+ w - w0, prefs, mrefs, prefs3, mrefs3, parity, clip_max); -+} -+ -+void -+ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth) -+{ -+ const int cpu_flags = av_get_cpu_flags(); -+ -+ if (bit_depth != 8) -+ return; -+ -+ if (!have_neon(cpu_flags)) -+ return; -+ -+ s->filter_intra = filter_intra_helper; -+} -+ -diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S -new file mode 100644 -index 000000000000..e288efbe6c33 ---- /dev/null -+++ b/libavfilter/aarch64/vf_bwdif_neon.S -@@ -0,0 +1,136 @@ -+/* -+ * bwdif aarch64 NEON optimisations -+ * -+ * Copyright (c) 2023 John Cox -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+ -+#include "libavutil/aarch64/asm.S" -+ -+// Space taken on the stack by an int (32-bit) -+#ifdef __APPLE__ -+.set SP_INT, 4 -+#else -+.set SP_INT, 8 -+#endif -+ -+.macro SQSHRUNN b, s0, s1, s2, s3, n -+ sqshrun \s0\().4h, \s0\().4s, #\n - 8 -+ sqshrun2 \s0\().8h, \s1\().4s, #\n - 8 -+ sqshrun \s1\().4h, \s2\().4s, #\n - 8 -+ sqshrun2 \s1\().8h, \s3\().4s, #\n - 8 -+ uzp2 \b\().16b, \s0\().16b, \s1\().16b -+.endm -+ -+.macro SMULL4K a0, a1, a2, a3, s0, s1, k -+ smull \a0\().4s, \s0\().4h, \k -+ smull2 \a1\().4s, \s0\().8h, \k -+ smull \a2\().4s, \s1\().4h, \k -+ smull2 \a3\().4s, \s1\().8h, \k -+.endm -+ -+.macro UMULL4K a0, a1, a2, a3, s0, s1, k -+ umull \a0\().4s, \s0\().4h, \k -+ umull2 \a1\().4s, \s0\().8h, \k -+ umull \a2\().4s, \s1\().4h, \k -+ umull2 \a3\().4s, \s1\().8h, \k -+.endm -+ -+.macro UMLAL4K a0, a1, a2, a3, s0, s1, k -+ umlal \a0\().4s, \s0\().4h, \k -+ umlal2 \a1\().4s, \s0\().8h, \k -+ umlal \a2\().4s, \s1\().4h, \k -+ umlal2 \a3\().4s, \s1\().8h, \k -+.endm -+ -+.macro UMLSL4K a0, a1, a2, a3, s0, s1, k -+ umlsl \a0\().4s, \s0\().4h, \k -+ umlsl2 \a1\().4s, \s0\().8h, \k -+ umlsl \a2\().4s, \s1\().4h, \k -+ umlsl2 \a3\().4s, \s1\().8h, \k -+.endm -+ -+.macro LDR_COEFFS d, t0 -+ movrel \t0, coeffs, 0 -+ ld1 {\d\().8h}, [\t0] -+.endm -+ -+// static const uint16_t coef_lf[2] = { 4309, 213 }; -+// static const uint16_t coef_hf[3] = { 5570, 3801, 1016 }; -+// static const uint16_t coef_sp[2] = { 5077, 981 }; -+ -+const coeffs, align=4 // align 4 means align on 2^4 boundry -+ .hword 4309 * 4, 213 * 4 // lf[0]*4 = v0.h[0] -+ .hword 5570, 3801, 1016, -3801 // hf[0] = v0.h[2], -hf[1] = v0.h[5] -+ .hword 5077, 981 // sp[0] = v0.h[6] -+endconst -+ -+// 
============================================================================ -+// -+// void ff_bwdif_filter_intra_neon( -+// void *dst1, // x0 -+// void *cur1, // x1 -+// int w, // w2 -+// int prefs, // w3 -+// int mrefs, // w4 -+// int prefs3, // w5 -+// int mrefs3, // w6 -+// int parity, // w7 unused -+// int clip_max) // [sp, #0] unused -+ -+function ff_bwdif_filter_intra_neon, export=1 -+ cmp w2, #0 -+ ble 99f -+ -+ LDR_COEFFS v0, x17 -+ -+// for (x = 0; x < w; x++) { -+10: -+ -+// interpol = (coef_sp[0] * (cur[mrefs] + cur[prefs]) - coef_sp[1] * (cur[mrefs3] + cur[prefs3])) >> 13; -+ ldr q31, [x1, w4, sxtw] -+ ldr q30, [x1, w3, sxtw] -+ ldr q29, [x1, w6, sxtw] -+ ldr q28, [x1, w5, sxtw] -+ -+ uaddl v20.8h, v31.8b, v30.8b -+ uaddl2 v21.8h, v31.16b, v30.16b -+ -+ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[6] -+ -+ uaddl v20.8h, v29.8b, v28.8b -+ uaddl2 v21.8h, v29.16b, v28.16b -+ -+ UMLSL4K v2, v3, v4, v5, v20, v21, v0.h[7] -+ -+// dst[0] = av_clip(interpol, 0, clip_max); -+ SQSHRUNN v2, v2, v3, v4, v5, 13 -+ str q2, [x0], #16 -+ -+// dst++; -+// cur++; -+// } -+ -+ subs w2, w2, #16 -+ add x1, x1, #16 -+ bgt 10b -+ -+99: -+ ret -+endfunc -diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h -index 5749345f784e..ae6f6ce2233a 100644 ---- a/libavfilter/bwdif.h -+++ b/libavfilter/bwdif.h -@@ -39,5 +39,9 @@ typedef struct BWDIFContext { - - void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth); - void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth); -+void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth); -+ -+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, -+ int prefs3, int mrefs3, int parity, int clip_max); - - #endif /* AVFILTER_BWDIF_H */ -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 34e8c5e234ee..6ec8bbab5d72 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -122,8 +122,8 @@ typedef struct ThreadData { - next2++; \ - } - --static void filter_intra(void *dst1, 
void *cur1, int w, int prefs, int mrefs, -- int prefs3, int mrefs3, int parity, int clip_max) -+void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, -+ int prefs3, int mrefs3, int parity, int clip_max) - { - uint8_t *dst = dst1; - uint8_t *cur = cur1; -@@ -352,13 +352,15 @@ av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth) - s->filter_line = filter_line_c_16bit; - s->filter_edge = filter_edge_16bit; - } else { -- s->filter_intra = filter_intra; -+ s->filter_intra = ff_bwdif_filter_intra_c; - s->filter_line = filter_line_c; - s->filter_edge = filter_edge; - } - - #if ARCH_X86 - ff_bwdif_init_x86(s, bit_depth); -+#elif ARCH_AARCH64 -+ ff_bwdif_init_aarch64(s, bit_depth); - #endif - } - - -From 981053a48f80de6c0be0c3975f7aa877aacc842e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jul 2023 14:04:41 +0000 -Subject: [PATCH 147/186] tests/checkasm: Add test for vf_bwdif filter_edge -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: John Cox -Signed-off-by: Martin Storsjö -(cherry picked from commit 7ed7c00f55a50ac88589f9e17c172d4a4fce0581) ---- - tests/checkasm/vf_bwdif.c | 54 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 54 insertions(+) - -diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c -index 034bbabb4c5c..5fdba09fdc73 100644 ---- a/tests/checkasm/vf_bwdif.c -+++ b/tests/checkasm/vf_bwdif.c -@@ -83,6 +83,60 @@ void checkasm_check_vf_bwdif(void) - report("bwdif10"); - } - -+ { -+ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); -+ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); -+ const int stride = WIDTH; -+ const int mask = (1<<8)-1; -+ int 
spat; -+ int parity; -+ -+ for (spat = 0; spat != 2; ++spat) { -+ for (parity = 0; parity != 2; ++parity) { -+ if (check_func(ctx_8.filter_edge, "bwdif8.edge.s%d.p%d", spat, parity)) { -+ -+ declare_func(void, void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int parity, int clip_max, int spat); -+ -+ randomize_buffers(prev0, prev1, mask, 11*WIDTH); -+ randomize_buffers(next0, next1, mask, 11*WIDTH); -+ randomize_buffers( cur0, cur1, mask, 11*WIDTH); -+ memset(dst0, 0xba, WIDTH * 3); -+ memset(dst1, 0xba, WIDTH * 3); -+ -+ call_ref(dst0 + stride, -+ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, WIDTH, -+ stride, -stride, stride * 2, -stride * 2, -+ parity, mask, spat); -+ call_new(dst1 + stride, -+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH, -+ stride, -stride, stride * 2, -stride * 2, -+ parity, mask, spat); -+ -+ if (memcmp(dst0, dst1, WIDTH*3) -+ || memcmp(prev0, prev1, WIDTH*11) -+ || memcmp(next0, next1, WIDTH*11) -+ || memcmp( cur0, cur1, WIDTH*11)) -+ fail(); -+ -+ bench_new(dst1 + stride, -+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, WIDTH, -+ stride, -stride, stride * 2, -stride * 2, -+ parity, mask, spat); -+ } -+ } -+ } -+ -+ report("bwdif8.edge"); -+ } -+ - if (check_func(ctx_8.filter_intra, "bwdif8.intra")) { - LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); - LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); - -From c48fed1cb66414179201cbac1ef6fb689cb4426a Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jul 2023 14:04:42 +0000 -Subject: [PATCH 148/186] avfilter/vf_bwdif: Add neon for filter_edge -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Adds clip and spatial macros for aarch64 neon -Exports C filter_edge needed for tail fixup of neon code -Adds neon for filter_edge - -Signed-off-by: John Cox -Signed-off-by: Martin Storsjö -(cherry picked from commit 8130df83e0fbd3264fe990fb4e084ecbd452d0b1) 
---- - libavfilter/aarch64/vf_bwdif_init_aarch64.c | 20 +++ - libavfilter/aarch64/vf_bwdif_neon.S | 177 ++++++++++++++++++++ - libavfilter/bwdif.h | 4 + - libavfilter/vf_bwdif.c | 8 +- - 4 files changed, 205 insertions(+), 4 deletions(-) - -diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -index 3ffaa07ab369..e75cf2f20459 100644 ---- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c -+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -@@ -24,10 +24,29 @@ - #include "libavfilter/bwdif.h" - #include "libavutil/aarch64/cpu.h" - -+void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int parity, int clip_max, int spat); -+ - void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max); - - -+static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int parity, int clip_max, int spat) -+{ -+ const int w0 = clip_max != 255 ? 
0 : w & ~15; -+ -+ ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1, w0, prefs, mrefs, prefs2, mrefs2, -+ parity, clip_max, spat); -+ -+ if (w0 < w) -+ ff_bwdif_filter_edge_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0, -+ w - w0, prefs, mrefs, prefs2, mrefs2, -+ parity, clip_max, spat); -+} -+ - static void filter_intra_helper(void *dst1, void *cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max) - { -@@ -52,5 +71,6 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth) - return; - - s->filter_intra = filter_intra_helper; -+ s->filter_edge = filter_edge_helper; - } - -diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S -index e288efbe6c33..389302b813aa 100644 ---- a/libavfilter/aarch64/vf_bwdif_neon.S -+++ b/libavfilter/aarch64/vf_bwdif_neon.S -@@ -66,6 +66,79 @@ - umlsl2 \a3\().4s, \s1\().8h, \k - .endm - -+// int b = m2s1 - m1; -+// int f = p2s1 - p1; -+// int dc = c0s1 - m1; -+// int de = c0s1 - p1; -+// int sp_max = FFMIN(p1 - c0s1, m1 - c0s1); -+// sp_max = FFMIN(sp_max, FFMAX(-b,-f)); -+// int sp_min = FFMIN(c0s1 - p1, c0s1 - m1); -+// sp_min = FFMIN(sp_min, FFMAX(b,f)); -+// diff = diff == 0 ? 
0 : FFMAX3(diff, sp_min, sp_max); -+.macro SPAT_CHECK diff, m2s1, m1, c0s1, p1, p2s1, t0, t1, t2, t3 -+ uqsub \t0\().16b, \p1\().16b, \c0s1\().16b -+ uqsub \t2\().16b, \m1\().16b, \c0s1\().16b -+ umin \t2\().16b, \t0\().16b, \t2\().16b -+ -+ uqsub \t1\().16b, \m1\().16b, \m2s1\().16b -+ uqsub \t3\().16b, \p1\().16b, \p2s1\().16b -+ umax \t3\().16b, \t3\().16b, \t1\().16b -+ umin \t3\().16b, \t3\().16b, \t2\().16b -+ -+ uqsub \t0\().16b, \c0s1\().16b, \p1\().16b -+ uqsub \t2\().16b, \c0s1\().16b, \m1\().16b -+ umin \t2\().16b, \t0\().16b, \t2\().16b -+ -+ uqsub \t1\().16b, \m2s1\().16b, \m1\().16b -+ uqsub \t0\().16b, \p2s1\().16b, \p1\().16b -+ umax \t0\().16b, \t0\().16b, \t1\().16b -+ umin \t2\().16b, \t2\().16b, \t0\().16b -+ -+ cmeq \t1\().16b, \diff\().16b, #0 -+ umax \diff\().16b, \diff\().16b, \t3\().16b -+ umax \diff\().16b, \diff\().16b, \t2\().16b -+ bic \diff\().16b, \diff\().16b, \t1\().16b -+.endm -+ -+// i0 = s0; -+// if (i0 > d0 + diff0) -+// i0 = d0 + diff0; -+// else if (i0 < d0 - diff0) -+// i0 = d0 - diff0; -+// -+// i0 = s0 is safe -+.macro DIFF_CLIP i0, s0, d0, diff, t0, t1 -+ uqadd \t0\().16b, \d0\().16b, \diff\().16b -+ uqsub \t1\().16b, \d0\().16b, \diff\().16b -+ umin \i0\().16b, \s0\().16b, \t0\().16b -+ umax \i0\().16b, \i0\().16b, \t1\().16b -+.endm -+ -+// i0 = FFABS(m1 - p1) > td0 ? i1 : i2; -+// DIFF_CLIP -+// -+// i0 = i1 is safe -+.macro INTERPOL i0, i1, i2, m1, d0, p1, td0, diff, t0, t1, t2 -+ uabd \t0\().16b, \m1\().16b, \p1\().16b -+ cmhi \t0\().16b, \t0\().16b, \td0\().16b -+ bsl \t0\().16b, \i1\().16b, \i2\().16b -+ DIFF_CLIP \i0, \t0, \d0, \diff, \t1, \t2 -+.endm -+ -+.macro PUSH_VREGS -+ stp d8, d9, [sp, #-64]! 
-+ stp d10, d11, [sp, #16] -+ stp d12, d13, [sp, #32] -+ stp d14, d15, [sp, #48] -+.endm -+ -+.macro POP_VREGS -+ ldp d14, d15, [sp, #48] -+ ldp d12, d13, [sp, #32] -+ ldp d10, d11, [sp, #16] -+ ldp d8, d9, [sp], #64 -+.endm -+ - .macro LDR_COEFFS d, t0 - movrel \t0, coeffs, 0 - ld1 {\d\().8h}, [\t0] -@@ -81,6 +154,110 @@ const coeffs, align=4 // align 4 means align on 2^4 boundry - .hword 5077, 981 // sp[0] = v0.h[6] - endconst - -+// ============================================================================ -+// -+// void ff_bwdif_filter_edge_neon( -+// void *dst1, // x0 -+// void *prev1, // x1 -+// void *cur1, // x2 -+// void *next1, // x3 -+// int w, // w4 -+// int prefs, // w5 -+// int mrefs, // w6 -+// int prefs2, // w7 -+// int mrefs2, // [sp, #0] -+// int parity, // [sp, #SP_INT] -+// int clip_max, // [sp, #SP_INT*2] unused -+// int spat); // [sp, #SP_INT*3] -+ -+function ff_bwdif_filter_edge_neon, export=1 -+ // Sanity check w -+ cmp w4, #0 -+ ble 99f -+ -+// #define prev2 cur -+// const uint8_t * restrict next2 = parity ? 
prev : next; -+ -+ ldr w8, [sp, #0] // mrefs2 -+ -+ ldr w17, [sp, #SP_INT] // parity -+ ldr w16, [sp, #SP_INT*3] // spat -+ cmp w17, #0 -+ csel x17, x1, x3, ne -+ -+// for (x = 0; x < w; x++) { -+ -+10: -+// int m1 = cur[mrefs]; -+// int d = (prev2[0] + next2[0]) >> 1; -+// int p1 = cur[prefs]; -+// int temporal_diff0 = FFABS(prev2[0] - next2[0]); -+// int temporal_diff1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; -+// int temporal_diff2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; -+// int diff = FFMAX3(temporal_diff0 >> 1, temporal_diff1, temporal_diff2); -+ ldr q31, [x2] -+ ldr q21, [x17] -+ uhadd v16.16b, v31.16b, v21.16b // d0 = v16 -+ uabd v17.16b, v31.16b, v21.16b // td0 = v17 -+ ldr q24, [x2, w6, sxtw] // m1 = v24 -+ ldr q22, [x2, w5, sxtw] // p1 = v22 -+ -+ ldr q0, [x1, w6, sxtw] // prev[mrefs] -+ ldr q2, [x1, w5, sxtw] // prev[prefs] -+ ldr q1, [x3, w6, sxtw] // next[mrefs] -+ ldr q3, [x3, w5, sxtw] // next[prefs] -+ -+ ushr v29.16b, v17.16b, #1 -+ -+ uabd v31.16b, v0.16b, v24.16b -+ uabd v30.16b, v2.16b, v22.16b -+ uhadd v0.16b, v31.16b, v30.16b // td1 = q0 -+ -+ uabd v31.16b, v1.16b, v24.16b -+ uabd v30.16b, v3.16b, v22.16b -+ uhadd v1.16b, v31.16b, v30.16b // td2 = q1 -+ -+ umax v0.16b, v0.16b, v29.16b -+ umax v0.16b, v0.16b, v1.16b // diff = v0 -+ -+// if (spat) { -+// SPAT_CHECK() -+// } -+// i0 = (m1 + p1) >> 1; -+ cbz w16, 1f -+ -+ ldr q31, [x2, w8, sxtw] -+ ldr q18, [x17, w8, sxtw] -+ ldr q30, [x2, w7, sxtw] -+ ldr q19, [x17, w7, sxtw] -+ uhadd v18.16b, v18.16b, v31.16b -+ uhadd v19.16b, v19.16b, v30.16b -+ -+ SPAT_CHECK v0, v18, v24, v16, v22, v19, v31, v30, v29, v28 -+ -+1: -+ uhadd v2.16b, v22.16b, v24.16b -+ -+ // i0 = v2, s0 = v2, d0 = v16, diff = v0, t0 = v31, t1 = v30 -+ DIFF_CLIP v2, v2, v16, v0, v31, v30 -+ -+// dst[0] = av_clip(interpol, 0, clip_max); -+ str q2, [x0], #16 -+ -+// dst++; -+// cur++; -+// } -+ subs w4, w4, #16 -+ add x1, x1, #16 -+ add x2, x2, #16 -+ add x3, x3, #16 -+ add x17, x17, #16 -+ bgt 
10b -+ -+99: -+ ret -+endfunc -+ - // ============================================================================ - // - // void ff_bwdif_filter_intra_neon( -diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h -index ae6f6ce2233a..ae1616d366cb 100644 ---- a/libavfilter/bwdif.h -+++ b/libavfilter/bwdif.h -@@ -41,6 +41,10 @@ void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth); - void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth); - void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth); - -+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int parity, int clip_max, int spat); -+ - void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max); - -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 6ec8bbab5d72..688c2d257229 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -150,9 +150,9 @@ static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, - FILTER2() - } - --static void filter_edge(void *dst1, void *prev1, void *cur1, void *next1, -- int w, int prefs, int mrefs, int prefs2, int mrefs2, -- int parity, int clip_max, int spat) -+void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int parity, int clip_max, int spat) - { - uint8_t *dst = dst1; - uint8_t *prev = prev1; -@@ -354,7 +354,7 @@ av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth) - } else { - s->filter_intra = ff_bwdif_filter_intra_c; - s->filter_line = filter_line_c; -- s->filter_edge = filter_edge; -+ s->filter_edge = ff_bwdif_filter_edge_c; - } - - #if ARCH_X86 - -From eb72477591f765f10373e64dd57745ebe908d01a Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jul 2023 14:04:43 +0000 -Subject: [PATCH 149/186] avfilter/vf_bwdif: Add neon for filter_line 
-MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Exports C filter_line needed for tail fixup of neon code -Adds neon for filter_line - -Signed-off-by: John Cox -Signed-off-by: Martin Storsjö -(cherry picked from commit 94cb94a2c0910d364a7181fc5cc0e9556b777d0a) ---- - libavfilter/aarch64/vf_bwdif_init_aarch64.c | 21 ++ - libavfilter/aarch64/vf_bwdif_neon.S | 203 ++++++++++++++++++++ - libavfilter/bwdif.h | 5 + - libavfilter/vf_bwdif.c | 10 +- - 4 files changed, 234 insertions(+), 5 deletions(-) - -diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -index e75cf2f20459..21e67884ab90 100644 ---- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c -+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -@@ -31,6 +31,26 @@ void ff_bwdif_filter_edge_neon(void *dst1, void *prev1, void *cur1, void *next1, - void ff_bwdif_filter_intra_neon(void *dst1, void *cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max); - -+void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int prefs3, int mrefs3, int prefs4, int mrefs4, -+ int parity, int clip_max); -+ -+ -+static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int prefs3, int mrefs3, int prefs4, int mrefs4, -+ int parity, int clip_max) -+{ -+ const int w0 = clip_max != 255 ? 
0 : w & ~15; -+ -+ ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1, -+ w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); -+ -+ if (w0 < w) -+ ff_bwdif_filter_line_c((char *)dst1 + w0, (char *)prev1 + w0, (char *)cur1 + w0, (char *)next1 + w0, -+ w - w0, prefs, mrefs, prefs2, mrefs2, prefs3, mrefs3, prefs4, mrefs4, parity, clip_max); -+} - - static void filter_edge_helper(void *dst1, void *prev1, void *cur1, void *next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, -@@ -71,6 +91,7 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth) - return; - - s->filter_intra = filter_intra_helper; -+ s->filter_line = filter_line_helper; - s->filter_edge = filter_edge_helper; - } - -diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S -index 389302b813aa..f185e94e3c6f 100644 ---- a/libavfilter/aarch64/vf_bwdif_neon.S -+++ b/libavfilter/aarch64/vf_bwdif_neon.S -@@ -154,6 +154,209 @@ const coeffs, align=4 // align 4 means align on 2^4 boundry - .hword 5077, 981 // sp[0] = v0.h[6] - endconst - -+// =========================================================================== -+// -+// void filter_line( -+// void *dst1, // x0 -+// void *prev1, // x1 -+// void *cur1, // x2 -+// void *next1, // x3 -+// int w, // w4 -+// int prefs, // w5 -+// int mrefs, // w6 -+// int prefs2, // w7 -+// int mrefs2, // [sp, #0] -+// int prefs3, // [sp, #SP_INT] -+// int mrefs3, // [sp, #SP_INT*2] -+// int prefs4, // [sp, #SP_INT*3] -+// int mrefs4, // [sp, #SP_INT*4] -+// int parity, // [sp, #SP_INT*5] -+// int clip_max) // [sp, #SP_INT*6] -+ -+function ff_bwdif_filter_line_neon, export=1 -+ // Sanity check w -+ cmp w4, #0 -+ ble 99f -+ -+ // Rearrange regs to be the same as line3 for ease of debug! 
-+ mov w10, w4 // w10 = loop count -+ mov w9, w6 // w9 = mref -+ mov w12, w7 // w12 = pref2 -+ mov w11, w5 // w11 = pref -+ ldr w8, [sp, #0] // w8 = mref2 -+ ldr w7, [sp, #SP_INT*2] // w7 = mref3 -+ ldr w6, [sp, #SP_INT*4] // w6 = mref4 -+ ldr w13, [sp, #SP_INT] // w13 = pref3 -+ ldr w14, [sp, #SP_INT*3] // w14 = pref4 -+ -+ mov x4, x3 -+ mov x3, x2 -+ mov x2, x1 -+ -+ LDR_COEFFS v0, x17 -+ -+// #define prev2 cur -+// const uint8_t * restrict next2 = parity ? prev : next; -+ ldr w17, [sp, #SP_INT*5] // parity -+ cmp w17, #0 -+ csel x17, x2, x4, ne -+ -+ PUSH_VREGS -+ -+// for (x = 0; x < w; x++) { -+// int diff0, diff2; -+// int d0, d2; -+// int temporal_diff0, temporal_diff2; -+// -+// int i1, i2; -+// int j1, j2; -+// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; -+ -+10: -+// c0 = prev2[0] + next2[0]; // c0 = v20, v21 -+// d0 = c0 >> 1; // d0 = v10 -+// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 -+ ldr q31, [x3] -+ ldr q21, [x17] -+ uhadd v10.16b, v31.16b, v21.16b -+ uabd v11.16b, v31.16b, v21.16b -+ uaddl v20.8h, v21.8b, v31.8b -+ uaddl2 v21.8h, v21.16b, v31.16b -+ -+ ldr q31, [x3, w6, sxtw] -+ ldr q23, [x17, w6, sxtw] -+ -+// i1 = coef_hf[0] * c0; // i1 = v2-v5 -+ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] -+ -+ ldr q30, [x3, w14, sxtw] -+ ldr q25, [x17, w14, sxtw] -+ -+// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 -+ uaddl v22.8h, v23.8b, v31.8b -+ uaddl2 v23.8h, v23.16b, v31.16b -+ -+// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 -+ uhadd v12.16b, v25.16b, v30.16b -+ uaddl v24.8h, v25.8b, v30.8b -+ uaddl2 v25.8h, v25.16b, v30.16b -+ -+// m3 = cur[mrefs3]; // m3 = v20 -+ ldr q20, [x3, w7, sxtw] -+ -+// p3 = cur[prefs3]; // p3 = v21 -+ ldr q21, [x3, w13, sxtw] -+ -+// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) -+ add v22.8h, v22.8h, v24.8h -+ add v23.8h, v23.8h, v25.8h -+ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] -+ -+ ldr q29, [x3, w8, sxtw] -+ ldr q23, [x17, w8, sxtw] -+ -+// i1 -= 
coef_lf[1] * 4 * (m3 + p3); // - -+ uaddl v30.8h, v20.8b, v21.8b -+ uaddl2 v31.8h, v20.16b, v21.16b -+ -+ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] -+ -+ ldr q31, [x3, w12, sxtw] -+ ldr q27, [x17, w12, sxtw] -+ -+// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 -+ uhadd v13.16b, v23.16b, v29.16b -+ uaddl v22.8h, v23.8b, v29.8b -+ uaddl2 v23.8h, v23.16b, v29.16b -+ -+// m1 = cur[mrefs]; // m1 = v24 -+ ldr q24, [x3, w9, sxtw] -+ -+// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 -+// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 -+// d2 = p2 >> 1; // d2 = v15 -+ uabd v14.16b, v31.16b, v27.16b -+ uhadd v15.16b, v31.16b, v27.16b -+ uaddl v26.8h, v27.8b, v31.8b -+ uaddl2 v27.8h, v27.16b, v31.16b -+ -+// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*) -+ add v22.8h, v22.8h, v26.8h -+ add v23.8h, v23.8h, v27.8h -+ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] -+ -+// p1 = cur[prefs]; // p1 = v22 -+ ldr q22, [x3, w11, sxtw] -+ -+// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 -+ uaddl v18.8h, v22.8b, v24.8b -+ uaddl2 v19.8h, v22.16b, v24.16b -+ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] -+ -+ uaddl v18.8h, v20.8b, v21.8b -+ uaddl2 v19.8h, v20.16b, v21.16b -+ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] -+ -+ SQSHRUNN v17, v28, v29, v30, v31, 13 -+ -+// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 -+ uaddl v26.8h, v24.8b, v22.8b -+ uaddl2 v27.8h, v24.16b, v22.16b -+ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] -+ -+ ldr q31, [x2, w9, sxtw] -+ ldr q29, [x4, w9, sxtw] -+ -+ ldr q30, [x2, w11, sxtw] -+ ldr q28, [x4, w11, sxtw] -+ -+// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* -+ SQSHRUNN v2, v2, v3, v4, v5, 15 -+ -+// { -+// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; -+// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; -+ uabd v30.16b, v22.16b, v30.16b -+ uabd v31.16b, v24.16b, v31.16b -+ uabd v28.16b, v22.16b, v28.16b -+ uabd 
v29.16b, v24.16b, v29.16b -+ uhadd v31.16b, v31.16b, v30.16b -+ uhadd v29.16b, v29.16b, v28.16b -+ -+// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 -+ ushr v18.16b, v11.16b, #1 -+ umax v18.16b, v18.16b, v31.16b -+ umax v18.16b, v18.16b, v29.16b -+ -+ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15 -+ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28 -+ -+ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td2 = v11, diff2 = v18 -+ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29 -+ -+// dst[0] = av_clip_uint8(interpol); -+ str q2, [x0], #16 -+// } -+// -+// dst++; -+// cur++; -+// prev++; -+// prev2++; -+// next++; -+// } -+ -+ subs w10, w10, #16 -+ add x2, x2, #16 -+ add x3, x3, #16 -+ add x4, x4, #16 -+ add x17, x17, #16 -+ bgt 10b -+ -+ POP_VREGS -+99: -+ ret -+endfunc -+ - // ============================================================================ - // - // void ff_bwdif_filter_edge_neon( -diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h -index ae1616d366cb..cce99953f363 100644 ---- a/libavfilter/bwdif.h -+++ b/libavfilter/bwdif.h -@@ -48,4 +48,9 @@ void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, - void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs, - int prefs3, int mrefs3, int parity, int clip_max); - -+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int prefs3, int mrefs3, int prefs4, int mrefs4, -+ int parity, int clip_max); -+ - #endif /* AVFILTER_BWDIF_H */ -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 688c2d257229..2dc47f9614f8 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -132,10 +132,10 @@ void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs - FILTER_INTRA() - } - --static void filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, -- int w, int prefs, 
int mrefs, int prefs2, int mrefs2, -- int prefs3, int mrefs3, int prefs4, int mrefs4, -- int parity, int clip_max) -+void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, -+ int w, int prefs, int mrefs, int prefs2, int mrefs2, -+ int prefs3, int mrefs3, int prefs4, int mrefs4, -+ int parity, int clip_max) - { - uint8_t *dst = dst1; - uint8_t *prev = prev1; -@@ -353,7 +353,7 @@ av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth) - s->filter_edge = filter_edge_16bit; - } else { - s->filter_intra = ff_bwdif_filter_intra_c; -- s->filter_line = filter_line_c; -+ s->filter_line = ff_bwdif_filter_line_c; - s->filter_edge = ff_bwdif_filter_edge_c; - } - - -From 88f7f8eb8b2fe1e006fdc05564259f6a2448fdbc Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jul 2023 14:04:44 +0000 -Subject: [PATCH 150/186] avfilter/vf_bwdif: Add a filter_line3 method for - optimisation -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add an optional filter_line3 to the available optimisations. - -filter_line3 is equivalent to filter_line, memcpy, filter_line - -filter_line shares quite a number of loads and some calculations in -common with its next iteration and testing shows that using aarch64 -neon filter_line3s performance is 30% better than two filter_lines -and a memcpy. - -Adds a test for vf_bwdif filter_line3 to checkasm - -Rounds job start lines down to a multiple of 4. This means that if -filter_line3 exists then filter_line will not sometimes be called -once at the end of a slice depending on thread count. The final slice -may do up to 3 extra lines but filter_edge is faster than filter_line -so it is unlikely to create any noticable thread load variation. 
- -Signed-off-by: John Cox -Signed-off-by: Martin Storsjö -(cherry picked from commit 697533e76dbea8cc7fd6a0642bc60050cc05ead8) ---- - libavfilter/bwdif.h | 7 ++++ - libavfilter/vf_bwdif.c | 44 +++++++++++++++++++-- - tests/checkasm/vf_bwdif.c | 81 +++++++++++++++++++++++++++++++++++++++ - 3 files changed, 129 insertions(+), 3 deletions(-) - -diff --git a/libavfilter/bwdif.h b/libavfilter/bwdif.h -index cce99953f363..496cec72ef01 100644 ---- a/libavfilter/bwdif.h -+++ b/libavfilter/bwdif.h -@@ -35,6 +35,9 @@ typedef struct BWDIFContext { - void (*filter_edge)(void *dst, void *prev, void *cur, void *next, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int parity, int clip_max, int spat); -+ void (*filter_line3)(void *dst, int dstride, -+ const void *prev, const void *cur, const void *next, int prefs, -+ int w, int parity, int clip_max); - } BWDIFContext; - - void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth); -@@ -53,4 +56,8 @@ void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, - int prefs3, int mrefs3, int prefs4, int mrefs4, - int parity, int clip_max); - -+void ff_bwdif_filter_line3_c(void * dst1, int d_stride, -+ const void * prev1, const void * cur1, const void * next1, int s_stride, -+ int w, int parity, int clip_max); -+ - #endif /* AVFILTER_BWDIF_H */ -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 2dc47f9614f8..9847d38b6a63 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -150,6 +150,31 @@ void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1, - FILTER2() - } - -+#define NEXT_LINE()\ -+ dst += d_stride; \ -+ prev += prefs; \ -+ cur += prefs; \ -+ next += prefs; -+ -+void ff_bwdif_filter_line3_c(void * dst1, int d_stride, -+ const void * prev1, const void * cur1, const void * next1, int s_stride, -+ int w, int parity, int clip_max) -+{ -+ const int prefs = s_stride; -+ uint8_t * dst = dst1; -+ const uint8_t * prev = prev1; -+ const uint8_t * 
cur = cur1; -+ const uint8_t * next = next1; -+ -+ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, -+ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); -+ NEXT_LINE(); -+ memcpy(dst, cur, w); -+ NEXT_LINE(); -+ ff_bwdif_filter_line_c(dst, (void*)prev, (void*)cur, (void*)next, w, -+ prefs, -prefs, prefs * 2, - prefs * 2, prefs * 3, -prefs * 3, prefs * 4, -prefs * 4, parity, clip_max); -+} -+ - void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, - int parity, int clip_max, int spat) -@@ -212,6 +237,13 @@ static void filter_edge_16bit(void *dst1, void *prev1, void *cur1, void *next1, - FILTER2() - } - -+// Round job start line down to multiple of 4 so that if filter_line3 exists -+// and the frame is a multiple of 4 high then filter_line will never be called -+static inline int job_start(const int jobnr, const int nb_jobs, const int h) -+{ -+ return jobnr >= nb_jobs ? h : ((h * jobnr) / nb_jobs) & ~3; -+} -+ - static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) - { - BWDIFContext *s = ctx->priv; -@@ -221,8 +253,8 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) - int clip_max = (1 << (yadif->csp->comp[td->plane].depth)) - 1; - int df = (yadif->csp->comp[td->plane].depth + 7) / 8; - int refs = linesize / df; -- int slice_start = (td->h * jobnr ) / nb_jobs; -- int slice_end = (td->h * (jobnr+1)) / nb_jobs; -+ int slice_start = job_start(jobnr, nb_jobs, td->h); -+ int slice_end = job_start(jobnr + 1, nb_jobs, td->h); - int y; - - for (y = slice_start; y < slice_end; y++) { -@@ -244,6 +276,11 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) - refs << 1, -(refs << 1), - td->parity ^ td->tff, clip_max, - (y < 2) || ((y + 3) > td->h) ? 
0 : 1); -+ } else if (s->filter_line3 && y + 2 < slice_end && y + 6 < td->h) { -+ s->filter_line3(dst, td->frame->linesize[td->plane], -+ prev, cur, next, linesize, td->w, -+ td->parity ^ td->tff, clip_max); -+ y += 2; - } else { - s->filter_line(dst, prev, cur, next, td->w, - refs, -refs, refs << 1, -(refs << 1), -@@ -280,7 +317,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic, - td.plane = i; - - ff_filter_execute(ctx, filter_slice, &td, NULL, -- FFMIN(h, ff_filter_get_nb_threads(ctx))); -+ FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx))); - } - if (yadif->current_field == YADIF_FIELD_END) { - yadif->current_field = YADIF_FIELD_NORMAL; -@@ -347,6 +384,7 @@ static int config_props(AVFilterLink *link) - - av_cold void ff_bwdif_init_filter_line(BWDIFContext *s, int bit_depth) - { -+ s->filter_line3 = 0; - if (bit_depth > 8) { - s->filter_intra = filter_intra_16bit; - s->filter_line = filter_line_c_16bit; -diff --git a/tests/checkasm/vf_bwdif.c b/tests/checkasm/vf_bwdif.c -index 5fdba09fdc73..3399cacdf790 100644 ---- a/tests/checkasm/vf_bwdif.c -+++ b/tests/checkasm/vf_bwdif.c -@@ -28,6 +28,10 @@ - for (size_t i = 0; i < count; i++) \ - buf0[i] = buf1[i] = rnd() & mask - -+#define randomize_overflow_check(buf0, buf1, mask, count) \ -+ for (size_t i = 0; i < count; i++) \ -+ buf0[i] = buf1[i] = (rnd() & 1) != 0 ? 
mask : 0; -+ - #define BODY(type, depth) \ - do { \ - type prev0[9*WIDTH], prev1[9*WIDTH]; \ -@@ -83,6 +87,83 @@ void checkasm_check_vf_bwdif(void) - report("bwdif10"); - } - -+ if (!ctx_8.filter_line3) -+ ctx_8.filter_line3 = ff_bwdif_filter_line3_c; -+ -+ { -+ LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, next0, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, next1, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, cur0, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, cur1, [11*WIDTH]); -+ LOCAL_ALIGNED_16(uint8_t, dst0, [WIDTH*3]); -+ LOCAL_ALIGNED_16(uint8_t, dst1, [WIDTH*3]); -+ const int stride = WIDTH; -+ const int mask = (1<<8)-1; -+ int parity; -+ -+ for (parity = 0; parity != 2; ++parity) { -+ if (check_func(ctx_8.filter_line3, "bwdif8.line3.rnd.p%d", parity)) { -+ -+ declare_func(void, void * dst1, int d_stride, -+ const void * prev1, const void * cur1, const void * next1, int prefs, -+ int w, int parity, int clip_max); -+ -+ randomize_buffers(prev0, prev1, mask, 11*WIDTH); -+ randomize_buffers(next0, next1, mask, 11*WIDTH); -+ randomize_buffers( cur0, cur1, mask, 11*WIDTH); -+ -+ call_ref(dst0, stride, -+ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride, -+ WIDTH, parity, mask); -+ call_new(dst1, stride, -+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, -+ WIDTH, parity, mask); -+ -+ if (memcmp(dst0, dst1, WIDTH*3) -+ || memcmp(prev0, prev1, WIDTH*11) -+ || memcmp(next0, next1, WIDTH*11) -+ || memcmp( cur0, cur1, WIDTH*11)) -+ fail(); -+ -+ bench_new(dst1, stride, -+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, -+ WIDTH, parity, mask); -+ } -+ } -+ -+ // Use just 0s and ~0s to try to provoke bad cropping or overflow -+ // Parity makes no difference to this test so just test 0 -+ if (check_func(ctx_8.filter_line3, "bwdif8.line3.overflow")) { -+ -+ declare_func(void, void * dst1, int d_stride, -+ const void * prev1, const void * cur1, const 
void * next1, int prefs, -+ int w, int parity, int clip_max); -+ -+ randomize_overflow_check(prev0, prev1, mask, 11*WIDTH); -+ randomize_overflow_check(next0, next1, mask, 11*WIDTH); -+ randomize_overflow_check( cur0, cur1, mask, 11*WIDTH); -+ -+ call_ref(dst0, stride, -+ prev0 + stride * 4, cur0 + stride * 4, next0 + stride * 4, stride, -+ WIDTH, 0, mask); -+ call_new(dst1, stride, -+ prev1 + stride * 4, cur1 + stride * 4, next1 + stride * 4, stride, -+ WIDTH, 0, mask); -+ -+ if (memcmp(dst0, dst1, WIDTH*3) -+ || memcmp(prev0, prev1, WIDTH*11) -+ || memcmp(next0, next1, WIDTH*11) -+ || memcmp( cur0, cur1, WIDTH*11)) -+ fail(); -+ -+ // No point to benching -+ } -+ -+ report("bwdif8.line3"); -+ } -+ - { - LOCAL_ALIGNED_16(uint8_t, prev0, [11*WIDTH]); - LOCAL_ALIGNED_16(uint8_t, prev1, [11*WIDTH]); - -From 27eb1719ee70a8ccaf2acac2161b84e283ac5b1f Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 4 Jul 2023 14:04:45 +0000 -Subject: [PATCH 151/186] avfilter/vf_bwdif: Add neon for filter_line3 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Signed-off-by: John Cox -Signed-off-by: Martin Storsjö -(cherry picked from commit f00222e81f7d6a59d977fbb280d67989818e0ad2) ---- - libavfilter/aarch64/vf_bwdif_init_aarch64.c | 28 ++ - libavfilter/aarch64/vf_bwdif_neon.S | 272 ++++++++++++++++++++ - 2 files changed, 300 insertions(+) - -diff --git a/libavfilter/aarch64/vf_bwdif_init_aarch64.c b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -index 21e67884ab90..f52bc4b9b431 100644 ---- a/libavfilter/aarch64/vf_bwdif_init_aarch64.c -+++ b/libavfilter/aarch64/vf_bwdif_init_aarch64.c -@@ -36,6 +36,33 @@ void ff_bwdif_filter_line_neon(void *dst1, void *prev1, void *cur1, void *next1, - int prefs3, int mrefs3, int prefs4, int mrefs4, - int parity, int clip_max); - -+void ff_bwdif_filter_line3_neon(void * dst1, int d_stride, -+ const void * prev1, const void * cur1, const void * next1, int s_stride, -+ int w, int parity, int clip_max); -+ 
-+ -+static void filter_line3_helper(void * dst1, int d_stride, -+ const void * prev1, const void * cur1, const void * next1, int s_stride, -+ int w, int parity, int clip_max) -+{ -+ // Asm works on 16 byte chunks -+ // If w is a multiple of 16 then all is good - if not then if width rounded -+ // up to nearest 16 will fit in both src & dst strides then allow the asm -+ // to write over the padding bytes as that is almost certainly faster than -+ // having to invoke the C version to clean up the tail. -+ const int w1 = FFALIGN(w, 16); -+ const int w0 = clip_max != 255 ? 0 : -+ d_stride <= w1 && s_stride <= w1 ? w : w & ~15; -+ -+ ff_bwdif_filter_line3_neon(dst1, d_stride, -+ prev1, cur1, next1, s_stride, -+ w0, parity, clip_max); -+ -+ if (w0 < w) -+ ff_bwdif_filter_line3_c((char *)dst1 + w0, d_stride, -+ (const char *)prev1 + w0, (const char *)cur1 + w0, (const char *)next1 + w0, s_stride, -+ w - w0, parity, clip_max); -+} - - static void filter_line_helper(void *dst1, void *prev1, void *cur1, void *next1, - int w, int prefs, int mrefs, int prefs2, int mrefs2, -@@ -93,5 +120,6 @@ ff_bwdif_init_aarch64(BWDIFContext *s, int bit_depth) - s->filter_intra = filter_intra_helper; - s->filter_line = filter_line_helper; - s->filter_edge = filter_edge_helper; -+ s->filter_line3 = filter_line3_helper; - } - -diff --git a/libavfilter/aarch64/vf_bwdif_neon.S b/libavfilter/aarch64/vf_bwdif_neon.S -index f185e94e3c6f..ae9aab20cdd9 100644 ---- a/libavfilter/aarch64/vf_bwdif_neon.S -+++ b/libavfilter/aarch64/vf_bwdif_neon.S -@@ -154,6 +154,278 @@ const coeffs, align=4 // align 4 means align on 2^4 boundry - .hword 5077, 981 // sp[0] = v0.h[6] - endconst - -+// =========================================================================== -+// -+// void ff_bwdif_filter_line3_neon( -+// void * dst1, // x0 -+// int d_stride, // w1 -+// const void * prev1, // x2 -+// const void * cur1, // x3 -+// const void * next1, // x4 -+// int s_stride, // w5 -+// int w, // w6 -+// int parity, // w7 
-+// int clip_max); // [sp, #0] (Ignored) -+ -+function ff_bwdif_filter_line3_neon, export=1 -+ // Sanity check w -+ cmp w6, #0 -+ ble 99f -+ -+ LDR_COEFFS v0, x17 -+ -+// #define prev2 cur -+// const uint8_t * restrict next2 = parity ? prev : next; -+ cmp w7, #0 -+ csel x17, x2, x4, ne -+ -+ // We want all the V registers - save all the ones we must -+ PUSH_VREGS -+ -+ // Some rearrangement of initial values for nice layout of refs in regs -+ mov w10, w6 // w10 = loop count -+ neg w9, w5 // w9 = mref -+ lsl w8, w9, #1 // w8 = mref2 -+ add w7, w9, w9, LSL #1 // w7 = mref3 -+ lsl w6, w9, #2 // w6 = mref4 -+ mov w11, w5 // w11 = pref -+ lsl w12, w5, #1 // w12 = pref2 -+ add w13, w5, w5, LSL #1 // w13 = pref3 -+ lsl w14, w5, #2 // w14 = pref4 -+ add w15, w5, w5, LSL #2 // w15 = pref5 -+ add w16, w14, w12 // w16 = pref6 -+ -+ lsl w5, w1, #1 // w5 = d_stride * 2 -+ -+// for (x = 0; x < w; x++) { -+// int diff0, diff2; -+// int d0, d2; -+// int temporal_diff0, temporal_diff2; -+// -+// int i1, i2; -+// int j1, j2; -+// int p6, p5, p4, p3, p2, p1, c0, m1, m2, m3, m4; -+ -+10: -+// c0 = prev2[0] + next2[0]; // c0 = v20, v21 -+// d0 = c0 >> 1; // d0 = v10 -+// temporal_diff0 = FFABS(prev2[0] - next2[0]); // td0 = v11 -+ ldr q31, [x3] -+ ldr q21, [x17] -+ uhadd v10.16b, v31.16b, v21.16b -+ uabd v11.16b, v31.16b, v21.16b -+ uaddl v20.8h, v21.8b, v31.8b -+ uaddl2 v21.8h, v21.16b, v31.16b -+ -+ ldr q31, [x3, w6, sxtw] -+ ldr q23, [x17, w6, sxtw] -+ -+// i1 = coef_hf[0] * c0; // i1 = v2-v5 -+ UMULL4K v2, v3, v4, v5, v20, v21, v0.h[2] -+ -+ ldr q30, [x3, w14, sxtw] -+ ldr q25, [x17, w14, sxtw] -+ -+// m4 = prev2[mrefs4] + next2[mrefs4]; // m4 = v22,v23 -+ uaddl v22.8h, v23.8b, v31.8b -+ uaddl2 v23.8h, v23.16b, v31.16b -+ -+// p4 = prev2[prefs4] + next2[prefs4]; // p4 = v24,v25, (p4 >> 1) = v12 -+ uhadd v12.16b, v25.16b, v30.16b -+ uaddl v24.8h, v25.8b, v30.8b -+ uaddl2 v25.8h, v25.16b, v30.16b -+ -+// j1 = -coef_hf[1] * (c0 + p4); // j1 = v6-v9 (-c0:v20,v21) -+ add v20.8h, 
v20.8h, v24.8h -+ add v21.8h, v21.8h, v25.8h -+ SMULL4K v6, v7, v8, v9, v20, v21, v0.h[5] -+ -+// m3 = cur[mrefs3]; // m3 = v20 -+ ldr q20, [x3, w7, sxtw] -+ -+// p3 = cur[prefs3]; // p3 = v21 -+ ldr q21, [x3, w13, sxtw] -+ -+// i1 += coef_hf[2] * (m4 + p4); // (-m4:v22,v23) (-p4:v24,v25) -+ add v22.8h, v22.8h, v24.8h -+ add v23.8h, v23.8h, v25.8h -+ UMLAL4K v2, v3, v4, v5, v22, v23, v0.h[4] -+ -+ ldr q29, [x3, w8, sxtw] -+ ldr q23, [x17, w8, sxtw] -+ -+// i1 -= coef_lf[1] * 4 * (m3 + p3); // - -+ uaddl v30.8h, v20.8b, v21.8b -+ uaddl2 v31.8h, v20.16b, v21.16b -+ -+ ldr q28, [x3, w16, sxtw] -+ ldr q25, [x17, w16, sxtw] -+ -+ UMLSL4K v2, v3, v4, v5, v30, v31, v0.h[1] -+ -+// m2 = prev2[mrefs2] + next2[mrefs2]; // m2 = v22,v23, (m2 >> 1) = v13 -+ uhadd v13.16b, v23.16b, v29.16b -+ uaddl v22.8h, v23.8b, v29.8b -+ uaddl2 v23.8h, v23.16b, v29.16b -+ -+ ldr q31, [x3, w12, sxtw] -+ ldr q27, [x17, w12, sxtw] -+ -+// p6 = prev2[prefs6] + next2[prefs6]; // p6 = v24,v25 -+ uaddl v24.8h, v25.8b, v28.8b -+ uaddl2 v25.8h, v25.16b, v28.16b -+ -+// j1 += coef_hf[2] * (m2 + p6); // (-p6:v24,v25) -+ add v24.8h, v24.8h, v22.8h -+ add v25.8h, v25.8h, v23.8h -+ UMLAL4K v6, v7, v8, v9, v24, v25, v0.h[4] -+ -+// m1 = cur[mrefs]; // m1 = v24 -+ ldr q24, [x3, w9, sxtw] -+ -+// p5 = cur[prefs5]; // p5 = v25 -+ ldr q25, [x3, w15, sxtw] -+ -+// p2 = prev2[prefs2] + next2[prefs2]; // p2 = v26, v27 -+// temporal_diff2 = FFABS(prev2[prefs2] - next2[prefs2]); // td2 = v14 -+// d2 = p2 >> 1; // d2 = v15 -+ uabd v14.16b, v31.16b, v27.16b -+ uhadd v15.16b, v31.16b, v27.16b -+ uaddl v26.8h, v27.8b, v31.8b -+ uaddl2 v27.8h, v27.16b, v31.16b -+ -+// j1 += coef_hf[0] * p2; // - -+ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[2] -+ -+// i1 -= coef_hf[1] * (m2 + p2); // (-m2:v22,v23*) (-p2:v26*,v27*) -+ add v22.8h, v22.8h, v26.8h -+ add v23.8h, v23.8h, v27.8h -+ UMLSL4K v2, v3, v4, v5, v22, v23, v0.h[3] -+ -+// p1 = cur[prefs]; // p1 = v22 -+ ldr q22, [x3, w11, sxtw] -+ -+// j1 -= coef_lf[1] * 4 * (m1 + p5); // 
- -+ uaddl v26.8h, v24.8b, v25.8b -+ uaddl2 v27.8h, v24.16b, v25.16b -+ UMLSL4K v6, v7, v8, v9, v26, v27, v0.h[1] -+ -+// j2 = (coef_sp[0] * (p1 + p3) - coef_sp[1] * (m1 + p5)) >> 13; // (-p5:v25*) j2=v16 -+ uaddl v18.8h, v22.8b, v21.8b -+ uaddl2 v19.8h, v22.16b, v21.16b -+ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] -+ -+ uaddl v18.8h, v24.8b, v25.8b -+ uaddl2 v19.8h, v24.16b, v25.16b -+ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] -+ -+ SQSHRUNN v16, v28, v29, v30, v31, 13 -+ -+// i2 = (coef_sp[0] * (m1 + p1) - coef_sp[1] * (m3 + p3)) >> 13; // (-m3:v20*) i2=v17 -+ uaddl v18.8h, v22.8b, v24.8b -+ uaddl2 v19.8h, v22.16b, v24.16b -+ UMULL4K v28, v29, v30, v31, v18, v19, v0.h[6] -+ -+ uaddl v18.8h, v20.8b, v21.8b -+ uaddl2 v19.8h, v20.16b, v21.16b -+ UMLSL4K v28, v29, v30, v31, v18, v19, v0.h[7] -+ -+ SQSHRUNN v17, v28, v29, v30, v31, 13 -+ -+// i1 += coef_lf[0] * 4 * (m1 + p1); // p1 = v22, m1 = v24 -+ uaddl v26.8h, v24.8b, v22.8b -+ uaddl2 v27.8h, v24.16b, v22.16b -+ UMLAL4K v2, v3, v4, v5, v26, v27, v0.h[0] -+ -+ ldr q31, [x2, w9, sxtw] -+ ldr q29, [x4, w9, sxtw] -+ -+// j1 += coef_lf[0] * 4 * (p1 + p3); // p1 = v22, p3 = v21 -+ uaddl v26.8h, v21.8b, v22.8b -+ uaddl2 v27.8h, v21.16b, v22.16b -+ UMLAL4K v6, v7, v8, v9, v26, v27, v0.h[0] -+ -+ ldr q30, [x2, w11, sxtw] -+ ldr q28, [x4, w11, sxtw] -+ -+// i1 >>= 15; // i1 = v2, -v3, -v4*, -v5* -+ SQSHRUNN v2, v2, v3, v4, v5, 15 -+ -+// j1 >>= 15; // j1 = v3, -v6*, -v7*, -v8*, -v9* -+ SQSHRUNN v3, v6, v7, v8, v9, 15 -+ -+// { -+// int t1 =(FFABS(prev[mrefs] - m1) + FFABS(prev[prefs] - p1)) >> 1; -+// int t2 =(FFABS(next[mrefs] - m1) + FFABS(next[prefs] - p1)) >> 1; -+ uabd v30.16b, v22.16b, v30.16b -+ uabd v31.16b, v24.16b, v31.16b -+ uabd v28.16b, v22.16b, v28.16b -+ uabd v29.16b, v24.16b, v29.16b -+ uhadd v31.16b, v31.16b, v30.16b -+ uhadd v29.16b, v29.16b, v28.16b -+ -+ ldr q27, [x2, w13, sxtw] -+ ldr q26, [x4, w13, sxtw] -+ -+// diff0 = FFMAX3(temporal_diff0 >> 1, t1, t2); // diff0=v18 -+ ushr v18.16b, v11.16b, 
#1 -+ umax v18.16b, v18.16b, v31.16b -+ umax v18.16b, v18.16b, v29.16b -+// } // v28, v30 preserved for next block -+// { // tdiff2 = v14 -+// int t1 =(FFABS(prev[prefs] - p1) + FFABS(prev[prefs3] - p3)) >> 1; -+// int t2 =(FFABS(next[prefs] - p1) + FFABS(next[prefs3] - p3)) >> 1; -+ uabd v31.16b, v21.16b, v27.16b -+ uabd v29.16b, v21.16b, v26.16b -+ uhadd v31.16b, v31.16b, v30.16b -+ uhadd v29.16b, v29.16b, v28.16b -+ -+// diff2 = FFMAX3(temporal_diff2 >> 1, t1, t2); // diff2=v19 -+ ushr v19.16b, v14.16b, #1 -+ umax v19.16b, v19.16b, v31.16b -+ umax v19.16b, v19.16b, v29.16b -+// } -+ -+ // diff0 = v18, (m2 >> 1) = v13, m1 = v24, d0 = v10, p1 = v22, d2 = v15 -+ SPAT_CHECK v18, v13, v24, v10, v22, v15, v31, v30, v29, v28 -+ -+ // diff2 = v19, d0 = v10, p1 = v22, d2 = v15, p3 = v21, (p4 >> 1) = v12 -+ SPAT_CHECK v19, v10, v22, v15, v21, v12, v31, v30, v29, v28 -+ -+ // j1 = v3, j2 = v16, p1 = v22, d2 = v15, p3 = v21, td2 = v14, diff2 = v19 -+ INTERPOL v3, v3, v16, v22, v15, v21, v14, v19, v31, v30, v29 -+ -+// dst[d_stride * 2] = av_clip_uint8(interpol); -+ str q3, [x0, w5, sxtw] -+ -+// dst[d_stride] = p1; -+ str q22, [x0, w1, sxtw] -+ -+ // i1 = v2, i2 = v17, m1 = v24, d0 = v10, p1 = v22, td2 = v11, diff2 = v18 -+ INTERPOL v2, v2, v17, v24, v10, v22, v11, v18, v31, v30, v29 -+ -+// dst[0] = av_clip_uint8(interpol); -+ str q2, [x0], #16 -+// } -+// -+// dst++; -+// cur++; -+// prev++; -+// prev2++; -+// next++; -+// } -+ subs w10, w10, #16 -+ add x2, x2, #16 -+ add x3, x3, #16 -+ add x4, x4, #16 -+ add x17, x17, #16 -+ bgt 10b -+ -+ POP_VREGS -+99: -+ ret -+endfunc -+ - // =========================================================================== - // - // void filter_line( - -From 953a9d1e34140084185e747ebf3f3877b2779267 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 24 Jul 2023 16:39:06 +0100 -Subject: [PATCH 152/186] weak_link: Fix ref count init - -(cherry picked from commit d6de45b15a0c96bfdc96bbc441963a60945e5eba) ---- - libavcodec/weak_link.c | 1 + - 
1 file changed, 1 insertion(+) - -diff --git a/libavcodec/weak_link.c b/libavcodec/weak_link.c -index f234a985b9c1..5a79e89ed7c2 100644 ---- a/libavcodec/weak_link.c -+++ b/libavcodec/weak_link.c -@@ -19,6 +19,7 @@ struct ff_weak_link_master * ff_weak_link_new(void * p) - struct ff_weak_link_master * w = malloc(sizeof(*w)); - if (!w) - return NULL; -+ atomic_init(&w->ref_count, 0); - w->ptr = p; - if (pthread_rwlock_init(&w->lock, NULL)) { - free(w); - -From 8ffc0497c128a4d5f60849fe56e7f985e6a7ef71 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 24 Jul 2023 17:28:06 +0100 -Subject: [PATCH 153/186] v4l2_m2m: Check fd before attempting to close (fix - valgrind warn) - -(cherry picked from commit befa42878d054d1fba53d5da14406faaae224daf) ---- - libavcodec/v4l2_m2m.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index 28d9ed49887e..238ceea23546 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -340,8 +340,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) - ff_v4l2_context_release(&s->output); - - dmabufs_ctl_unref(&s->db_ctl); -- close(s->fd); -- s->fd = -1; -+ -+ if (s->fd != -1) { -+ close(s->fd); -+ s->fd = -1; -+ } - - s->self_ref = NULL; - // This is only called on avctx close so after this point we don't have that - -From 567f293b764431685f83b368736b18cfdc42e29e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 26 Jul 2023 16:29:39 +0000 -Subject: [PATCH 154/186] v4l2_req_devscan: Fix udev leak - -(cherry picked from commit 53b17ffd8a8890ef483163f3c9b0f96b437303f1) ---- - libavcodec/v4l2_req_devscan.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_req_devscan.c b/libavcodec/v4l2_req_devscan.c -index cfa94d55c49b..ee8527ba1f35 100644 ---- a/libavcodec/v4l2_req_devscan.c -+++ b/libavcodec/v4l2_req_devscan.c -@@ -437,12 +437,14 @@ int devscan_build(void * const dc, struct devscan **pscan) - } - - 
udev_enumerate_unref(enumerate); -+ udev_unref(udev); - - *pscan = scan; - return 0; - - fail: -- udev_unref(udev); -+ if (udev) -+ udev_unref(udev); - devscan_delete(&scan); - return ret; - } - -From 74a85ee6638b4fdedeaadd0d62c6f6c46345ecad Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 26 Jul 2023 16:42:27 +0000 -Subject: [PATCH 155/186] v4l2_m2m: Fix device_ref leak - -(cherry picked from commit bfea15c07b4301cd1208981c8f221e5e3a598b34) ---- - libavcodec/v4l2_m2m.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index 238ceea23546..add64b8e63e6 100644 ---- a/libavcodec/v4l2_m2m.c -+++ b/libavcodec/v4l2_m2m.c -@@ -338,6 +338,7 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) - } - - ff_v4l2_context_release(&s->output); -+ av_buffer_unref(&s->device_ref); - - dmabufs_ctl_unref(&s->db_ctl); - - -From d018ca7bb4428e622e0acb5f3fa6b3ae6a52873e Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 28 Jul 2023 16:10:01 +0000 -Subject: [PATCH 156/186] v4l2_m2m_dec: Avoid structure init warnings when - struct changes - -(cherry picked from commit 8a836af420ed8c8dba90e2fd88691bcaa0668f8a) ---- - libavcodec/v4l2_m2m_dec.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 11c83b2d6643..584e0b8825f9 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -1004,11 +1004,13 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) - #if CONFIG_H264_DECODER - case AV_CODEC_ID_H264: - { -- H264ParamSets ps = {{NULL}}; -+ H264ParamSets ps; - int is_avc = 0; - int nal_length_size = 0; - int ret; - -+ memset(&ps, 0, sizeof(ps)); -+ - ret = ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size, - &ps, &is_avc, &nal_length_size, - avctx->err_recognition, avctx); -@@ -1034,12 +1036,15 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) - #if CONFIG_HEVC_DECODER - 
case AV_CODEC_ID_HEVC: - { -- HEVCParamSets ps = {{NULL}}; -- HEVCSEI sei = {{{{0}}}}; -+ HEVCParamSets ps; -+ HEVCSEI sei; - int is_nalff = 0; - int nal_length_size = 0; - int ret; - -+ memset(&ps, 0, sizeof(ps)); -+ memset(&sei, 0, sizeof(sei)); -+ - ret = ff_hevc_decode_extradata(avctx->extradata, avctx->extradata_size, - &ps, &sei, &is_nalff, &nal_length_size, - avctx->err_recognition, 0, avctx); - -From a466a89ac3c68a4bb41af63760caef0a624c86dd Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Sat, 22 Jul 2023 12:33:50 +0000 -Subject: [PATCH 157/186] v4l2_m2m_dec: Avoid calling get_format if no V4L2 - decoder device - -Move the get_format callback to after the decoder device has been found. -This means that get_format will never be called if there is no chance -that init will succeed which helps programs (such as VLC) that do -significant processing in that callback to avoid it. It also means that -the list of formats availible can actually represent reality. - -(cherry picked from commit 3b27cb41d7df73c054452fa49269988d4df32409) ---- - libavcodec/v4l2_context.c | 41 +++++++++++++ - libavcodec/v4l2_context.h | 13 ++++ - libavcodec/v4l2_m2m_dec.c | 122 ++++++++++++++++++++++++++++---------- - 3 files changed, 145 insertions(+), 31 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 79a31cf9300b..978a487ca98d 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -1064,6 +1064,47 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt, int timeout) - return 0; - } - -+// Return 0 terminated list of drm fourcc video formats for this context -+// NULL if none found or error -+// Returned list is malloced so must be freed -+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN) -+{ -+ unsigned int i; -+ unsigned int n = 0; -+ unsigned int size = 0; -+ uint32_t * e = NULL; -+ *pN = 0; -+ -+ for (i = 0; i < 1024; ++i) { -+ struct v4l2_fmtdesc fdesc = { -+ .index = i, -+ 
.type = ctx->type -+ }; -+ -+ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_ENUM_FMT, &fdesc)) -+ return e; -+ -+ if (n + 1 >= size) { -+ unsigned int newsize = (size == 0) ? 16 : size * 2; -+ uint32_t * t = av_realloc(e, newsize * sizeof(*t)); -+ if (!t) -+ return e; -+ e = t; -+ size = newsize; -+ } -+ -+ e[n] = fdesc.pixelformat; -+ e[++n] = 0; -+ if (pN) -+ *pN = n; -+ } -+ -+ // If we've looped 1024 times we are clearly confused -+ *pN = 0; -+ av_free(e); -+ return NULL; -+} -+ - int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) - { - struct v4l2_format_update fmt = { 0 }; -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 5afed3e6ecb4..f4240f7dddb2 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -151,6 +151,19 @@ int ff_v4l2_context_set_format(V4L2Context* ctx); - */ - int ff_v4l2_context_get_format(V4L2Context* ctx, int probe); - -+/** -+ * Get the list of drm fourcc pixel formats for this context -+ * -+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context -+ * description for required variables. -+ * @param[in] pN A pointer to receive the number of formats -+ * found. May be NULL if not wanted. -+ * @return Pointer to malloced list of zero terminated formats, -+ * NULL if none or error. As list is malloced it must be -+ * freed. -+ */ -+uint32_t * ff_v4l2_context_enum_drm_formats(V4L2Context *ctx, unsigned int *pN); -+ - /** - * Releases a V4L2Context. 
- * -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 584e0b8825f9..c4f38cc24e1f 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -873,10 +873,9 @@ check_profile(AVCodecContext *const avctx, V4L2m2mContext *const s) - }; - - static int --check_size(AVCodecContext * const avctx, V4L2m2mContext * const s) -+check_size(AVCodecContext * const avctx, V4L2m2mContext * const s, const uint32_t fcc) - { - unsigned int i; -- const uint32_t fcc = ff_v4l2_get_format_pixelformat(&s->capture.format); - const uint32_t w = avctx->coded_width; - const uint32_t h = avctx->coded_height; - -@@ -1073,12 +1072,91 @@ parse_extradata(AVCodecContext * const avctx, V4L2m2mContext * const s) - } - } - -+static int -+choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) -+{ -+ const V4L2m2mPriv * const priv = avctx->priv_data; -+ unsigned int fmts_n; -+ uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n); -+ enum AVPixelFormat *fmts2 = NULL; -+ enum AVPixelFormat t; -+ enum AVPixelFormat gf_pix_fmt; -+ unsigned int i; -+ unsigned int n = 0; -+ unsigned int pref_n = 1; -+ int rv = AVERROR(ENOENT); -+ -+ if (!fmts) -+ return AVERROR(ENOENT); -+ -+ if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 2))) == NULL) { -+ rv = AVERROR(ENOMEM); -+ goto error; -+ } -+ -+ // Filter for formats that are supported by ffmpeg and -+ // can accomodate the stream size -+ fmts2[n++] = AV_PIX_FMT_DRM_PRIME; -+ for (i = 0; i != fmts_n; ++i) { -+ const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO); -+ if (f == AV_PIX_FMT_NONE) -+ continue; -+ -+ if (check_size(avctx, s, fmts[i]) != 0) -+ continue; -+ -+ if (f == priv->pix_fmt) -+ pref_n = n; -+ fmts2[n++] = f; -+ } -+ fmts2[n] = AV_PIX_FMT_NONE; -+ -+ if (n < 2) { -+ av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__); -+ goto error; -+ } -+ -+ // Put preferred s/w format at the end - ff_get_format will put it in 
sw_pix_fmt -+ t = fmts2[n - 1]; -+ fmts2[n - 1] = fmts2[pref_n]; -+ fmts2[pref_n] = t; -+ -+ gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); -+ av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", -+ avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), -+ avctx->coded_width, avctx->coded_height, -+ gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); -+ -+ if (gf_pix_fmt == AV_PIX_FMT_NONE) -+ goto error; -+ -+ if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { -+ avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -+ s->capture.av_pix_fmt = avctx->sw_pix_fmt; -+ s->output_drm = 1; -+ } -+ else { -+ avctx->pix_fmt = gf_pix_fmt; -+ s->capture.av_pix_fmt = gf_pix_fmt; -+ s->output_drm = 0; -+ } -+ -+ // Get format converts capture.av_pix_fmt back into a V4L2 format in the context -+ if ((rv = ff_v4l2_context_get_format(&s->capture, 0)) != 0) -+ goto error; -+ rv = ff_v4l2_context_set_format(&s->capture); -+ -+error: -+ av_free(fmts2); -+ av_free(fmts); -+ return rv; -+} -+ - static av_cold int v4l2_decode_init(AVCodecContext *avctx) - { - V4L2Context *capture, *output; - V4L2m2mContext *s; - V4L2m2mPriv *priv = avctx->priv_data; -- int gf_pix_fmt; - int ret; - - av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__); -@@ -1122,28 +1200,8 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - capture->av_pix_fmt = avctx->pix_fmt; - capture->min_buf_size = 0; - -- /* the client requests the codec to generate DRM frames: -- * - data[0] will therefore point to the returned AVDRMFrameDescriptor -- * check the ff_v4l2_buffer_to_avframe conversion function. -- * - the DRM frame format is passed in the DRM frame descriptor layer. -- * check the v4l2_get_drm_frame function. 
-- */ -- -- avctx->sw_pix_fmt = avctx->pix_fmt; -- gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); -- av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", -- avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), -- avctx->coded_width, avctx->coded_height, -- gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt)); -- -- if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) { -- avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -- s->output_drm = 1; -- } -- else { -- capture->av_pix_fmt = gf_pix_fmt; -- s->output_drm = 0; -- } -+ capture->av_pix_fmt = AV_PIX_FMT_NONE; -+ s->output_drm = 0; - - s->db_ctl = NULL; - if (priv->dmabuf_alloc != NULL && strcmp(priv->dmabuf_alloc, "v4l2") != 0) { -@@ -1185,19 +1243,21 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) - return ret; - } - -- if ((ret = v4l2_prepare_decoder(s)) < 0) -- return ret; -- - if ((ret = get_quirks(avctx, s)) != 0) - return ret; - -- if ((ret = check_size(avctx, s)) != 0) -- return ret; -- - if ((ret = check_profile(avctx, s)) != 0) { - av_log(avctx, AV_LOG_WARNING, "Profile %d not supported by decode\n", avctx->profile); - return ret; - } -+ -+ // Size check done as part of format filtering -+ if ((ret = choose_capture_format(avctx, s)) != 0) -+ return ret; -+ -+ if ((ret = v4l2_prepare_decoder(s)) < 0) -+ return ret; -+ - return 0; - } - - -From ff30fb1c60c9753eb42d107f083bdadea7918ebe Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Aug 2023 06:26:35 +0000 -Subject: [PATCH 158/186] v4l2_req_dmabufs: Fix crash on free if dmabuf - imported - -Thanks to Ratchanan Srirattanamet for finding this ---- - libavcodec/v4l2_req_dmabufs.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c -index acc0366e7630..017c3892a593 100644 ---- a/libavcodec/v4l2_req_dmabufs.c -+++ b/libavcodec/v4l2_req_dmabufs.c -@@ -232,7 +232,8 @@ void dmabuf_free(struct dmabuf_h 
* dh) - request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs); - #endif - -- dh->fns->buf_free(dh); -+ if (dh->fns != NULL && dh->fns->buf_free) -+ dh->fns->buf_free(dh); - - if (dh->mapptr != MAP_FAILED && dh->mapptr != NULL) - munmap(dh->mapptr, dh->size); - -From 7342db8c6a46af0ebe38369cb1cce6eacaeed528 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Aug 2023 06:34:47 +0000 -Subject: [PATCH 159/186] aarch64/rgb2rgb_neon: Fix bgr24->yuv matrix read to - flip correct way - ---- - libswscale/aarch64/rgb2rgb_neon.S | 15 ++++++--------- - 1 file changed, 6 insertions(+), 9 deletions(-) - -diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S -index 476ca723a0ef..077d1dd5938a 100644 ---- a/libswscale/aarch64/rgb2rgb_neon.S -+++ b/libswscale/aarch64/rgb2rgb_neon.S -@@ -92,15 +92,12 @@ endfunc - - function ff_rgb24toyv12_aarch64, export=1 - ldr x15, [sp, #8] -- ld1 {v3.s}[2], [x15], #4 -- ld1 {v3.s}[1], [x15], #4 -- ld1 {v3.s}[0], [x15], #4 -- ld1 {v4.s}[2], [x15], #4 -- ld1 {v4.s}[1], [x15], #4 -- ld1 {v4.s}[0], [x15], #4 -- ld1 {v5.s}[2], [x15], #4 -- ld1 {v5.s}[1], [x15], #4 -- ld1 {v5.s}[0], [x15] -+ ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 -+ ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 -+ ld3 {v3.s, v4.s, v5.s}[2], [x15] -+ mov v6.16b, v3.16b -+ mov v3.16b, v5.16b -+ mov v5.16b, v6.16b - b 99f - endfunc - - -From 2de40aab0aa4e2ee9be96b60efda8f5e99a4e445 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Aug 2023 06:36:51 +0000 -Subject: [PATCH 160/186] aarch64/rgb2rgb_neon: Add macros to make common code - explicit - ---- - libswscale/aarch64/rgb2rgb_neon.S | 276 ++++++++++-------------------- - 1 file changed, 95 insertions(+), 181 deletions(-) - -diff --git a/libswscale/aarch64/rgb2rgb_neon.S b/libswscale/aarch64/rgb2rgb_neon.S -index 077d1dd5938a..0956800b4185 100644 ---- a/libswscale/aarch64/rgb2rgb_neon.S -+++ b/libswscale/aarch64/rgb2rgb_neon.S -@@ -78,6 +78,67 @@ function 
ff_interleave_bytes_neon, export=1 - ret - endfunc - -+// Expand rgb2 into r0+r1/g0+g1/b0+b1 -+.macro XRGB3Y r0, g0, b0, r1, g1, b1, r2, g2, b2 -+ uxtl \r0\().8h, \r2\().8b -+ uxtl \g0\().8h, \g2\().8b -+ uxtl \b0\().8h, \b2\().8b -+ -+ uxtl2 \r1\().8h, \r2\().16b -+ uxtl2 \g1\().8h, \g2\().16b -+ uxtl2 \b1\().8h, \b2\().16b -+.endm -+ -+// Expand rgb2 into r0+r1/g0+g1/b0+b1 -+// and pick every other el to put back into rgb2 for chroma -+.macro XRGB3YC r0, g0, b0, r1, g1, b1, r2, g2, b2 -+ XRGB3Y \r0, \g0, \b0, \r1, \g1, \b1, \r2, \g2, \b2 -+ -+ bic \r2\().8h, #0xff, LSL #8 -+ bic \g2\().8h, #0xff, LSL #8 -+ bic \b2\().8h, #0xff, LSL #8 -+.endm -+ -+.macro SMLAL3 d0, d1, s0, s1, s2, c0, c1, c2 -+ smull \d0\().4s, \s0\().4h, \c0 -+ smlal \d0\().4s, \s1\().4h, \c1 -+ smlal \d0\().4s, \s2\().4h, \c2 -+ smull2 \d1\().4s, \s0\().8h, \c0 -+ smlal2 \d1\().4s, \s1\().8h, \c1 -+ smlal2 \d1\().4s, \s2\().8h, \c2 -+.endm -+ -+// d0 may be s0 -+// s0, s2 corrupted -+.macro SHRN_Y d0, s0, s1, s2, s3, k128h -+ shrn \s0\().4h, \s0\().4s, #12 -+ shrn2 \s0\().8h, \s1\().4s, #12 -+ add \s0\().8h, \s0\().8h, \k128h\().8h // +128 (>> 3 = 16) -+ sqrshrun \d0\().8b, \s0\().8h, #3 -+ shrn \s2\().4h, \s2\().4s, #12 -+ shrn2 \s2\().8h, \s3\().4s, #12 -+ add \s2\().8h, \s2\().8h, \k128h\().8h -+ sqrshrun2 \d0\().16b, v28.8h, #3 -+.endm -+ -+.macro SHRN_C d0, s0, s1, k128b -+ shrn \s0\().4h, \s0\().4s, #14 -+ shrn2 \s0\().8h, \s1\().4s, #14 -+ sqrshrn \s0\().8b, \s0\().8h, #1 -+ add \d0\().8b, \s0\().8b, \k128b\().8b // +128 -+.endm -+ -+.macro STB2V s0, n, a -+ st1 {\s0\().b}[(\n+0)], [\a], #1 -+ st1 {\s0\().b}[(\n+1)], [\a], #1 -+.endm -+ -+.macro STB4V s0, n, a -+ STB2V \s0, (\n+0), \a -+ STB2V \s0, (\n+2), \a -+.endm -+ -+ - // void ff_rgb24toyv12_aarch64( - // const uint8_t *src, // x0 - // uint8_t *ydst, // x1 -@@ -111,7 +172,7 @@ endfunc - // int lumStride, // w6 - // int chromStride, // w7 - // int srcStr, // [sp, #0] --// int32_t *rgb2yuv); // [sp, #8] -+// int32_t *rgb2yuv); // 
[sp, #8] (including Mac) - - // regs - // v0-2 Src bytes - reused as chroma src -@@ -130,13 +191,12 @@ endfunc - // v30 V out - // v31 V tmp - --// Assumes Little Endian in tail stores & conversion matrix -- - function ff_bgr24toyv12_aarch64, export=1 - ldr x15, [sp, #8] - ld3 {v3.s, v4.s, v5.s}[0], [x15], #12 - ld3 {v3.s, v4.s, v5.s}[1], [x15], #12 - ld3 {v3.s, v4.s, v5.s}[2], [x15] -+ - 99: - ldr w14, [sp, #0] - movi v7.8b, #128 -@@ -167,73 +227,29 @@ function ff_bgr24toyv12_aarch64, export=1 - b.le 13f - - 10: -- uxtl v16.8h, v0.8b -- uxtl v17.8h, v1.8b -- uxtl v18.8h, v2.8b -- -- uxtl2 v20.8h, v0.16b -- uxtl2 v21.8h, v1.16b -- uxtl2 v22.8h, v2.16b -- -- bic v0.8h, #0xff, LSL #8 -- bic v1.8h, #0xff, LSL #8 -- bic v2.8h, #0xff, LSL #8 -+ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 - - // Testing shows it is faster to stack the smull/smlal ops together - // rather than interleave them between channels and indeed even the - // shift/add sections seem happier not interleaved - - // Y0 -- smull v26.4s, v16.4h, v3.h[0] -- smlal v26.4s, v17.4h, v4.h[0] -- smlal v26.4s, v18.4h, v5.h[0] -- smull2 v27.4s, v16.8h, v3.h[0] -- smlal2 v27.4s, v17.8h, v4.h[0] -- smlal2 v27.4s, v18.8h, v5.h[0] -+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] - // Y1 -- smull v28.4s, v20.4h, v3.h[0] -- smlal v28.4s, v21.4h, v4.h[0] -- smlal v28.4s, v22.4h, v5.h[0] -- smull2 v29.4s, v20.8h, v3.h[0] -- smlal2 v29.4s, v21.8h, v4.h[0] -- smlal2 v29.4s, v22.8h, v5.h[0] -- shrn v26.4h, v26.4s, #12 -- shrn2 v26.8h, v27.4s, #12 -- add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- sqrshrun v26.8b, v26.8h, #3 -- shrn v28.4h, v28.4s, #12 -- shrn2 v28.8h, v29.4s, #12 -- add v28.8h, v28.8h, v6.8h -- sqrshrun2 v26.16b, v28.8h, #3 -- // Y0/Y1 -+ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] -+ SHRN_Y v26, v26, v27, v28, v29, v6 - - // U - // Vector subscript *2 as we loaded into S but are only using H -- smull v24.4s, v0.4h, v3.h[2] -- smlal v24.4s, v1.4h, v4.h[2] -- smlal v24.4s, 
v2.4h, v5.h[2] -- smull2 v25.4s, v0.8h, v3.h[2] -- smlal2 v25.4s, v1.8h, v4.h[2] -- smlal2 v25.4s, v2.8h, v5.h[2] -+ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] - - // V -- smull v30.4s, v0.4h, v3.h[4] -- smlal v30.4s, v1.4h, v4.h[4] -- smlal v30.4s, v2.4h, v5.h[4] -- smull2 v31.4s, v0.8h, v3.h[4] -- smlal2 v31.4s, v1.8h, v4.h[4] -- smlal2 v31.4s, v2.8h, v5.h[4] -+ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] - - ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 - -- shrn v24.4h, v24.4s, #14 -- shrn2 v24.8h, v25.4s, #14 -- sqrshrn v24.8b, v24.8h, #1 -- add v24.8b, v24.8b, v7.8b // +128 -- shrn v30.4h, v30.4s, #14 -- shrn2 v30.8h, v31.4s, #14 -- sqrshrn v30.8b, v30.8h, #1 -- add v30.8b, v30.8b, v7.8b // +128 -+ SHRN_C v24, v24, v25, v7 -+ SHRN_C v30, v30, v31, v7 - - subs w9, w9, #16 - -@@ -250,69 +266,21 @@ function ff_bgr24toyv12_aarch64, export=1 - 13: - // Body is simple copy of main loop body minus preload - -- uxtl v16.8h, v0.8b -- uxtl v17.8h, v1.8b -- uxtl v18.8h, v2.8b -- -- uxtl2 v20.8h, v0.16b -- uxtl2 v21.8h, v1.16b -- uxtl2 v22.8h, v2.16b -- -- bic v0.8h, #0xff, LSL #8 -- bic v1.8h, #0xff, LSL #8 -- bic v2.8h, #0xff, LSL #8 -- -+ XRGB3YC v16, v17, v18, v20, v21, v22, v0, v1, v2 - // Y0 -- smull v26.4s, v16.4h, v3.h[0] -- smlal v26.4s, v17.4h, v4.h[0] -- smlal v26.4s, v18.4h, v5.h[0] -- smull2 v27.4s, v16.8h, v3.h[0] -- smlal2 v27.4s, v17.8h, v4.h[0] -- smlal2 v27.4s, v18.8h, v5.h[0] -+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] - // Y1 -- smull v28.4s, v20.4h, v3.h[0] -- smlal v28.4s, v21.4h, v4.h[0] -- smlal v28.4s, v22.4h, v5.h[0] -- smull2 v29.4s, v20.8h, v3.h[0] -- smlal2 v29.4s, v21.8h, v4.h[0] -- smlal2 v29.4s, v22.8h, v5.h[0] -- shrn v26.4h, v26.4s, #12 -- shrn2 v26.8h, v27.4s, #12 -- add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- sqrshrun v26.8b, v26.8h, #3 -- shrn v28.4h, v28.4s, #12 -- shrn2 v28.8h, v29.4s, #12 -- add v28.8h, v28.8h, v6.8h -- sqrshrun2 v26.16b, v28.8h, #3 -- // Y0/Y1 -- -+ SMLAL3 v28, v29, v20, 
v21, v22, v3.h[0], v4.h[0], v5.h[0] -+ SHRN_Y v26, v26, v27, v28, v29, v6 - // U -- // Vector subscript *2 as we loaded into S but are only using H -- smull v24.4s, v0.4h, v3.h[2] -- smlal v24.4s, v1.4h, v4.h[2] -- smlal v24.4s, v2.4h, v5.h[2] -- smull2 v25.4s, v0.8h, v3.h[2] -- smlal2 v25.4s, v1.8h, v4.h[2] -- smlal2 v25.4s, v2.8h, v5.h[2] -- -+ SMLAL3 v24, v25, v0, v1, v2, v3.h[2], v4.h[2], v5.h[2] - // V -- smull v30.4s, v0.4h, v3.h[4] -- smlal v30.4s, v1.4h, v4.h[4] -- smlal v30.4s, v2.4h, v5.h[4] -- smull2 v31.4s, v0.8h, v3.h[4] -- smlal2 v31.4s, v1.8h, v4.h[4] -- smlal2 v31.4s, v2.8h, v5.h[4] -+ SMLAL3 v30, v31, v0, v1, v2, v3.h[4], v4.h[4], v5.h[4] - - cmp w9, #-16 - -- shrn v24.4h, v24.4s, #14 -- shrn2 v24.8h, v25.4s, #14 -- sqrshrn v24.8b, v24.8h, #1 -- add v24.8b, v24.8b, v7.8b // +128 -- shrn v30.4h, v30.4s, #14 -- shrn2 v30.8h, v31.4s, #14 -- sqrshrn v30.8b, v30.8h, #1 -- add v30.8b, v30.8b, v7.8b // +128 -+ SHRN_C v24, v24, v25, v7 -+ SHRN_C v30, v30, v31, v7 - - // Here: - // w9 == 0 width % 16 == 0, tail done -@@ -347,14 +315,14 @@ function ff_bgr24toyv12_aarch64, export=1 - 2: - tbz w9, #3, 1f - st1 {v26.8b}, [x11], #8 -- st1 {v24.s}[0], [x12], #4 -- st1 {v30.s}[0], [x13], #4 -+ STB4V v24, 0, x12 -+ STB4V v30, 0, x13 - 1: tbz w9, #2, 1f -- st1 {v26.s}[2], [x11], #4 -- st1 {v24.h}[2], [x12], #2 -- st1 {v30.h}[2], [x13], #2 -+ STB4V v26 8, x11 -+ STB2V v24, 4, x12 -+ STB2V v30, 4, x13 - 1: tbz w9, #1, 1f -- st1 {v26.h}[6], [x11], #2 -+ STB2V v26, 12, x11 - st1 {v24.b}[6], [x12], #1 - st1 {v30.b}[6], [x13], #1 - 1: tbz w9, #0, 1f -@@ -381,44 +349,15 @@ function ff_bgr24toyv12_aarch64, export=1 - b.le 13f - - 10: -- uxtl v16.8h, v0.8b -- uxtl v17.8h, v1.8b -- uxtl v18.8h, v2.8b -- -- uxtl2 v20.8h, v0.16b -- uxtl2 v21.8h, v1.16b -- uxtl2 v22.8h, v2.16b -- -- // Testing shows it is faster to stack the smull/smlal ops together -- // rather than interleave them between channels and indeed even the -- // shift/add sections seem happier not interleaved -- -+ 
XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 - // Y0 -- smull v26.4s, v16.4h, v3.h[0] -- smlal v26.4s, v17.4h, v4.h[0] -- smlal v26.4s, v18.4h, v5.h[0] -- smull2 v27.4s, v16.8h, v3.h[0] -- smlal2 v27.4s, v17.8h, v4.h[0] -- smlal2 v27.4s, v18.8h, v5.h[0] -+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] - // Y1 -- smull v28.4s, v20.4h, v3.h[0] -- smlal v28.4s, v21.4h, v4.h[0] -- smlal v28.4s, v22.4h, v5.h[0] -- smull2 v29.4s, v20.8h, v3.h[0] -- smlal2 v29.4s, v21.8h, v4.h[0] -- smlal2 v29.4s, v22.8h, v5.h[0] -+ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] - - ld3 {v0.16b, v1.16b, v2.16b}, [x10], #48 - -- shrn v26.4h, v26.4s, #12 -- shrn2 v26.8h, v27.4s, #12 -- add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- sqrshrun v26.8b, v26.8h, #3 -- shrn v28.4h, v28.4s, #12 -- shrn2 v28.8h, v29.4s, #12 -- add v28.8h, v28.8h, v6.8h -- sqrshrun2 v26.16b, v28.8h, #3 -- // Y0/Y1 -+ SHRN_Y v26, v26, v27, v28, v29, v6 - - subs w9, w9, #16 - -@@ -433,40 +372,15 @@ function ff_bgr24toyv12_aarch64, export=1 - 13: - // Body is simple copy of main loop body minus preload - -- uxtl v16.8h, v0.8b -- uxtl v17.8h, v1.8b -- uxtl v18.8h, v2.8b -- -- uxtl2 v20.8h, v0.16b -- uxtl2 v21.8h, v1.16b -- uxtl2 v22.8h, v2.16b -- -+ XRGB3Y v16, v17, v18, v20, v21, v22, v0, v1, v2 - // Y0 -- smull v26.4s, v16.4h, v3.h[0] -- smlal v26.4s, v17.4h, v4.h[0] -- smlal v26.4s, v18.4h, v5.h[0] -- smull2 v27.4s, v16.8h, v3.h[0] -- smlal2 v27.4s, v17.8h, v4.h[0] -- smlal2 v27.4s, v18.8h, v5.h[0] -+ SMLAL3 v26, v27, v16, v17, v18, v3.h[0], v4.h[0], v5.h[0] - // Y1 -- smull v28.4s, v20.4h, v3.h[0] -- smlal v28.4s, v21.4h, v4.h[0] -- smlal v28.4s, v22.4h, v5.h[0] -- smull2 v29.4s, v20.8h, v3.h[0] -- smlal2 v29.4s, v21.8h, v4.h[0] -- smlal2 v29.4s, v22.8h, v5.h[0] -+ SMLAL3 v28, v29, v20, v21, v22, v3.h[0], v4.h[0], v5.h[0] - - cmp w9, #-16 - -- shrn v26.4h, v26.4s, #12 -- shrn2 v26.8h, v27.4s, #12 -- add v26.8h, v26.8h, v6.8h // +128 (>> 3 = 16) -- sqrshrun v26.8b, v26.8h, #3 -- shrn 
v28.4h, v28.4s, #12 -- shrn2 v28.8h, v29.4s, #12 -- add v28.8h, v28.8h, v6.8h -- sqrshrun2 v26.16b, v28.8h, #3 -- // Y0/Y1 -+ SHRN_Y v26, v26, v27, v28, v29, v6 - - // Here: - // w9 == 0 width % 16 == 0, tail done -@@ -500,9 +414,9 @@ function ff_bgr24toyv12_aarch64, export=1 - tbz w9, #3, 1f - st1 {v26.8b}, [x11], #8 - 1: tbz w9, #2, 1f -- st1 {v26.s}[2], [x11], #4 -+ STB4V v26, 8, x11 - 1: tbz w9, #1, 1f -- st1 {v26.h}[6], [x11], #2 -+ STB2V v26, 12, x11 - 1: tbz w9, #0, 1f - st1 {v26.b}[14], [x11] - 1: - -From d711e7b9dde2ff557dc28f7456a6986db32ed52b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 10 Aug 2023 08:11:21 +0000 -Subject: [PATCH 161/186] v4l2_req_media: Fix dmabuf fd leak in MMAP mode - ---- - libavcodec/v4l2_req_media.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c -index 1a9944774a48..0394bb2b23f3 100644 ---- a/libavcodec/v4l2_req_media.c -+++ b/libavcodec/v4l2_req_media.c -@@ -1205,8 +1205,10 @@ qe_import_from_buf(struct mediabufs_ctl *const mbc, struct qent_base * const be, - .plane = i, - .flags = O_RDWR, // *** Arguably O_RDONLY would be fine - }; -- if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) -+ if (ioctl(mbc->vfd, VIDIOC_EXPBUF, &xbuf) == 0) { - be->dh[i] = dmabuf_import(xbuf.fd, planes[i].length); -+ close(xbuf.fd); // dmabuf_import dups the fd so close this one -+ } - } - else { - be->dh[i] = dmabuf_import_mmap( - -From 3b3a95a51be4c5187808b55d41d98c71a67b91f6 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 6 Sep 2023 14:36:41 +0100 -Subject: [PATCH 162/186] v4l2m2m_dec: Having calculated available pixfmt - actually pass them to user - ---- - libavcodec/v4l2_m2m_dec.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index c4f38cc24e1f..f67dd23ba1cc 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -1099,6 +1099,7 @@ 
choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) - fmts2[n++] = AV_PIX_FMT_DRM_PRIME; - for (i = 0; i != fmts_n; ++i) { - const enum AVPixelFormat f = ff_v4l2_format_v4l2_to_avfmt(fmts[i], AV_CODEC_ID_RAWVIDEO); -+ av_log(avctx, AV_LOG_TRACE, "VLC pix %s -> %s\n", av_fourcc2str(fmts[i]), av_get_pix_fmt_name(f)); - if (f == AV_PIX_FMT_NONE) - continue; - -@@ -1121,7 +1122,7 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) - fmts2[n - 1] = fmts2[pref_n]; - fmts2[pref_n] = t; - -- gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts); -+ gf_pix_fmt = ff_get_format(avctx, fmts2); - av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", - avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), - avctx->coded_width, avctx->coded_height, - -From 1760b9854cda4fd94d19d2529a71134c69809b98 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 6 Sep 2023 14:45:16 +0100 -Subject: [PATCH 163/186] v4l2m2m: Simplify reinit - also fixes fmt selection - ---- - libavcodec/v4l2_context.c | 41 +++++++++++++++------------------------ - 1 file changed, 16 insertions(+), 25 deletions(-) - -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 978a487ca98d..ed126f8f2b35 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -28,6 +28,7 @@ - #include - #include - #include "libavutil/avassert.h" -+#include "libavutil/pixdesc.h" - #include "libavcodec/avcodec.h" - #include "decode.h" - #include "v4l2_buffers.h" -@@ -357,13 +358,23 @@ static int do_source_change(V4L2m2mContext * const s) - - s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture); - -- av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", -+ av_log(avctx, AV_LOG_DEBUG, "Source change: Fmt: %s, SAR: %d/%d, wxh %dx%d crop %dx%d @ %d,%d, reinit=%d\n", -+ av_fourcc2str(ff_v4l2_get_format_pixelformat(&cap_fmt)), - 
s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den, - s->capture.width, s->capture.height, - s->capture.selection.width, s->capture.selection.height, - s->capture.selection.left, s->capture.selection.top, reinit); - -- if (reinit) { -+ ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); -+ if (ret) -+ av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); -+ s->draining = 0; -+ -+ if (!reinit) { -+ /* Buffers are OK so just stream off to ack */ -+ av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); -+ } -+ else { - if (avctx) - ret = ff_set_dimensions(s->avctx, - s->capture.selection.width != 0 ? s->capture.selection.width : s->capture.width, -@@ -371,11 +382,7 @@ static int do_source_change(V4L2m2mContext * const s) - if (ret < 0) - av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n"); - -- ret = ff_v4l2_m2m_codec_reinit(s); -- if (ret) { -- av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n"); -- return AVERROR(EINVAL); -- } -+ ff_v4l2_context_release(&s->capture); - - if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || - s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { -@@ -388,26 +395,10 @@ static int do_source_change(V4L2m2mContext * const s) - // Update pixel format - should only actually do something on initial change - s->capture.av_pix_fmt = - ff_v4l2_format_v4l2_to_avfmt(ff_v4l2_get_format_pixelformat(&s->capture.format), AV_CODEC_ID_RAWVIDEO); -- if (s->output_drm) { -- avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME; -- avctx->sw_pix_fmt = s->capture.av_pix_fmt; -- } -- else -- avctx->pix_fmt = s->capture.av_pix_fmt; -- -- goto reinit_run; -+ avctx->pix_fmt = s->output_drm ? 
AV_PIX_FMT_DRM_PRIME : s->capture.av_pix_fmt; -+ avctx->sw_pix_fmt = s->capture.av_pix_fmt; - } - -- /* Buffers are OK so just stream off to ack */ -- av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only - restart decode\n", __func__); -- -- ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF); -- if (ret) -- av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n"); -- s->draining = 0; -- -- /* reinit executed */ --reinit_run: - ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON); - return 1; - } - -From caf47c030e05c78d18db72d3ba979b933c0579e5 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 8 Sep 2023 12:13:38 +0000 -Subject: [PATCH 164/186] v4l2: Add (more) RGB formats to DRM & V4L2 - ---- - libavcodec/v4l2_buffers.c | 33 +++++++++++++++++++++++++++++++++ - libavcodec/v4l2_fmt.c | 8 ++++++++ - 2 files changed, 41 insertions(+) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 8d80d1978830..e7b57322162f 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -390,6 +390,39 @@ static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) - } - - switch (avbuf->context->av_pix_fmt) { -+ case AV_PIX_FMT_0BGR: -+ layer->format = DRM_FORMAT_RGBX8888; -+ break; -+ case AV_PIX_FMT_RGB0: -+ layer->format = DRM_FORMAT_XBGR8888; -+ break; -+ case AV_PIX_FMT_0RGB: -+ layer->format = DRM_FORMAT_BGRX8888; -+ break; -+ case AV_PIX_FMT_BGR0: -+ layer->format = DRM_FORMAT_XRGB8888; -+ break; -+ -+ case AV_PIX_FMT_ABGR: -+ layer->format = DRM_FORMAT_RGBA8888; -+ break; -+ case AV_PIX_FMT_RGBA: -+ layer->format = DRM_FORMAT_ABGR8888; -+ break; -+ case AV_PIX_FMT_ARGB: -+ layer->format = DRM_FORMAT_BGRA8888; -+ break; -+ case AV_PIX_FMT_BGRA: -+ layer->format = DRM_FORMAT_ARGB8888; -+ break; -+ -+ case AV_PIX_FMT_BGR24: -+ layer->format = DRM_FORMAT_BGR888; -+ break; -+ case AV_PIX_FMT_RGB24: -+ layer->format = DRM_FORMAT_RGB888; -+ break; -+ - case AV_PIX_FMT_YUYV422: - - layer->format = DRM_FORMAT_YUYV; -diff 
--git a/libavcodec/v4l2_fmt.c b/libavcodec/v4l2_fmt.c -index 6df47e3f5a3c..c820a1d5227b 100644 ---- a/libavcodec/v4l2_fmt.c -+++ b/libavcodec/v4l2_fmt.c -@@ -42,6 +42,14 @@ static const struct fmt_conversion { - { AV_FMT(RGB24), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB24) }, - { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGR32) }, - { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(RGB32) }, -+ { AV_FMT(BGR0), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRX32) }, -+ { AV_FMT(RGB0), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBX32) }, -+ { AV_FMT(0BGR), AV_CODEC(RAWVIDEO), V4L2_FMT(XBGR32) }, -+ { AV_FMT(0RGB), AV_CODEC(RAWVIDEO), V4L2_FMT(XRGB32) }, -+ { AV_FMT(BGRA), AV_CODEC(RAWVIDEO), V4L2_FMT(BGRA32) }, -+ { AV_FMT(RGBA), AV_CODEC(RAWVIDEO), V4L2_FMT(RGBA32) }, -+ { AV_FMT(ABGR), AV_CODEC(RAWVIDEO), V4L2_FMT(ABGR32) }, -+ { AV_FMT(ARGB), AV_CODEC(RAWVIDEO), V4L2_FMT(ARGB32) }, - { AV_FMT(GRAY8), AV_CODEC(RAWVIDEO), V4L2_FMT(GREY) }, - { AV_FMT(YUV420P), AV_CODEC(RAWVIDEO), V4L2_FMT(YUV420) }, - { AV_FMT(YUYV422), AV_CODEC(RAWVIDEO), V4L2_FMT(YUYV) }, - -From 975422cbef7048a32ef0f8768a07a96d2d63a42d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 24 Oct 2023 12:54:02 +0100 -Subject: [PATCH 165/186] dmabuf: Use vidbuf_cached for dmabuf allocation - -Gates usage to kernel 6.1.57 and later as that is when the rpivid iommu -patch was merged. 
- -(cherry picked from commit 9a898f4ea127b30f1ca81eb98dfba3dd101db179) ---- - libavcodec/v4l2_req_dmabufs.c | 73 ++++++++++++++++++++++++++-------- - libavcodec/v4l2_req_dmabufs.h | 1 + - libavcodec/v4l2_request_hevc.c | 49 ++++++++++++----------- - 3 files changed, 83 insertions(+), 40 deletions(-) - -diff --git a/libavcodec/v4l2_req_dmabufs.c b/libavcodec/v4l2_req_dmabufs.c -index 017c3892a593..9a4b69d3fa5a 100644 ---- a/libavcodec/v4l2_req_dmabufs.c -+++ b/libavcodec/v4l2_req_dmabufs.c -@@ -15,11 +15,12 @@ - #include "v4l2_req_dmabufs.h" - #include "v4l2_req_utils.h" - --#define DMABUF_NAME1 "/dev/dma_heap/linux,cma" --#define DMABUF_NAME2 "/dev/dma_heap/reserved" -- - #define TRACE_ALLOC 0 - -+#ifndef __O_CLOEXEC -+#define __O_CLOEXEC 0 -+#endif -+ - struct dmabufs_ctl; - struct dmabuf_h; - -@@ -297,23 +298,33 @@ struct dmabufs_ctl * dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc) - // - // Alloc dmabuf via CMA - --static int ctl_cma_new(struct dmabufs_ctl * dbsc) -+static int ctl_cma_new2(struct dmabufs_ctl * dbsc, const char * const * names) - { -- while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 && -- errno == EINTR) -- /* Loop */; -- -- if (dbsc->fd == -1) { -- while ((dbsc->fd = open(DMABUF_NAME2, O_RDWR)) == -1 && -+ for (; *names != NULL; ++names) -+ { -+ while ((dbsc->fd = open(*names, O_RDWR | __O_CLOEXEC)) == -1 && - errno == EINTR) - /* Loop */; -- if (dbsc->fd == -1) { -- request_log("Unable to open either %s or %s\n", -- DMABUF_NAME1, DMABUF_NAME2); -- return -1; -+ if (dbsc->fd != -1) -+ { -+ request_debug(NULL, "%s: Using dma_heap device %s\n", __func__, *names); -+ return 0; - } -+ request_debug(NULL, "%s: Not using dma_heap device %s: %s\n", __func__, *names, strerror(errno)); - } -- return 0; -+ request_log("Unable to open any dma_heap device\n"); -+ return -1; -+} -+ -+static int ctl_cma_new(struct dmabufs_ctl * dbsc) -+{ -+ static const char * const names[] = { -+ "/dev/dma_heap/linux,cma", -+ "/dev/dma_heap/reserved", -+ NULL -+ }; -+ 
-+ return ctl_cma_new2(dbsc, names); - } - - static void ctl_cma_free(struct dmabufs_ctl * dbsc) -@@ -321,7 +332,6 @@ static void ctl_cma_free(struct dmabufs_ctl * dbsc) - if (dbsc->fd != -1) - while (close(dbsc->fd) == -1 && errno == EINTR) - /* loop */; -- - } - - static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, size_t size) -@@ -347,6 +357,10 @@ static int buf_cma_alloc(struct dmabufs_ctl * const dbsc, struct dmabuf_h * dh, - - dh->fd = data.fd; - dh->size = (size_t)data.len; -+ -+// fprintf(stderr, "%s: size=%#zx, ftell=%#zx\n", __func__, -+// dh->size, (size_t)lseek(dh->fd, 0, SEEK_END)); -+ - return 0; - } - -@@ -364,7 +378,32 @@ static const struct dmabuf_fns dmabuf_cma_fns = { - - struct dmabufs_ctl * dmabufs_ctl_new(void) - { -- request_debug(NULL, "Dmabufs using CMA\n");; -+ request_debug(NULL, "Dmabufs using CMA\n"); - return dmabufs_ctl_new2(&dmabuf_cma_fns); - } - -+static int ctl_cma_new_vidbuf_cached(struct dmabufs_ctl * dbsc) -+{ -+ static const char * const names[] = { -+ "/dev/dma_heap/vidbuf_cached", -+ "/dev/dma_heap/linux,cma", -+ "/dev/dma_heap/reserved", -+ NULL -+ }; -+ -+ return ctl_cma_new2(dbsc, names); -+} -+ -+static const struct dmabuf_fns dmabuf_vidbuf_cached_fns = { -+ .buf_alloc = buf_cma_alloc, -+ .buf_free = buf_cma_free, -+ .ctl_new = ctl_cma_new_vidbuf_cached, -+ .ctl_free = ctl_cma_free, -+}; -+ -+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void) -+{ -+ request_debug(NULL, "Dmabufs using Vidbuf\n"); -+ return dmabufs_ctl_new2(&dmabuf_vidbuf_cached_fns); -+} -+ -diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h -index 381ba2708da6..8c1ab0b5df5e 100644 ---- a/libavcodec/v4l2_req_dmabufs.h -+++ b/libavcodec/v4l2_req_dmabufs.h -@@ -7,6 +7,7 @@ struct dmabufs_ctl; - struct dmabuf_h; - - struct dmabufs_ctl * dmabufs_ctl_new(void); -+struct dmabufs_ctl * dmabufs_ctl_new_vidbuf_cached(void); - void dmabufs_ctl_unref(struct dmabufs_ctl ** const pdbsc); - struct dmabufs_ctl * 
dmabufs_ctl_ref(struct dmabufs_ctl * const dbsc); - -diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c -index db7ed13b6d76..5b37319d6aa9 100644 ---- a/libavcodec/v4l2_request_hevc.c -+++ b/libavcodec/v4l2_request_hevc.c -@@ -176,17 +176,6 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n", - decdev_media_path(decdev), decdev_video_path(decdev)); - -- if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) { -- av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n"); -- src_memtype = MEDIABUFS_MEMORY_MMAP; -- dst_memtype = MEDIABUFS_MEMORY_MMAP; -- } -- else { -- av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n"); -- src_memtype = MEDIABUFS_MEMORY_DMABUF; -- dst_memtype = MEDIABUFS_MEMORY_DMABUF; -- } -- - if ((ctx->pq = pollqueue_new()) == NULL) { - av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n"); - goto fail1; -@@ -202,6 +191,25 @@ static int v4l2_request_hevc_init(AVCodecContext *avctx) - goto fail3; - } - -+ // Version test for functional Pi5 HEVC iommu. 
-+ // rpivid kernel patch was merged in 6.1.57 -+ // *** Remove when it is unlikely that there are any broken kernels left -+ if (mediabufs_ctl_driver_version(ctx->mbufs) >= MEDIABUFS_DRIVER_VERSION(6,1,57)) -+ ctx->dbufs = dmabufs_ctl_new_vidbuf_cached(); -+ else -+ ctx->dbufs = dmabufs_ctl_new(); -+ -+ if (ctx->dbufs == NULL) { -+ av_log(avctx, AV_LOG_DEBUG, "Unable to open dmabufs - try mmap buffers\n"); -+ src_memtype = MEDIABUFS_MEMORY_MMAP; -+ dst_memtype = MEDIABUFS_MEMORY_MMAP; -+ } -+ else { -+ av_log(avctx, AV_LOG_DEBUG, "Dmabufs opened - try dmabuf buffers\n"); -+ src_memtype = MEDIABUFS_MEMORY_DMABUF; -+ dst_memtype = MEDIABUFS_MEMORY_DMABUF; -+ } -+ - // Ask for an initial bitbuf size of max size / 4 - // We will realloc if we need more - // Must use sps->h/w as avctx contains cropped size -@@ -229,23 +237,15 @@ retry_src_memtype: - goto fail4; - } - -- if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) { -- av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); -+ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) - ctx->fns = &V2(ff_v4l2_req_hevc, 4); -- } - #if CONFIG_V4L2_REQ_HEVC_VX -- else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { -- av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); -+ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) - ctx->fns = &V2(ff_v4l2_req_hevc, 3); -- } -- else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) { -- av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n"); -+ else if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) - ctx->fns = &V2(ff_v4l2_req_hevc, 2); -- } -- else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) { -- av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n"); -+ else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) - ctx->fns = &V2(ff_v4l2_req_hevc, 1); -- } - #endif - else { - av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n"); -@@ -253,6 +253,9 @@ retry_src_memtype: - 
goto fail4; - } - -+ av_log(avctx, AV_LOG_DEBUG, "%s probed successfully: driver v %#x\n", -+ ctx->fns->name, mediabufs_ctl_driver_version(ctx->mbufs)); -+ - if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) { - char tbuf1[5]; - av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height); - -From dd548f4cf1828f957b6d72021233813c49ac5c7c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 11 Jan 2024 18:27:35 +0000 -Subject: [PATCH 166/186] v4l2_m2m_dec: Move drm_prime hwframecontext setup to - after 1st frame - -(cherry picked from commit 2b9675fdbb3c99a08055366a3a9a216d1369ec9c) ---- - libavcodec/v4l2_buffers.c | 4 +++ - libavcodec/v4l2_context.c | 63 ++++++++++++++++++++++++--------------- - libavcodec/v4l2_context.h | 8 +++++ - 3 files changed, 51 insertions(+), 24 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index e7b57322162f..e412636a7a13 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -607,6 +607,10 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) - - if (buf_to_m2mctx(avbuf)->output_drm) { - /* 1. get references to the actual data */ -+ const int rv = ff_v4l2_context_frames_set(avbuf->context); -+ if (rv != 0) -+ return rv; -+ - frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); - frame->format = AV_PIX_FMT_DRM_PRIME; - frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index ed126f8f2b35..a01a105892ab 100644 ---- a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -1227,6 +1227,42 @@ fail_release: - return ret; - } - -+int ff_v4l2_context_frames_set(V4L2Context *const ctx) -+{ -+ AVHWFramesContext *hwframes; -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -+ const int w = ctx->width != 0 ? 
ctx->width : s->avctx->width; -+ const int h = ctx->height != 0 ? ctx->height : s->avctx->height; -+ int ret; -+ -+ if (ctx->frames_ref != NULL) { -+ const AVHWFramesContext * const hwf = (AVHWFramesContext*)ctx->frames_ref->data; -+ if (hwf->sw_format == ctx->av_pix_fmt && hwf->width == w && hwf->height == h) -+ return 0; -+ av_buffer_unref(&ctx->frames_ref); -+ } -+ -+ ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); -+ if (!ctx->frames_ref) -+ return AVERROR(ENOMEM); -+ -+ hwframes = (AVHWFramesContext*)ctx->frames_ref->data; -+ hwframes->format = AV_PIX_FMT_DRM_PRIME; -+ hwframes->sw_format = ctx->av_pix_fmt; -+ hwframes->width = w; -+ hwframes->height = h; -+ ret = av_hwframe_ctx_init(ctx->frames_ref); -+ if (ret < 0) { -+ av_log(s->avctx, AV_LOG_ERROR, "Failed to create hwframes context: %s\n", av_err2str(ret)); -+ av_buffer_unref(&ctx->frames_ref); -+ return ret; -+ } -+ -+ av_log(s->avctx, AV_LOG_DEBUG, "%s: HWFramesContext set to %s, %dx%d\n", __func__, -+ av_get_pix_fmt_name(ctx->av_pix_fmt), w, h); -+ return 0; -+} -+ - int ff_v4l2_context_init(V4L2Context* ctx) - { - struct v4l2_queryctrl qctrl; -@@ -1245,30 +1281,11 @@ int ff_v4l2_context_init(V4L2Context* ctx) - pthread_cond_init(&ctx->cond, NULL); - atomic_init(&ctx->q_count, 0); - -- if (s->output_drm) { -- AVHWFramesContext *hwframes; -- -- ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref); -- if (!ctx->frames_ref) { -- ret = AVERROR(ENOMEM); -- goto fail_unlock; -- } -- -- hwframes = (AVHWFramesContext*)ctx->frames_ref->data; -- hwframes->format = AV_PIX_FMT_DRM_PRIME; -- hwframes->sw_format = ctx->av_pix_fmt; -- hwframes->width = ctx->width != 0 ? ctx->width : s->avctx->width; -- hwframes->height = ctx->height != 0 ? 
ctx->height : s->avctx->height; -- ret = av_hwframe_ctx_init(ctx->frames_ref); -- if (ret < 0) -- goto fail_unref_hwframes; -- } -- - ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); - if (ret) { - ret = AVERROR(errno); - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret)); -- goto fail_unref_hwframes; -+ goto fail_unlock; - } - - memset(&qctrl, 0, sizeof(qctrl)); -@@ -1277,7 +1294,7 @@ int ff_v4l2_context_init(V4L2Context* ctx) - ret = AVERROR(errno); - if (ret != AVERROR(EINVAL)) { - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret)); -- goto fail_unref_hwframes; -+ goto fail_unlock; - } - // Control unsupported - set default if wanted - if (ctx->num_buffers < 2) -@@ -1291,12 +1308,10 @@ int ff_v4l2_context_init(V4L2Context* ctx) - - ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); - if (ret < 0) -- goto fail_unref_hwframes; -+ goto fail_unlock; - - return 0; - --fail_unref_hwframes: -- av_buffer_unref(&ctx->frames_ref); - fail_unlock: - ff_mutex_destroy(&ctx->lock); - return ret; -diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index f4240f7dddb2..9f1c05a918ff 100644 ---- a/libavcodec/v4l2_context.h -+++ b/libavcodec/v4l2_context.h -@@ -134,6 +134,14 @@ typedef struct V4L2Context { - */ - int ff_v4l2_context_init(V4L2Context* ctx); - -+/** -+ * (re)set the hwframecontext from the current v4l2 context -+ * -+ * @param[in] ctx A pointer to a V4L2Context. See V4L2Context description for required variables. -+ * @return 0 in case of success, a negative value representing the error otherwise. -+ */ -+int ff_v4l2_context_frames_set(V4L2Context *const ctx); -+ - /** - * Sets the V4L2Context format in the v4l2 driver. 
- * - -From 08c71f5f211216b2f9c5b5317682c639cf6c300f Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 12 Jan 2024 15:17:43 +0000 -Subject: [PATCH 167/186] vf_bwdif: Add capability to deinterlace NV12 - -As bwdif takes no account of horizontally adjacent pixels the same -code can be used on planes that have multiple components as is used -on single component planes. Update the filtering code to cope with -multi-component planes and add NV12 to the list of supported formats. - -Signed-off-by: John Cox -(cherry picked from commit 38338fe9123a01210695e63f05e929f53d6868ff) ---- - libavfilter/vf_bwdif.c | 16 +++++++++++++--- - 1 file changed, 13 insertions(+), 3 deletions(-) - -diff --git a/libavfilter/vf_bwdif.c b/libavfilter/vf_bwdif.c -index 9847d38b6a63..4d69b3039ddf 100644 ---- a/libavfilter/vf_bwdif.c -+++ b/libavfilter/vf_bwdif.c -@@ -302,19 +302,28 @@ static void filter(AVFilterContext *ctx, AVFrame *dstpic, - YADIFContext *yadif = &bwdif->yadif; - ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff }; - int i; -+ int last_plane = -1; - - for (i = 0; i < yadif->csp->nb_components; i++) { - int w = dstpic->width; - int h = dstpic->height; -+ const AVComponentDescriptor * const comp = yadif->csp->comp + i; -+ -+ // If the last plane was the same as this plane assume we've dealt -+ // with all the pels already -+ if (last_plane == comp->plane) -+ continue; -+ last_plane = comp->plane; - - if (i == 1 || i == 2) { - w = AV_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w); - h = AV_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h); - } - -- td.w = w; -- td.h = h; -- td.plane = i; -+ // comp step is in bytes but td.w is in pels -+ td.w = w * comp->step / ((comp->depth + 7) / 8); -+ td.h = h; -+ td.plane = comp->plane; - - ff_filter_execute(ctx, filter_slice, &td, NULL, - FFMIN((h+3)/4, ff_filter_get_nb_threads(ctx))); -@@ -350,6 +359,7 @@ static const enum AVPixelFormat pix_fmts[] = { - AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, - 
AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, - AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, -+ AV_PIX_FMT_NV12, - AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, - AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, - AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP16, - -From 5b910aca795d4a352697fa749fce720a3643178d Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 12 Jan 2024 16:46:27 +0000 -Subject: [PATCH 168/186] v4l2_m2m_dec: Try to accomodate ffmpegs ideas about - default s/w fmts - -(cherry picked from commit c61de480d628ad60292f3695d7d29b9edd880be3) ---- - libavcodec/v4l2_m2m_dec.c | 21 ++++++++++++++------- - 1 file changed, 14 insertions(+), 7 deletions(-) - -diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index f67dd23ba1cc..38832230794d 100644 ---- a/libavcodec/v4l2_m2m_dec.c -+++ b/libavcodec/v4l2_m2m_dec.c -@@ -1079,7 +1079,6 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) - unsigned int fmts_n; - uint32_t *fmts = ff_v4l2_context_enum_drm_formats(&s->capture, &fmts_n); - enum AVPixelFormat *fmts2 = NULL; -- enum AVPixelFormat t; - enum AVPixelFormat gf_pix_fmt; - unsigned int i; - unsigned int n = 0; -@@ -1089,7 +1088,7 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) - if (!fmts) - return AVERROR(ENOENT); - -- if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 2))) == NULL) { -+ if ((fmts2 = av_malloc(sizeof(*fmts2) * (fmts_n + 3))) == NULL) { - rv = AVERROR(ENOMEM); - goto error; - } -@@ -1110,17 +1109,25 @@ choose_capture_format(AVCodecContext * const avctx, V4L2m2mContext * const s) - pref_n = n; - fmts2[n++] = f; - } -- fmts2[n] = AV_PIX_FMT_NONE; - - if (n < 2) { - av_log(avctx, AV_LOG_DEBUG, "%s: No usable formats found\n", __func__); - goto error; - } - -- // Put preferred s/w format at the end - ff_get_format will put it in sw_pix_fmt -- t = fmts2[n - 1]; -- fmts2[n - 1] = fmts2[pref_n]; -- fmts2[pref_n] = t; -+ 
if (n != 2) { -+ // ffmpeg.c really only expects one s/w format. It thinks that the -+ // last format in the list is the s/w format of the h/w format but -+ // also chooses the first non-h/w format as the preferred s/w format. -+ // The only way of reconciling this is to dup our preferred format into -+ // both last & first place :-( -+ const enum AVPixelFormat t = fmts2[pref_n]; -+ fmts2[pref_n] = fmts2[1]; -+ fmts2[1] = t; -+ fmts2[n++] = t; -+ } -+ -+ fmts2[n] = AV_PIX_FMT_NONE; - - gf_pix_fmt = ff_get_format(avctx, fmts2); - av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s) %dx%d; get_format requested=%d (%s)\n", - -From d140f93c6d1c54c6bb28ef31051c77e6a3ac8c78 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 18 Jan 2024 15:57:30 +0000 -Subject: [PATCH 169/186] v4l2_m2m_dec: Fix cma allocated s/w output - -(cherry picked from commit 87cefe4ddad7c36faad052e12268d7e05c5b694a) ---- - libavcodec/v4l2_buffers.c | 33 +++++++++++++++++++++++---------- - 1 file changed, 23 insertions(+), 10 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index e412636a7a13..b3ef74bcd48c 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -486,6 +486,11 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data) - // Buffer still attached to context - V4L2m2mContext *s = buf_to_m2mctx(avbuf); - -+ if (!s->output_drm && avbuf->dmabuf[0] != NULL) { -+ for (unsigned int i = 0; i != avbuf->num_planes; ++i) -+ dmabuf_read_end(avbuf->dmabuf[i]); -+ } -+ - ff_mutex_lock(&ctx->lock); - - ff_v4l2_buffer_set_avail(avbuf); -@@ -533,6 +538,9 @@ static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) - avbuf->buf.m.planes[i].m.fd = dma_fd; - else - avbuf->buf.m.fd = dma_fd; -+ -+ if (!s->output_drm) -+ avbuf->plane_info[i].mm_addr = dmabuf_map(avbuf->dmabuf[i]); - } - else { - struct v4l2_exportbuffer expbuf; -@@ -647,6 +655,11 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) - break; - } - -+ if 
(avbuf->dmabuf[0] != NULL) { -+ for (unsigned int i = 0; i != avbuf->num_planes; ++i) -+ dmabuf_read_start(avbuf->dmabuf[i]); -+ } -+ - return 0; - } - -@@ -947,6 +960,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); - AVBufferRef * bufref; - V4L2m2mContext * const s = ctx_to_m2mctx(ctx); -+ int want_mmap; - - *pbufref = NULL; - if (avbuf == NULL) -@@ -988,10 +1002,10 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - } else - avbuf->num_planes = 1; - -- for (i = 0; i < avbuf->num_planes; i++) { -- const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && -- (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); -+ want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && -+ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); - -+ for (i = 0; i < avbuf->num_planes; i++) { - avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? 
- ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : - ctx->format.fmt.pix.bytesperline; -@@ -1032,13 +1046,12 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - avbuf->buf.length = avbuf->planes[0].length; - } - -- if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) { -- if (s->output_drm) { -- ret = v4l2_buffer_export_drm(avbuf); -- if (ret) { -- av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); -- goto fail; -- } -+ if (!want_mmap) { -+ // export_drm does dmabuf alloc if we aren't using v4l2 alloc -+ ret = v4l2_buffer_export_drm(avbuf); -+ if (ret) { -+ av_log(logger(avbuf), AV_LOG_ERROR, "Failed to get exported drm handles\n"); -+ goto fail; - } - } - - -From 79d7b3b96768de5d65cdee2b6cd2e91b827f0776 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 29 Jan 2024 15:12:34 +0000 -Subject: [PATCH 170/186] v4l2_req: Fix media pool delete race - -fds & polltasks associated with media fds that are still in flight are -not freed on delete but the main pool is leading to use after free when -they finally do complete. Stop scanning the free chain on delete and -simply delete everything, in-flight or not. This requires changing alloc -as the buffers weren't previously tracked in-flight. 
- -(cherry picked from commit 6599b6d1fa1c75d295d5f568a48f8d250250fb7c) ---- - libavcodec/v4l2_req_media.c | 38 ++++++++++++++++++++----------------- - 1 file changed, 21 insertions(+), 17 deletions(-) - -diff --git a/libavcodec/v4l2_req_media.c b/libavcodec/v4l2_req_media.c -index 0394bb2b23f3..c94cc5b0f684 100644 ---- a/libavcodec/v4l2_req_media.c -+++ b/libavcodec/v4l2_req_media.c -@@ -86,6 +86,8 @@ struct media_pool { - int fd; - sem_t sem; - pthread_mutex_t lock; -+ unsigned int pool_n; -+ struct media_request * pool_reqs; - struct media_request * free_reqs; - struct pollqueue * pq; - }; -@@ -251,18 +253,17 @@ int media_request_abort(struct media_request ** const preq) - return 0; - } - --static void delete_req_chain(struct media_request * const chain) -+static void free_req_pool(struct media_request * const pool, const unsigned int n) - { -- struct media_request * next = chain; -- while (next) { -- struct media_request * const req = next; -- next = req->next; -+ unsigned int i; -+ for (i = 0; i != n; ++i) { -+ struct media_request * const req = pool + i; - if (req->pt) - polltask_delete(&req->pt); - if (req->fd != -1) - close(req->fd); -- free(req); - } -+ free(pool); - } - - struct media_pool * media_pool_new(const char * const media_path, -@@ -283,17 +284,16 @@ struct media_pool * media_pool_new(const char * const media_path, - goto fail1; - } - -+ if ((mp->pool_reqs = calloc(n, sizeof(*mp->pool_reqs))) == NULL) -+ goto fail3; -+ mp->pool_n = n; - for (i = 0; i != n; ++i) { -- struct media_request * req = malloc(sizeof(*req)); -- if (!req) -- goto fail4; -+ mp->pool_reqs[i].mp = mp; -+ mp->pool_reqs[i].fd = -1; -+ } - -- *req = (struct media_request){ -- .next = mp->free_reqs, -- .mp = mp, -- .fd = -1 -- }; -- mp->free_reqs = req; -+ for (i = 0; i != n; ++i) { -+ struct media_request * const req = mp->pool_reqs + i; - - if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) { - request_log("Failed to alloc request %d: %s\n", i, strerror(errno)); -@@ 
-303,6 +303,9 @@ struct media_pool * media_pool_new(const char * const media_path, - req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req); - if (!req->pt) - goto fail4; -+ -+ req->next = mp->free_reqs, -+ mp->free_reqs = req; - } - - sem_init(&mp->sem, 0, n); -@@ -310,7 +313,8 @@ struct media_pool * media_pool_new(const char * const media_path, - return mp; - - fail4: -- delete_req_chain(mp->free_reqs); -+ free_req_pool(mp->pool_reqs, mp->pool_n); -+fail3: - close(mp->fd); - pthread_mutex_destroy(&mp->lock); - fail1: -@@ -327,7 +331,7 @@ void media_pool_delete(struct media_pool ** pMp) - return; - *pMp = NULL; - -- delete_req_chain(mp->free_reqs); -+ free_req_pool(mp->pool_reqs, mp->pool_n); - close(mp->fd); - sem_destroy(&mp->sem); - pthread_mutex_destroy(&mp->lock); - -From 041844bf65a92134d2d4cb4036b6a31ce1bf2693 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 30 Jan 2024 14:24:59 +0000 -Subject: [PATCH 171/186] drm_vout: Fix connector etc. desc memory leak - -(cherry picked from commit 2f95ad366697901acb114a6d2a45810180f3652d) ---- - libavdevice/drm_vout.c | 7 +++++++ - 1 file changed, 7 insertions(+) - -diff --git a/libavdevice/drm_vout.c b/libavdevice/drm_vout.c -index 491e1dc60861..275748abdcee 100644 ---- a/libavdevice/drm_vout.c -+++ b/libavdevice/drm_vout.c -@@ -501,6 +501,13 @@ static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm - crtc ? crtc->height : 0, - (s->conId == (int)con->connector_id ? 
- " (chosen)" : "")); -+ -+ if (crtc) -+ drmModeFreeCrtc(crtc); -+ if (enc) -+ drmModeFreeEncoder(enc); -+ if (con) -+ drmModeFreeConnector(con); - } - - if (!s->conId) { - -From 3300e10481d711b3a872cf0b9a13bd4009011b0b Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 30 Jan 2024 16:20:53 +0000 -Subject: [PATCH 172/186] conf_native: Add --tsan option - -(cherry picked from commit 3b5aa0d31bd420f8a642f6fc7919674b8a5d5b31) ---- - pi-util/conf_native.sh | 11 +++++++++-- - 1 file changed, 9 insertions(+), 2 deletions(-) - -diff --git a/pi-util/conf_native.sh b/pi-util/conf_native.sh -index f0ed1595948b..0dbaa53e97e0 100755 ---- a/pi-util/conf_native.sh -+++ b/pi-util/conf_native.sh -@@ -10,6 +10,8 @@ RPI_KEEPS="" - NOSHARED= - MMAL= - USR_PREFIX= -+TOOLCHAIN= -+R=rel - - while [ "$1" != "" ] ; do - case $1 in -@@ -22,6 +24,10 @@ while [ "$1" != "" ] ; do - --usr) - USR_PREFIX=/usr - ;; -+ --tsan) -+ TOOLCHAIN="--toolchain=gcc-tsan" -+ R=tsan -+ ;; - *) - echo "Usage $0: [--noshared] [--mmal] [--usr]" - echo " noshared Build static libs and executable - good for testing" -@@ -82,11 +88,11 @@ V=`cat RELEASE` - SHARED_LIBS="--enable-shared" - if [ $NOSHARED ]; then - SHARED_LIBS="--disable-shared" -- OUT=$BUILDBASE/$B-$C-$V-static-rel -+ OUT=$BUILDBASE/$B-$C-$V-static-$R - echo Static libs - else - echo Shared libs -- OUT=$BUILDBASE/$B-$C-$V-shared-rel -+ OUT=$BUILDBASE/$B-$C-$V-shared-$R - fi - - if [ ! 
$USR_PREFIX ]; then -@@ -106,6 +112,7 @@ $FFSRC/configure \ - --libdir=$LIB_PREFIX\ - --incdir=$INC_PREFIX\ - $MCOPTS\ -+ $TOOLCHAIN\ - --disable-stripping\ - --disable-thumb\ - --enable-sand\ - -From 8f62193afd1823882eee4d7ce81b989dd881640c Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 30 Jan 2024 16:25:53 +0000 -Subject: [PATCH 173/186] v4l2_m2m: Rework use of ctx->lock to avoid use while - uninit - -(cherry picked from commit 024508c338bd707f0a9d34cdf660984171da1a6a) ---- - libavcodec/v4l2_buffers.c | 9 ++-- - libavcodec/v4l2_context.c | 86 +++++++++++++++++++++++++++------------ - 2 files changed, 63 insertions(+), 32 deletions(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index b3ef74bcd48c..e844a1a0b67d 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -484,7 +484,7 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data) - - if (ctx != NULL) { - // Buffer still attached to context -- V4L2m2mContext *s = buf_to_m2mctx(avbuf); -+ V4L2m2mContext * const s = ctx_to_m2mctx(ctx); - - if (!s->output_drm && avbuf->dmabuf[0] != NULL) { - for (unsigned int i = 0; i != avbuf->num_planes; ++i) -@@ -494,15 +494,14 @@ static void v4l2_free_bufref(void *opaque, uint8_t *data) - ff_mutex_lock(&ctx->lock); - - ff_v4l2_buffer_set_avail(avbuf); -+ avbuf->buf.timestamp.tv_sec = 0; -+ avbuf->buf.timestamp.tv_usec = 0; - -- if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) { -+ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { - av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name); -- /* no need to queue more buffers to the driver */ - } - else if (ctx->streamon) { - av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name); -- avbuf->buf.timestamp.tv_sec = 0; -- avbuf->buf.timestamp.tv_usec = 0; - ff_v4l2_buffer_enqueue(avbuf); // will set to IN_DRIVER - } - else { -diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index a01a105892ab..0d61a432c3c8 100644 ---- 
a/libavcodec/v4l2_context.c -+++ b/libavcodec/v4l2_context.c -@@ -906,56 +906,88 @@ static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) - } - } - -+ ff_mutex_lock(&ctx->lock); - for (i = 0; i < ctx->num_buffers; ++i) { - struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; - if (buf->status == V4L2BUF_AVAILABLE) { - rv = ff_v4l2_buffer_enqueue(buf); - if (rv < 0) -- return rv; -+ break; - } - } -- return 0; -+ ff_mutex_unlock(&ctx->lock); -+ return rv; - } - --int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) -+static int set_streamon(AVCodecContext * const avctx, V4L2Context*const ctx) - { - int type = ctx->type; - int ret = 0; -- AVCodecContext * const avctx = logger(ctx); -+ -+ if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) -+ stuff_all_buffers(avctx, ctx); -+ -+ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMON, &type) < 0) { -+ ret = AVERROR(errno); -+ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, -+ av_err2str(ret)); -+ return ret; -+ } -+ -+ ctx->first_buf = 1; -+ ctx->streamon = 1; -+ ctx->flag_last = 0; -+ av_log(avctx, AV_LOG_DEBUG, "%s set status ON OK\n", ctx->name); -+ return ret; -+} -+ -+static int set_streamoff(AVCodecContext * const avctx, V4L2Context*const ctx) -+{ -+ int type = ctx->type; -+ int ret = 0; -+ const int has_bufs = ctx_buffers_alloced(ctx); - - // Avoid doing anything if there is nothing we can do -- if (cmd == VIDIOC_STREAMOFF && !ctx_buffers_alloced(ctx) && !ctx->streamon) -+ if (!has_bufs && !ctx->streamon) - return 0; - -- ff_mutex_lock(&ctx->lock); -- -- if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type)) -- stuff_all_buffers(avctx, ctx); -+ if (has_bufs) -+ ff_mutex_lock(&ctx->lock); - -- if (ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type) < 0) { -- const int err = errno; -- av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name, -- cmd, (cmd == VIDIOC_STREAMON) ? 
"ON" : "OFF", err); -- ret = AVERROR(err); -+ if (ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_STREAMOFF, &type) < 0) { -+ ret = AVERROR(errno); -+ av_log(avctx, AV_LOG_ERROR, "%s set status ON failed: err=%s\n", ctx->name, -+ av_err2str(ret)); - } -- else -- { -- if (cmd == VIDIOC_STREAMOFF) -- flush_all_buffers_status(ctx); -- else -- ctx->first_buf = 1; -+ else { -+ flush_all_buffers_status(ctx); - -- ctx->streamon = (cmd == VIDIOC_STREAMON); -- av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name, -- cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF"); -+ ctx->streamon = 0; -+ ctx->flag_last = 0; -+ -+ av_log(avctx, AV_LOG_DEBUG, "%s set status OFF OK\n", ctx->name); - } - -- // Both stream off & on effectively clear flag_last -- ctx->flag_last = 0; -+ if (has_bufs) -+ ff_mutex_unlock(&ctx->lock); -+ return ret; -+} - -- ff_mutex_unlock(&ctx->lock); - -- return ret; -+int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd) -+{ -+ AVCodecContext * const avctx = logger(ctx); -+ -+ switch (cmd) { -+ case VIDIOC_STREAMOFF: -+ return set_streamoff(avctx, ctx); -+ case VIDIOC_STREAMON: -+ return set_streamon(avctx, ctx); -+ default: -+ av_log(avctx, AV_LOG_ERROR, "%s: Unexpected cmd: %d\n", __func__, cmd); -+ break; -+ } -+ return AVERROR_BUG; - } - - int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) - -From 76b95699abad71a942280db3b60c7f906b705166 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 1 Feb 2024 18:11:06 +0000 -Subject: [PATCH 174/186] matroskaenc: Fix H264 delayed extradata creation - -(cherry picked from commit bd60b02509168625e12889068e3f0834148334ca) ---- - libavformat/matroskaenc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c -index 61e4c976ef76..a6a00f03e7c9 100644 ---- a/libavformat/matroskaenc.c -+++ b/libavformat/matroskaenc.c -@@ -1125,7 +1125,7 @@ static int mkv_assemble_native_codecprivate(AVFormatContext *s, AVIOContext *dyn - 
case AV_CODEC_ID_WAVPACK: - return put_wv_codecpriv(dyn_cp, extradata, extradata_size); - case AV_CODEC_ID_H264: -- if (par->extradata_size) -+ if (extradata_size) - return ff_isom_write_avcc(dyn_cp, extradata, - extradata_size); - else - -From 83da9f5444e5c938bf623622d163c9559aa57cef Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Thu, 1 Feb 2024 18:12:38 +0000 -Subject: [PATCH 175/186] matroskaenc: Assume H264 is Annex B if no extradata - -(cherry picked from commit 7d0cf1279dcf8e97d7dec7f2b7dcd0379b335e3d) ---- - libavformat/matroskaenc.c | 10 ++++++++-- - 1 file changed, 8 insertions(+), 2 deletions(-) - -diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c -index a6a00f03e7c9..131ad31d1bda 100644 ---- a/libavformat/matroskaenc.c -+++ b/libavformat/matroskaenc.c -@@ -3194,9 +3194,15 @@ static int mkv_init(struct AVFormatContext *s) - track->reformat = mkv_reformat_wavpack; - break; - case AV_CODEC_ID_H264: -+ // Default to reformat if no extradata as the only current -+ // encoder which does this is v4l2m2m which needs reformat -+ if (par->extradata_size == 0 || -+ (par->extradata_size > 3 && -+ (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1))) -+ track->reformat = mkv_reformat_h2645; -+ break; - case AV_CODEC_ID_HEVC: -- if ((par->codec_id == AV_CODEC_ID_H264 && par->extradata_size > 0 || -- par->codec_id == AV_CODEC_ID_HEVC && par->extradata_size > 6) && -+ if (par->extradata_size > 6 && - (AV_RB24(par->extradata) == 1 || AV_RB32(par->extradata) == 1)) - track->reformat = mkv_reformat_h2645; - break; - -From 02eec233a033b677709c641fe3f03a2a8714d750 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Fri, 16 Feb 2024 11:50:56 +0000 -Subject: [PATCH 176/186] aarch64/rgb2rgb: Change incorrect SXTX to stxw - -(cherry picked from commit 4c362244e0f1c41d5af6c95a71da7b32029fa982) ---- - libswscale/aarch64/rgb2rgb_neon.S | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/libswscale/aarch64/rgb2rgb_neon.S 
b/libswscale/aarch64/rgb2rgb_neon.S -index 0956800b4185..38f83a1b7878 100644 ---- a/libswscale/aarch64/rgb2rgb_neon.S -+++ b/libswscale/aarch64/rgb2rgb_neon.S -@@ -338,8 +338,8 @@ function ff_bgr24toyv12_aarch64, export=1 - b.eq 90f - - subs w9, w4, #0 -- add x0, x0, w14, SXTX -- add x1, x1, w6, SXTX -+ add x0, x0, w14, sxtw -+ add x1, x1, w6, sxtw - mov x10, x0 - mov x11, x1 - b.lt 12f -@@ -424,10 +424,10 @@ function ff_bgr24toyv12_aarch64, export=1 - - // ------------------- Loop to start - -- add x0, x0, w14, SXTX -- add x1, x1, w6, SXTX -- add x2, x2, w7, SXTX -- add x3, x3, w7, SXTX -+ add x0, x0, w14, sxtw -+ add x1, x1, w6, sxtw -+ add x2, x2, w7, sxtw -+ add x3, x3, w7, sxtw - subs w5, w5, #1 - b.gt 11b - 90: - -From 1a20c50471f23cd6976c6606ab53be7ef5a9afb7 Mon Sep 17 00:00:00 2001 -From: James Le Cuirot -Date: Sat, 17 Feb 2024 13:29:36 +0000 -Subject: [PATCH 177/186] aarch64/rpi_sand: Fix building under Clang/LLVM - -The "Arm A64 Instruction Set Architecture" manual says that the MOV -(element) instruction takes the form `MOV .[], -.[]`, where `` is one of B, H, S, or D. Only certain -other instructions accept a number in front. GNU as allows you to -include it for any instruction, but this is non-standard. This is -explained at https://stackoverflow.com/questions/71907156. 
- -(cherry picked from commit ba40fd4ff2de0ced75d713c6aac9cdac2d379625) ---- - libavutil/aarch64/rpi_sand_neon.S | 50 +++++++++++++++---------------- - 1 file changed, 25 insertions(+), 25 deletions(-) - -diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S -index 11658de0c8c2..3a6bc3de74e5 100644 ---- a/libavutil/aarch64/rpi_sand_neon.S -+++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -387,13 +387,13 @@ function ff_rpi_sand30_lines_to_planar_c16, export=1 - st3 {v0.4h - v2.4h}, [x0], #24 - st3 {v16.4h - v18.4h}, [x2], #24 - beq 11b -- mov v0.2d[0], v0.2d[1] -+ mov v0.d[0], v0.d[1] - sub w9, w9, #12 -- mov v1.2d[0], v1.2d[1] -- mov v2.2d[0], v2.2d[1] -- mov v16.2d[0], v16.2d[1] -- mov v17.2d[0], v17.2d[1] -- mov v18.2d[0], v18.2d[1] -+ mov v1.d[0], v1.d[1] -+ mov v2.d[0], v2.d[1] -+ mov v16.d[0], v16.d[1] -+ mov v17.d[0], v17.d[1] -+ mov v18.d[0], v18.d[1] - 1: - cmp w9, #6-48 - blt 1f -@@ -526,28 +526,28 @@ function ff_rpi_sand30_lines_to_planar_y16, export=1 - blt 1f - st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 - beq 11b -- mov v16.2d[0], v16.2d[1] -+ mov v16.d[0], v16.d[1] - sub w5, w5, #12 -- mov v17.2d[0], v17.2d[1] -- mov v18.2d[0], v18.2d[1] -+ mov v17.d[0], v17.d[1] -+ mov v18.d[0], v18.d[1] - 1: - cmp w5, #6-96 - blt 1f - st3 {v16.h, v17.h, v18.h}[0], [x0], #6 - st3 {v16.h, v17.h, v18.h}[1], [x0], #6 - beq 11b -- mov v16.2s[0], v16.2s[1] -+ mov v16.s[0], v16.s[1] - sub w5, w5, #6 -- mov v17.2s[0], v17.2s[1] -- mov v18.2s[0], v18.2s[1] -+ mov v17.s[0], v17.s[1] -+ mov v18.s[0], v18.s[1] - 1: - cmp w5, #3-96 - blt 1f - st3 {v16.h, v17.h, v18.h}[0], [x0], #6 - beq 11b -- mov v16.4h[0], v16.4h[1] -+ mov v16.h[0], v16.h[1] - sub w5, w5, #3 -- mov v17.4h[0], v17.4h[1] -+ mov v17.h[0], v17.h[1] - 1: - cmp w5, #2-96 - blt 1f -@@ -625,10 +625,10 @@ function ff_rpi_sand30_lines_to_planar_y8, export=1 - blt 1f - st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 - beq 11b -- mov v16.2d[0], v16.2d[1] -+ mov v16.d[0], v16.d[1] - sub w5, w5, #24 -- mov 
v17.2d[0], v17.2d[1] -- mov v18.2d[0], v18.2d[1] -+ mov v17.d[0], v17.d[1] -+ mov v18.d[0], v18.d[1] - 1: - cmp w5, #12-96 - blt 1f -@@ -637,28 +637,28 @@ function ff_rpi_sand30_lines_to_planar_y8, export=1 - st3 {v16.b, v17.b, v18.b}[2], [x0], #3 - st3 {v16.b, v17.b, v18.b}[3], [x0], #3 - beq 11b -- mov v16.2s[0], v16.2s[1] -+ mov v16.s[0], v16.s[1] - sub w5, w5, #12 -- mov v17.2s[0], v17.2s[1] -- mov v18.2s[0], v18.2s[1] -+ mov v17.s[0], v17.s[1] -+ mov v18.s[0], v18.s[1] - 1: - cmp w5, #6-96 - blt 1f - st3 {v16.b, v17.b, v18.b}[0], [x0], #3 - st3 {v16.b, v17.b, v18.b}[1], [x0], #3 - beq 11b -- mov v16.4h[0], v16.4h[1] -+ mov v16.h[0], v16.h[1] - sub w5, w5, #6 -- mov v17.4h[0], v17.4h[1] -- mov v18.4h[0], v18.4h[1] -+ mov v17.h[0], v17.h[1] -+ mov v18.h[0], v18.h[1] - 1: - cmp w5, #3-96 - blt 1f - st3 {v16.b, v17.b, v18.b}[0], [x0], #3 - beq 11b -- mov v16.8b[0], v16.8b[1] -+ mov v16.b[0], v16.b[1] - sub w5, w5, #3 -- mov v17.8b[0], v17.8b[1] -+ mov v17.b[0], v17.b[1] - 1: - cmp w5, #2-96 - blt 1f - -From 09fa999ad4ea877311216081bba5989c4b44349e Mon Sep 17 00:00:00 2001 -From: James Le Cuirot -Date: Sat, 17 Feb 2024 14:37:44 +0000 -Subject: [PATCH 178/186] rtpenc: Fix building with GCC 14 - -This incompatible pointer type issue became a fatal error in GCC 14. The -AVBuffer API started using size_t in 5.0 with ef6a9e5e. 
- -(cherry picked from commit 5d3c1c0213f2d6fe7b310e65def4c44a6c610b18) ---- - libavformat/rtpenc.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c -index f67dc2a15ae1..1f1e4cb25ad9 100644 ---- a/libavformat/rtpenc.c -+++ b/libavformat/rtpenc.c -@@ -588,7 +588,7 @@ static int rtp_write_packet(AVFormatContext *s1, AVPacket *pkt) - case AV_CODEC_ID_H264: - { - uint8_t *side_data; -- int side_data_size = 0; -+ size_t side_data_size = 0; - - side_data = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, - &side_data_size); - -From 3bf2bc4d72a255aaec4b55bb5fd71258d8d16f67 Mon Sep 17 00:00:00 2001 -From: James Le Cuirot -Date: Sun, 18 Feb 2024 09:18:31 +0000 -Subject: [PATCH 179/186] v4l2_req: Fix building against musl by including - pthread.h - -(cherry picked from commit d4b70cc3ddd24036e0fd59ad562c6844767e314a) ---- - libavcodec/v4l2_req_decode_q.h | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/libavcodec/v4l2_req_decode_q.h b/libavcodec/v4l2_req_decode_q.h -index af7bbe1de462..27eafbc42b27 100644 ---- a/libavcodec/v4l2_req_decode_q.h -+++ b/libavcodec/v4l2_req_decode_q.h -@@ -1,6 +1,8 @@ - #ifndef AVCODEC_V4L2_REQ_DECODE_Q_H - #define AVCODEC_V4L2_REQ_DECODE_Q_H - -+#include -+ - typedef struct req_decode_ent { - struct req_decode_ent * next; - struct req_decode_ent * prev; - -From f4af3f1cbbec0bc6fa355bdaf6dcaa01790640d3 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 27 Feb 2024 12:57:08 +0000 -Subject: [PATCH 180/186] v4l2_buffers: Fix init of drmprime source (OUTPUT) - buffers for encode - -Previous fix for mmaped dmabuf CAPTURE buffers broke this due to failure -to note that dmabuf export wasn't wanted for OUTPUT buffers. 
- -(cherry picked from commit 21859689f25854eb9b46e1efacacf3eca3bef6e8) ---- - libavcodec/v4l2_buffers.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index e844a1a0b67d..2d1db41a3017 100644 ---- a/libavcodec/v4l2_buffers.c -+++ b/libavcodec/v4l2_buffers.c -@@ -1045,7 +1045,7 @@ int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ct - avbuf->buf.length = avbuf->planes[0].length; - } - -- if (!want_mmap) { -+ if (V4L2_TYPE_IS_CAPTURE(ctx->type) && !want_mmap) { - // export_drm does dmabuf alloc if we aren't using v4l2 alloc - ret = v4l2_buffer_export_drm(avbuf); - if (ret) { - -From 88b5be9784c6f6088d050c09203804142b7b4316 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 9 Jan 2024 09:00:17 +0000 -Subject: [PATCH 181/186] ffconf: Add ability to output yuv files & run - valgrind as part of conform - -(cherry picked from commit 8104e3701b3766976670a1d4afb13704c66a9c94) ---- - pi-util/ffconf.py | 59 ++++++++++++++++++++++++++++++++++++----------- - 1 file changed, 46 insertions(+), 13 deletions(-) - -diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py -index 657568014e57..204e6257fb36 100755 ---- a/pi-util/ffconf.py -+++ b/pi-util/ffconf.py -@@ -14,7 +14,12 @@ HWACCEL_RPI = 2 - HWACCEL_DRM = 3 - HWACCEL_VAAPI = 4 - --def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec): -+def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, args): -+ ffmpeg_exec = args.ffmpeg -+ gen_yuv = args.gen_yuv -+ valgrind = args.valgrind -+ rv = 0 -+ - hwaccel = "" - if dectype == HWACCEL_RPI: - hwaccel = "rpi" -@@ -48,17 +53,29 @@ def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_e - except: - pass - -- flog = open(os.path.join(tmp_root, name + ".log"), "wt") -+ yuv_file = os.path.join(tmp_root, name + ".dec.yuv") -+ try: -+ os.remove(yuv_file) -+ except: -+ pass -+ -+ flog = open(os.path.join(tmp_root, name 
+ ".log"), "w+t") - -- ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file] -+ ffargs = [ffmpeg_exec, "-flags", "unaligned"] +\ -+ (["-hwaccel", hwaccel] if hwaccel else []) +\ -+ ["-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] +\ -+ pix_fmt +\ -+ ([yuv_file] if gen_yuv else ["-f", "md5", dec_file]) -+ -+ if valgrind: -+ ffargs = ['valgrind', '--leak-check=full'] + ffargs - - # Unaligned needed for cropping conformance -- if hwaccel: -- rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) -- else: -- rstr = subprocess.call( -- [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file], -- stdout=flog, stderr=subprocess.STDOUT) -+ rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT) -+ -+ if gen_yuv: -+ with open(dec_file, 'wt') as f: -+ subprocess.call(["md5sum", yuv_file], stdout=f, stderr=subprocess.STDOUT) - - try: - m1 = None -@@ -74,9 +91,21 @@ def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_e - except: - pass - -+ if valgrind: -+ flog.seek(0) -+ leak = True -+ valerr = True -+ -+ for line in flog: -+ if re.search("^==[0-9]+== All heap blocks were freed", line): -+ leak = False -+ if re.search("^==[0-9]+== ERROR SUMMARY: 0 errors", line): -+ valerr = False -+ if leak or valerr: -+ rv = 4 -+ - if m1 and m2 and m1.group() == m2.group(): - print("Match: " + m1.group(), file=flog) -- rv = 0 - elif not m1: - print("****** Cannot find m1", file=flog) - rv = 3 -@@ -121,7 +150,7 @@ def runtest(name, tests): - return True - return False - --def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): -+def doconf(csva, tests, test_root, vcodec, dectype, args): - unx_failures = [] - unx_success = [] - failures = 0 -@@ -133,7 +162,7 @@ def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): - print ("==== ", name, 
end="") - sys.stdout.flush() - -- rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec) -+ rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, args=args) - if (rv == 0): - successes += 1 - else: -@@ -158,6 +187,8 @@ def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec): - print(": * CRASH *") - elif (rv == 3) : - print(": * MD5 MISSING *") -+ elif (rv == 4) : -+ print(": * VALGRIND *") - else : - print(": * BANG *") - -@@ -189,6 +220,8 @@ if __name__ == '__main__': - argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") - argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") - argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") -+ argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests") -+ argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)") - args = argp.parse_args() - - if args.csvgen: -@@ -211,5 +244,5 @@ if __name__ == '__main__': - elif args.vaapi: - dectype = HWACCEL_VAAPI - -- doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg) -+ doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args) - - -From 2a2e3858f09b446ee866c5e94c4d0cb81a176ad7 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 5 Mar 2024 15:47:34 +0000 -Subject: [PATCH 182/186] ffconf: Validate ffmpeg & test_root options rather - than crashing - -(cherry picked from commit c3948731965a10b3d459931f4134dd3d95b463aa) ---- - pi-util/ffconf.py | 12 +++++++++++- - 1 file changed, 11 insertions(+), 1 deletion(-) - -diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py -index 204e6257fb36..71cd8387203f 100755 ---- a/pi-util/ffconf.py -+++ b/pi-util/ffconf.py -@@ -219,11 +219,15 @@ if __name__ == '__main__': - argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for 
dir") - argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename") - argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use") -- argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name") -+ argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use /ffmpeg") - argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests") - argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)") - args = argp.parse_args() - -+ if not os.path.isdir(args.test_root): -+ print("Test root dir '%s' not found" % args.test_root) -+ exit(1) -+ - if args.csvgen: - csv.writer(sys.stdout).writerows(scandir(args.test_root)) - exit(0) -@@ -244,5 +248,11 @@ if __name__ == '__main__': - elif args.vaapi: - dectype = HWACCEL_VAAPI - -+ if os.path.isdir(args.ffmpeg): -+ args.ffmpeg = os.path.join(args.ffmpeg, "ffmpeg") -+ if not os.path.isfile(args.ffmpeg): -+ print("FFmpeg file '%s' not found" % args.ffmpeg) -+ exit(1) -+ - doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args) - - -From 3f92b3ddcf6647bb88b585bfa286a77c18b4ee30 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Wed, 6 Mar 2024 11:55:22 +0000 -Subject: [PATCH 183/186] ffconf: Fix expected conformance s.t. 
- VPSSPSPPS_A_MainConcept_1 fails - -(cherry picked from commit faa8c6afcda58e1ad91eacc4f4ca6b4d467200b3) ---- - pi-util/conf_h265.2016.csv | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv -index 4efd5d1c676d..177f1c8111fd 100644 ---- a/pi-util/conf_h265.2016.csv -+++ b/pi-util/conf_h265.2016.csv -@@ -128,7 +128,7 @@ - 3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10 - 1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8 - 1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8 --3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8 -+2,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8 - 1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10 - 1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8 - 1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8 - -From ad8c5df726d8623d2b8ce00fddb4312c3b871415 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 11 Mar 2024 18:36:51 +0000 -Subject: [PATCH 184/186] ffconf: Add loop option for race testing - ---- - pi-util/ffconf.py | 11 ++++++++++- - 1 file changed, 10 insertions(+), 1 deletion(-) - -diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py -index 71cd8387203f..702461200671 100755 ---- a/pi-util/ffconf.py -+++ b/pi-util/ffconf.py -@@ -198,6 +198,8 @@ def doconf(csva, tests, test_root, vcodec, dectype, args): - else: - print("All tests normal:", successes, "ok,", failures, "failed") - -+ return unx_failures + unx_success -+ - - class ConfCSVDialect(csv.Dialect): - delimiter = ',' -@@ -222,6 +224,7 @@ if __name__ == '__main__': - argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use /ffmpeg") - 
argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests") - argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)") -+ argp.add_argument("--loop", default=0, type=int, help="Create yuv file (stored with log under /tmp)") - args = argp.parse_args() - - if not os.path.isdir(args.test_root): -@@ -254,5 +257,11 @@ if __name__ == '__main__': - print("FFmpeg file '%s' not found" % args.ffmpeg) - exit(1) - -- doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args) -+ i = 0 -+ while True: -+ i = i + 1 -+ if args.loop: -+ print("== Loop ", i) -+ if doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args) or (args.loop >= 0 and i > args.loop): -+ break - - -From f75376acc36b73deed90ae5f60b53355b9cef599 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Tue, 19 Mar 2024 15:29:02 +0000 -Subject: [PATCH 185/186] pi-util/ffperf: Make ffmpeg run options an option - -Also change the default run args to be a simple s/w decode ---- - pi-util/ffperf.py | 24 ++++++++++++++++++------ - 1 file changed, 18 insertions(+), 6 deletions(-) - -diff --git a/pi-util/ffperf.py b/pi-util/ffperf.py -index 65c5224cd8fb..767efe2de2fa 100755 ---- a/pi-util/ffperf.py -+++ b/pi-util/ffperf.py -@@ -1,5 +1,6 @@ - #!/usr/bin/env python3 - -+import shlex - import time - import string - import os -@@ -36,14 +37,20 @@ class tstats: - def __gt__(self, other): - return self.elapsed > other.elapsed - -- def time_file(name, prefix, ffmpeg="./ffmpeg"): -+ def time_file(name, prefix, args): -+ cmdargs = [args.ffmpeg] -+ for x in args.args : -+ if x == '{INPUT}': -+ cmdargs.append(prefix + name) -+ elif x == '{NULL}': -+ cmdargs.append(os.devnull) -+ else: -+ cmdargs.append(x) -+ - stats = tstats() - stats.name = name - start_time = time.clock_gettime(time.CLOCK_MONOTONIC); -- cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw", -- "-vcodec", "hevc_rpi", -- "-t", "30", "-i", prefix + name, -- "-f", "vout_rpi", os.devnull], 
bufsize=-1, stdout=flog, stderr=flog); -+ cproc = subprocess.Popen(cmdargs, bufsize=-1, stdout=flog, stderr=flog); - pinfo = os.wait4(cproc.pid, 0) - end_time = time.clock_gettime(time.CLOCK_MONOTONIC); - stats.elapsed = end_time - start_time -@@ -67,6 +74,10 @@ To blank the screen before starting use "xdg-screensaver activate" - """) - - argp.add_argument("streams", nargs='*') -+ argp.add_argument("--args", default='-t 30 -i {INPUT} -f null {NULL}', help=""" -+ffmpeg arguments, default='-t 30 -i {INPUT} -f null {NULL}'; -+ {INPUT} is replaced by current inputfile path; -+ {NULL} is replaced by the system null device""") - argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename") - argp.add_argument("--csv_in", help="CSV input filename") - argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).") -@@ -74,6 +85,7 @@ To blank the screen before starting use "xdg-screensaver activate" - argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable") - - args = argp.parse_args() -+ args.args = shlex.split(args.args) - - csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"]) - csv_out.writeheader() -@@ -107,7 +119,7 @@ To blank the screen before starting use "xdg-screensaver activate" - - t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999}) - for i in range(args.repeat): -- t = tstats.time_file(f, prefix, args.ffmpeg) -+ t = tstats.time_file(f, prefix, args) - print ("...", t.times_str()) - if t0 > t: - t0 = t - -From b87000d0dc80ec8e0cbd4406e62bd64b5519a544 Mon Sep 17 00:00:00 2001 -From: John Cox -Date: Mon, 25 Mar 2024 18:25:47 +0000 -Subject: [PATCH 186/186] pi-util/ffconf: Fix --loop help text - ---- - pi-util/ffconf.py | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/pi-util/ffconf.py b/pi-util/ffconf.py -index 702461200671..26091f3c07c2 100755 ---- a/pi-util/ffconf.py -+++ b/pi-util/ffconf.py -@@ -224,7 +224,7 @@ if 
__name__ == '__main__': - argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name; if directory given use /ffmpeg") - argp.add_argument("--valgrind", action='store_true', help="Run valgrind on tests") - argp.add_argument("--gen_yuv", action='store_true', help="Create yuv file (stored with log under /tmp)") -- argp.add_argument("--loop", default=0, type=int, help="Create yuv file (stored with log under /tmp)") -+ argp.add_argument("--loop", default=0, type=int, help="Loop n times, or until unexpected result") - args = argp.parse_args() - - if not os.path.isdir(args.test_root): + fate-checkasm-rv34dsp \ + fate-checkasm-rv40dsp \ +diff --git a/tests/fate/filter-video.mak b/tests/fate/filter-video.mak +index 5b8a294afd18..2d67ca68fcaa 100644 +--- a/tests/fate/filter-video.mak ++++ b/tests/fate/filter-video.mak +@@ -391,9 +391,9 @@ fate-filter-fps-down-eof-pass: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:eo + fate-filter-fps-start-drop: CMD = framecrc -lavfi testsrc2=r=7:d=3.5,fps=3:start_time=1.5 + fate-filter-fps-start-fill: CMD = framecrc -lavfi testsrc2=r=7:d=1.5,setpts=PTS+14,fps=3:start_time=1.5 + +-FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, FPS SCALE, MOV, QTRLE) += fate-filter-fps-cfr fate-filter-fps +-fate-filter-fps-cfr: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -fps_mode cfr -pix_fmt yuv420p +-fate-filter-fps: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -vf fps=30 -pix_fmt yuv420p ++#FATE_FILTER_SAMPLES-$(call FILTERDEMDEC, FPS SCALE, MOV, QTRLE) += fate-filter-fps-cfr fate-filter-fps ++#fate-filter-fps-cfr: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -r 30 -fps_mode cfr -pix_fmt yuv420p ++#fate-filter-fps: CMD = framecrc -auto_conversion_filters -i $(TARGET_SAMPLES)/qtrle/apple-animation-variable-fps-bug.mov -vf fps=30 -pix_fmt yuv420p + + 
FATE_FILTER_SAMPLES-$(call FILTERFRAMECRC, TESTSRC2 FSYNC, FILE_PROTOCOL) += fate-filter-fsync-up fate-filter-fsync-down + fate-filter-fsync-up: tests/data/maps/fsync-up +diff --git a/tests/ref/fate/imgutils b/tests/ref/fate/imgutils +index fb2ed6d158fa..61fd261b6088 100644 +--- a/tests/ref/fate/imgutils ++++ b/tests/ref/fate/imgutils +@@ -235,6 +235,9 @@ nv24 planes: 2, linesizes: 64 128 0 0, plane_sizes: 3072 6144 + nv42 planes: 2, linesizes: 64 128 0 0, plane_sizes: 3072 6144 0 0, plane_offsets: 3072 0 0, total_size: 9216 + y210be planes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 + y210le planes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 ++sand128 planes: 2, linesizes: 64 64 0 0, plane_sizes: 3072 1536 0 0, plane_offsets: 3072 0 0, total_size: 4608 ++sand64_10 planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 3072 0 0, plane_offsets: 6144 0 0, total_size: 9216 ++sand64_16 planes: 2, linesizes: 128 128 0 0, plane_sizes: 6144 3072 0 0, plane_offsets: 6144 0 0, total_size: 9216 + x2rgb10le planes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 + x2rgb10be planes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 + x2bgr10le planes: 1, linesizes: 256 0 0 0, plane_sizes: 12288 0 0 0, plane_offsets: 0 0 0, total_size: 12288 +@@ -451,6 +454,9 @@ nv24 total_size: 9216, black_unknown_crc: 0x1c302b58, black_tv_cr + nv42 total_size: 9216, black_unknown_crc: 0x1c302b58, black_tv_crc: 0x1c302b58, black_pc_crc: 0xdf792ea7 + y210be total_size: 12288, black_unknown_crc: 0x5483d935, black_tv_crc: 0x5483d935, black_pc_crc: 0x06397bf3 + y210le total_size: 12288, black_unknown_crc: 0x5d8e1cf6, black_tv_crc: 0x5d8e1cf6, black_pc_crc: 0x8fceec45 ++sand128 total_size: 4608, black_unknown_crc: 0xd00f6cc6, black_tv_crc: 0xd00f6cc6, black_pc_crc: 0x234969af ++sand64_10 total_size: 9216, black_unknown_crc: 
0xee47624d, black_tv_crc: 0xee47624d, black_pc_crc: 0x7c6afe45 ++sand64_16 total_size: 9216, black_unknown_crc: 0xfff85b60, black_tv_crc: 0xfff85b60, black_pc_crc: 0xc03cff93 + x2rgb10le total_size: 12288, black_unknown_crc: 0x00000000, black_tv_crc: 0x00000000, black_pc_crc: 0x00000000 + x2rgb10be total_size: 12288, black_unknown_crc: 0x00000000, black_tv_crc: 0x00000000, black_pc_crc: 0x00000000 + x2bgr10le total_size: 12288, black_unknown_crc: 0x00000000, black_tv_crc: 0x00000000, black_pc_crc: 0x00000000 +diff --git a/tests/ref/fate/source b/tests/ref/fate/source +index 78d3a2e0fa58..a0ab58558556 100644 +--- a/tests/ref/fate/source ++++ b/tests/ref/fate/source +@@ -1,5 +1,8 @@ + Files without standard license headers: + libavcodec/file_open.c ++libavcodec/hevc-ctrls-v1.h ++libavcodec/hevc-ctrls-v2.h ++libavcodec/hevc-ctrls-v3.h + libavcodec/interplayacm.c + libavcodec/log2_tab.c + libavcodec/reverse.c +@@ -26,6 +29,9 @@ compat/float/float.h + compat/float/limits.h + compat/stdbit/stdbit.h + libavcodec/bitstream_template.h ++libavcodec/hevc-ctrls-v1.h ++libavcodec/hevc-ctrls-v2.h ++libavcodec/hevc-ctrls-v3.h + tools/decode_simple.h + Use of av_clip() where av_clip_uintp2() could be used: + Use of av_clip() where av_clip_intp2() could be used: +diff --git a/tests/ref/fate/sws-pixdesc-query b/tests/ref/fate/sws-pixdesc-query +index fff93bbf0efd..96fefd5b8ab7 100644 +--- a/tests/ref/fate/sws-pixdesc-query ++++ b/tests/ref/fate/sws-pixdesc-query +@@ -23,6 +23,7 @@ is16BPS: + rgba64le + rgbaf16be + rgbaf16le ++ sand64_16 + ya16be + ya16le + yuv420p16be +@@ -75,6 +76,7 @@ isNBPS: + p410le + p412be + p412le ++ sand64_10 + x2bgr10be + x2bgr10le + x2rgb10be +@@ -245,6 +247,9 @@ isYUV: + p412le + p416be + p416le ++ sand128 ++ sand64_10 ++ sand64_16 + uyvy422 + uyyvyy411 + vuya +@@ -818,6 +823,9 @@ Packed: + rgbaf32le + rgbf32be + rgbf32le ++ sand128 ++ sand64_10 ++ sand64_16 + uyvy422 + uyyvyy411 + vuya