From 25301226bd1e5cada80e03f0deb3f8cb0d23d9c0 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Sun, 24 Jul 2022 10:30:47 +0200 Subject: [PATCH] ffmpeg: update rpi patch Patch created using revisions dc91b91..a4f3ca9 from branch dev/4.4/rpi_import_1 of https://github.com/jc-kynesim/rpi-ffmpeg --- .../ffmpeg/patches/rpi/ffmpeg-001-rpi.patch | 4120 +++++++++++++---- 1 file changed, 3229 insertions(+), 891 deletions(-) diff --git a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch index 6809d21783..ca1830666f 100644 --- a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch +++ b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch @@ -375,7 +375,7 @@ index 807e783422..456d4f349b 100644 "write program-readable progress information", "url" }, { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction }, diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 33a280cf69..ef22d26dc1 100644 +index 33a280cf69..a403dc41d6 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -19,6 +19,7 @@ HEADERS = ac3_parser.h \ @@ -428,7 +428,7 @@ index 33a280cf69..ef22d26dc1 100644 +OBJS-$(CONFIG_HEVC_RPI4_8_HWACCEL) += rpivid_hevc.o +OBJS-$(CONFIG_HEVC_RPI4_10_HWACCEL) += rpivid_hevc.o +OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o\ -+ v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o ++ v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o v4l2_req_hevc_v4.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o h265_profile_level.o OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o @@ -465,19 +465,10 @@ index 33a280cf69..ef22d26dc1 100644 +$(SUBDIR)rpi_hevcdec.o $(SUBDIR)rpi_shader_template.o $(SUBDIR)rpi_qpu.o: $(SUBDIR)rpi_hevc_shader.h +endif diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile -index 954461f81d..7078dc6089 100644 +index 954461f81d..c8935f205e 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile -@@ -35,6 +35,8 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o - - # subsystems - NEON-OBJS-$(CONFIG_AAC_DECODER) += aarch64/sbrdsp_neon.o -+NEON-OBJS-$(CONFIG_BLOCKDSP) += aarch64/blockdsp_init_aarch64.o \ -+ aarch64/blockdsp_neon.o - NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o - NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o - NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o -@@ -44,10 +46,12 @@ NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o +@@ -44,10 +44,12 @@ NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \ aarch64/hpeldsp_neon.o NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o @@ -491,103 +482,6 @@ index 954461f81d..7078dc6089 100644 NEON-OBJS-$(CONFIG_VP8DSP) += aarch64/vp8dsp_neon.o # decoders/encoders -diff --git a/libavcodec/aarch64/blockdsp_init_aarch64.c b/libavcodec/aarch64/blockdsp_init_aarch64.c -new file mode 100644 -index 0000000000..9f3280f007 ---- /dev/null -+++ b/libavcodec/aarch64/blockdsp_init_aarch64.c -@@ -0,0 +1,42 @@ -+/* -+ * AArch64 NEON optimised block operations -+ * -+ * Copyright (c) 2022 Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. 
-+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include -+ -+#include "libavutil/attributes.h" -+#include "libavutil/cpu.h" -+#include "libavutil/arm/cpu.h" -+#include "libavcodec/avcodec.h" -+#include "libavcodec/blockdsp.h" -+ -+void ff_clear_block_neon(int16_t *block); -+void ff_clear_blocks_neon(int16_t *blocks); -+ -+av_cold void ff_blockdsp_init_aarch64(BlockDSPContext *c) -+{ -+ int cpu_flags = av_get_cpu_flags(); -+ -+ if (have_neon(cpu_flags)) { -+ c->clear_block = ff_clear_block_neon; -+ c->clear_blocks = ff_clear_blocks_neon; -+ } -+} -diff --git a/libavcodec/aarch64/blockdsp_neon.S b/libavcodec/aarch64/blockdsp_neon.S -new file mode 100644 -index 0000000000..e4a4959ccc ---- /dev/null -+++ b/libavcodec/aarch64/blockdsp_neon.S -@@ -0,0 +1,43 @@ -+/* -+ * AArch64 NEON optimised block operations -+ * -+ * Copyright (c) 2022 Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/aarch64/asm.S" -+ -+function ff_clear_block_neon, export=1 -+ movi v0.16b, #0 -+ movi v1.16b, #0 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ st1 {v0.16b, v1.16b}, [x0] -+ ret -+endfunc -+ -+function ff_clear_blocks_neon, export=1 -+ movi v0.16b, #0 -+ movi v1.16b, #0 -+ .rept 23 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ .endr -+ st1 {v0.16b, v1.16b}, [x0] -+ ret -+endfunc diff --git a/libavcodec/aarch64/idctdsp_init_aarch64.c b/libavcodec/aarch64/idctdsp_init_aarch64.c index 742a3372e3..eec21aa5a2 100644 --- a/libavcodec/aarch64/idctdsp_init_aarch64.c @@ -767,7 +661,7 @@ index 0000000000..7f47611206 + ret +endfunc diff --git a/libavcodec/aarch64/vc1dsp_init_aarch64.c b/libavcodec/aarch64/vc1dsp_init_aarch64.c -index 13dfd74940..161d5a972b 100644 +index 13dfd74940..a7976fd596 100644 --- a/libavcodec/aarch64/vc1dsp_init_aarch64.c +++ b/libavcodec/aarch64/vc1dsp_init_aarch64.c @@ -21,10 +21,28 @@ @@ -789,12 +683,12 @@ index 13dfd74940..161d5a972b 100644 +void ff_vc1_inv_trans_4x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); +void ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); + -+void ff_vc1_v_loop_filter4_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_h_loop_filter4_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_v_loop_filter8_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_h_loop_filter8_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_v_loop_filter16_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_h_loop_filter16_neon(uint8_t *src, int stride, int pq); ++void ff_vc1_v_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_h_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_v_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_h_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_v_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_h_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq); + void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); @@ -892,10 +786,10 @@ index 13dfd74940..161d5a972b 100644 } diff --git a/libavcodec/aarch64/vc1dsp_neon.S b/libavcodec/aarch64/vc1dsp_neon.S new file mode 100644 -index 0000000000..529c21d285 +index 0000000000..9a96c2523c --- /dev/null +++ b/libavcodec/aarch64/vc1dsp_neon.S -@@ -0,0 +1,1552 @@ +@@ -0,0 +1,1546 @@ +/* + * VC1 AArch64 NEON optimisations + * @@ -1605,11 +1499,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 4 pixel pairs at boundary of vertically-neighbouring blocks +// On entry: +// x0 -> top-left pel of lower block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_v_loop_filter4_neon, export=1 + sub x3, x0, w1, sxtw #2 -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.s}[0], [x0], x1 // P5 + ld1 {v2.s}[0], [x3], x1 // P1 @@ -1678,11 +1571,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 4 pixel pairs at boundary of horizontally-neighbouring blocks +// On entry: +// x0 -> top-left pel of right block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = 
PQUANT bitstream parameter +function ff_vc1_h_loop_filter4_neon, export=1 + sub x3, x0, #4 // where to start reading -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x3], x1 + sub x0, x0, #1 // where to start writing @@ -1752,11 +1644,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 8 pixel pairs at boundary of vertically-neighbouring blocks +// On entry: +// x0 -> top-left pel of lower block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_v_loop_filter8_neon, export=1 + sub x3, x0, w1, sxtw #2 -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x0], x1 // P5 + movi v2.2d, #0x0000ffff00000000 @@ -1830,11 +1721,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 8 pixel pairs at boundary of horizontally-neighbouring blocks +// On entry: +// x0 -> top-left pel of right block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_h_loop_filter8_neon, export=1 + sub x3, x0, #4 // where to start reading -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x3], x1 // P1[0], P2[0]... + sub x0, x0, #1 // where to start writing @@ -1939,11 +1829,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 16 pixel pairs at boundary of vertically-neighbouring blocks +// On entry: +// x0 -> top-left pel of lower block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_v_loop_filter16_neon, export=1 + sub x3, x0, w1, sxtw #2 -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.16b}, [x0], x1 // P5 + movi v2.2d, #0x0000ffff00000000 @@ -2071,11 +1960,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 16 pixel pairs at boundary of horizontally-neighbouring blocks +// On entry: +// x0 -> top-left pel of right block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_h_loop_filter16_neon, export=1 + sub x3, x0, #4 // where to start reading -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x3], x1 // P1[0], P2[0]... 
+ sub x0, x0, #1 // where to start writing @@ -17404,7 +17292,7 @@ index 2cca784f5a..48cb816b70 100644 + dsp->vc1_unescape_buffer = vc1_unescape_buffer_neon; } diff --git a/libavcodec/arm/vc1dsp_neon.S b/libavcodec/arm/vc1dsp_neon.S -index 93f043bf08..8e97bc5e58 100644 +index 93f043bf08..96014fbebc 100644 --- a/libavcodec/arm/vc1dsp_neon.S +++ b/libavcodec/arm/vc1dsp_neon.S @@ -1161,3 +1161,764 @@ function ff_vc1_inv_trans_4x4_dc_neon, export=1 @@ -17560,17 +17448,17 @@ index 93f043bf08..8e97bc5e58 100644 +function ff_vc1_v_loop_filter8_neon, export=1 + sub r3, r0, r1, lsl #2 + vldr d0, .Lcoeffs -+ vld1.32 {d1}, [r0], r1 @ P5 -+ vld1.32 {d2}, [r3], r1 @ P1 -+ vld1.32 {d3}, [r3], r1 @ P2 -+ vld1.32 {d4}, [r0], r1 @ P6 -+ vld1.32 {d5}, [r3], r1 @ P3 -+ vld1.32 {d6}, [r0], r1 @ P7 ++ vld1.32 {d1}, [r0 :64], r1 @ P5 ++ vld1.32 {d2}, [r3 :64], r1 @ P1 ++ vld1.32 {d3}, [r3 :64], r1 @ P2 ++ vld1.32 {d4}, [r0 :64], r1 @ P6 ++ vld1.32 {d5}, [r3 :64], r1 @ P3 ++ vld1.32 {d6}, [r0 :64], r1 @ P7 + vshll.u8 q8, d1, #1 @ 2*P5 + vshll.u8 q9, d2, #1 @ 2*P1 -+ vld1.32 {d7}, [r3] @ P4 ++ vld1.32 {d7}, [r3 :64] @ P4 + vmovl.u8 q1, d3 @ P2 -+ vld1.32 {d20}, [r0] @ P8 ++ vld1.32 {d20}, [r0 :64] @ P8 + vmovl.u8 q11, d4 @ P6 + vdup.16 q12, r2 @ pq + vmovl.u8 q13, d5 @ P3 @@ -17625,8 +17513,8 @@ index 93f043bf08..8e97bc5e58 100644 + vmla.i16 q1, q0, q2 @ invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5 + vqmovun.s16 d0, q3 + vqmovun.s16 d1, q1 -+ vst1.32 {d0}, [r3], r1 -+ vst1.32 {d1}, [r3] ++ vst1.32 {d0}, [r3 :64], r1 ++ vst1.32 {d1}, [r3 :64] +1: bx lr +endfunc + @@ -17741,17 +17629,17 @@ index 93f043bf08..8e97bc5e58 100644 + vpush {d8-d15} + sub r3, r0, r1, lsl #2 + vldr d0, .Lcoeffs -+ vld1.64 {q1}, [r0], r1 @ P5 -+ vld1.64 {q2}, [r3], r1 @ P1 -+ vld1.64 {q3}, [r3], r1 @ P2 -+ vld1.64 {q4}, [r0], r1 @ P6 -+ vld1.64 {q5}, [r3], r1 @ P3 -+ vld1.64 {q6}, [r0], r1 @ P7 ++ vld1.64 {q1}, [r0 :128], r1 @ P5 ++ vld1.64 {q2}, [r3 :128], r1 @ P1 ++ vld1.64 {q3}, [r3 :128], r1 @ P2 ++ vld1.64 {q4}, [r0 :128], r1 @ P6 ++ vld1.64 {q5}, [r3 :128], r1 @ P3 ++ vld1.64 {q6}, [r0 :128], r1 @ P7 + vshll.u8 q7, d2, #1 @ 2*P5[0..7] + vshll.u8 q8, d4, #1 @ 2*P1[0..7] -+ vld1.64 {q9}, [r3] @ P4 ++ vld1.64 {q9}, [r3 :128] @ P4 + vmovl.u8 q10, d6 @ P2[0..7] -+ vld1.64 {q11}, [r0] @ P8 ++ vld1.64 {q11}, [r0 :128] @ P8 + vmovl.u8 q12, d8 @ P6[0..7] + vdup.16 q13, r2 @ pq + vshll.u8 q2, d5, #1 @ 2*P1[8..15] @@ -17861,8 +17749,8 @@ index 93f043bf08..8e97bc5e58 100644 + vqmovun.s16 d0, q6 + vqmovun.s16 d5, q9 + vqmovun.s16 d1, q1 -+ vst1.64 {q2}, [r3], r1 -+ vst1.64 {q0}, [r3] ++ vst1.64 {q2}, [r3 :128], r1 ++ vst1.64 {q0}, [r3 :128] +1: vpop {d8-d15} + bx lr +endfunc @@ -18194,31 +18082,6 @@ index 8a71c04230..53644506e5 100644 } AVHWAccel; /** -diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c -index c7efe7e77b..46766244b8 100644 ---- a/libavcodec/blockdsp.c -+++ b/libavcodec/blockdsp.c -@@ -65,6 +65,8 @@ av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx) - c->fill_block_tab[0] = fill_block16_c; - c->fill_block_tab[1] = fill_block8_c; - -+ if (ARCH_AARCH64) -+ ff_blockdsp_init_aarch64(c); - if (ARCH_ALPHA) - ff_blockdsp_init_alpha(c); - if (ARCH_ARM) -diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h -index 26fc2ea13b..fe539491da 100644 ---- a/libavcodec/blockdsp.h -+++ b/libavcodec/blockdsp.h -@@ -41,6 +41,7 @@ typedef struct BlockDSPContext { - - void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx); - -+void 
ff_blockdsp_init_aarch64(BlockDSPContext *c); - void ff_blockdsp_init_alpha(BlockDSPContext *c); - void ff_blockdsp_init_arm(BlockDSPContext *c); - void ff_blockdsp_init_ppc(BlockDSPContext *c); diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 38d06b2842..bbf5d70560 100644 --- a/libavcodec/cabac.h @@ -19015,6 +18878,527 @@ index 0000000000..4e35bd583d +#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) + +#endif +diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h +new file mode 100644 +index 0000000000..7e05f6e7c3 +--- /dev/null ++++ b/libavcodec/hevc-ctrls-v4.h +@@ -0,0 +1,515 @@ ++/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ ++/* ++ * Video for Linux Two controls header file ++ * ++ * Copyright (C) 1999-2012 the contributors ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * Alternatively you can redistribute this file under the terms of the ++ * BSD license as stated below: ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in ++ * the documentation and/or other materials provided with the ++ * distribution. ++ * 3. The names of its contributors may not be used to endorse or promote ++ * products derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ++ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * The contents of this header was split off from videodev2.h. All control ++ * definitions should be added to this header, which is included by ++ * videodev2.h. 
++ */ ++ ++#ifndef AVCODEC_HEVC_CTRLS_V4_H ++#define AVCODEC_HEVC_CTRLS_V4_H ++ ++#include ++#include ++ ++#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) ++#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) ++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) ++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403) ++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404) ++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) ++#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) ++#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) ++ ++enum v4l2_stateless_hevc_decode_mode { ++ V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, ++ V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, ++}; ++ ++enum v4l2_stateless_hevc_start_code { ++ V4L2_STATELESS_HEVC_START_CODE_NONE, ++ V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, ++}; ++ ++#define V4L2_HEVC_SLICE_TYPE_B 0 ++#define V4L2_HEVC_SLICE_TYPE_P 1 ++#define V4L2_HEVC_SLICE_TYPE_I 2 ++ ++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) ++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) ++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) ++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) ++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) ++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) ++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) ++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) ++ ++/** ++ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. 
H.265: Sequence parameter set ++ * ++ * @video_parameter_set_id: specifies the value of the ++ * vps_video_parameter_set_id of the active VPS ++ * @seq_parameter_set_id: provides an identifier for the SPS for ++ * reference by other syntax elements ++ * @pic_width_in_luma_samples: specifies the width of each decoded picture ++ * in units of luma samples ++ * @pic_height_in_luma_samples: specifies the height of each decoded picture ++ * in units of luma samples ++ * @bit_depth_luma_minus8: this value plus 8specifies the bit depth of the ++ * samples of the luma array ++ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the ++ * samples of the chroma arrays ++ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of ++ * the variable MaxPicOrderCntLsb ++ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum ++ * required size of the decoded picture ++ * buffer for the codec video sequence ++ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures ++ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the ++ * value of SpsMaxLatencyPictures array ++ * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum ++ * luma coding block size ++ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between ++ * the maximum and minimum luma ++ * coding block size ++ * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma ++ * transform block size ++ * @log2_diff_max_min_luma_transform_block_size: specifies the difference between ++ * the maximum and minimum luma ++ * transform block size ++ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy ++ * depth for transform units of ++ * coding units coded in inter ++ * prediction mode ++ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy ++ * depth for transform units of ++ * coding units coded in intra ++ * prediction mode ++ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of ++ * bits used to represent each of PCM sample ++ * values of the luma component ++ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number ++ * of bits used to represent each of PCM ++ * sample values of the chroma components ++ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the ++ * minimum size of coding blocks ++ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between ++ * the maximum and minimum size of ++ * coding blocks ++ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set() ++ * syntax structures included in the SPS ++ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term ++ * reference pictures that are specified in the SPS ++ * @chroma_format_idc: specifies the chroma sampling ++ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number ++ * of temporal sub-layers ++ * @reserved: padding field. Should be zeroed by applications. 
++ * @flags: see V4L2_HEVC_SPS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_sps { ++ __u8 video_parameter_set_id; ++ __u8 seq_parameter_set_id; ++ __u16 pic_width_in_luma_samples; ++ __u16 pic_height_in_luma_samples; ++ __u8 bit_depth_luma_minus8; ++ __u8 bit_depth_chroma_minus8; ++ __u8 log2_max_pic_order_cnt_lsb_minus4; ++ __u8 sps_max_dec_pic_buffering_minus1; ++ __u8 sps_max_num_reorder_pics; ++ __u8 sps_max_latency_increase_plus1; ++ __u8 log2_min_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_luma_coding_block_size; ++ __u8 log2_min_luma_transform_block_size_minus2; ++ __u8 log2_diff_max_min_luma_transform_block_size; ++ __u8 max_transform_hierarchy_depth_inter; ++ __u8 max_transform_hierarchy_depth_intra; ++ __u8 pcm_sample_bit_depth_luma_minus1; ++ __u8 pcm_sample_bit_depth_chroma_minus1; ++ __u8 log2_min_pcm_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; ++ __u8 num_short_term_ref_pic_sets; ++ __u8 num_long_term_ref_pics_sps; ++ __u8 chroma_format_idc; ++ __u8 sps_max_sub_layers_minus1; ++ ++ __u8 reserved[6]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) ++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) ++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) ++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) ++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) ++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) ++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) ++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) ++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) ++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) ++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) ++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) ++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) ++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) ++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) ++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) ++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) ++ ++/** ++ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set ++ * ++ * @pic_parameter_set_id: identifies the PPS for reference by other ++ * syntax elements ++ * @num_extra_slice_header_bits: specifies the number of extra slice header ++ * bits that are present in the slice header RBSP ++ * for coded pictures referring to the PPS. 
++ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the ++ * inferred value of num_ref_idx_l0_active_minus1 ++ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the ++ * inferred value of num_ref_idx_l1_active_minus1 ++ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for ++ * each slice referring to the PPS ++ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding ++ * tree block size and the minimum luma coding block ++ * size of coding units that convey cu_qp_delta_abs ++ * and cu_qp_delta_sign_flag ++ * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb ++ * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr ++ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns ++ * partitioning the picture ++ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning ++ * the picture ++ * @column_width_minus1: this value plus 1 specifies the width of the each tile column in ++ * units of coding tree blocks ++ * @row_height_minus1: this value plus 1 specifies the height of the each tile row in ++ * units of coding tree blocks ++ * @pps_beta_offset_div2: specify the default deblocking parameter offsets for ++ * beta divided by 2 ++ * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC ++ * divided by 2 ++ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of ++ * the variable Log2ParMrgLevel ++ * @reserved: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_PPS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_pps { ++ __u8 pic_parameter_set_id; ++ __u8 num_extra_slice_header_bits; ++ __u8 num_ref_idx_l0_default_active_minus1; ++ __u8 num_ref_idx_l1_default_active_minus1; ++ __s8 init_qp_minus26; ++ __u8 diff_cu_qp_delta_depth; ++ __s8 pps_cb_qp_offset; ++ __s8 pps_cr_qp_offset; ++ __u8 num_tile_columns_minus1; ++ __u8 num_tile_rows_minus1; ++ __u8 column_width_minus1[20]; ++ __u8 row_height_minus1[22]; ++ __s8 pps_beta_offset_div2; ++ __s8 pps_tc_offset_div2; ++ __u8 log2_parallel_merge_level_minus2; ++ __u8 reserved; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 ++ ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6 ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7 ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12 ++ ++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 ++ ++/** ++ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry ++ * ++ * @timestamp: timestamp of the V4L2 capture buffer to use as reference. ++ * @flags: long term flag for the reference frame ++ * @field_pic: whether the reference is a field picture or a frame. ++ * @reserved: padding field. Should be zeroed by applications. ++ * @pic_order_cnt_val: the picture order count of the current picture. 
++ */ ++struct v4l2_hevc_dpb_entry { ++ __u64 timestamp; ++ __u8 flags; ++ __u8 field_pic; ++ __u16 reserved; ++ __s32 pic_order_cnt_val; ++}; ++ ++/** ++ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters ++ * ++ * @delta_luma_weight_l0: the difference of the weighting factor applied ++ * to the luma prediction value for list 0 ++ * @luma_offset_l0: the additive offset applied to the luma prediction value ++ * for list 0 ++ * @delta_chroma_weight_l0: the difference of the weighting factor applied ++ * to the chroma prediction values for list 0 ++ * @chroma_offset_l0: the difference of the additive offset applied to ++ * the chroma prediction values for list 0 ++ * @delta_luma_weight_l1: the difference of the weighting factor applied ++ * to the luma prediction value for list 1 ++ * @luma_offset_l1: the additive offset applied to the luma prediction value ++ * for list 1 ++ * @delta_chroma_weight_l1: the difference of the weighting factor applied ++ * to the chroma prediction values for list 1 ++ * @chroma_offset_l1: the difference of the additive offset applied to ++ * the chroma prediction values for list 1 ++ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for ++ * all luma weighting factors ++ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm ++ * of the denominator for all chroma ++ * weighting factors ++ */ ++struct v4l2_hevc_pred_weight_table { ++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __u8 luma_log2_weight_denom; ++ __s8 delta_chroma_log2_weight_denom; ++}; ++ ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) ++ ++/** ++ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters ++ * ++ * This control is a dynamically sized 1-dimensional array, ++ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it. ++ * ++ * @bit_size: size (in bits) of the current slice data ++ * @data_byte_offset: offset (in bytes) to the video data in the current slice data ++ * @num_entry_point_offsets: specifies the number of entry point offset syntax ++ * elements in the slice header. 
++ * @nal_unit_type: specifies the coding type of the slice (B, P or I) ++ * @nuh_temporal_id_plus1: minus 1 specifies a temporal identifier for the NAL unit ++ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{} ++ * @colour_plane_id: specifies the colour plane associated with the current slice ++ * @slice_pic_order_cnt: specifies the picture order count ++ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum ++ * reference index for reference picture list 0 ++ * that may be used to decode the slice ++ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum ++ * reference index for reference picture list 1 ++ * that may be used to decode the slice ++ * @collocated_ref_idx: specifies the reference index of the collocated picture used ++ * for temporal motion vector prediction ++ * @five_minus_max_num_merge_cand: specifies the maximum number of merging ++ * motion vector prediction candidates supported in ++ * the slice subtracted from 5 ++ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding ++ * blocks in the slice ++ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset ++ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset ++ * @slice_act_y_qp_offset: screen content extension parameters ++ * @slice_act_cb_qp_offset: screen content extension parameters ++ * @slice_act_cr_qp_offset: screen content extension parameters ++ * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2 ++ * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2 ++ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or ++ * more fields ++ * @reserved0: padding field. Should be zeroed by applications. ++ * @slice_segment_addr: specifies the address of the first coding tree block in ++ * the slice segment ++ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB ++ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB ++ * @short_term_ref_pic_set_size: specifies the size of short-term reference ++ * pictures set included in the SPS ++ * @long_term_ref_pic_set_size: specifies the size of long-term reference ++ * pictures set include in the SPS ++ * @pred_weight_table: the prediction weight coefficients for inter-picture ++ * prediction ++ * @reserved1: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_slice_params { ++ __u32 bit_size; ++ __u32 data_byte_offset; ++ __u32 num_entry_point_offsets; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ __u8 nal_unit_type; ++ __u8 nuh_temporal_id_plus1; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 slice_type; ++ __u8 colour_plane_id; ++ __s32 slice_pic_order_cnt; ++ __u8 num_ref_idx_l0_active_minus1; ++ __u8 num_ref_idx_l1_active_minus1; ++ __u8 collocated_ref_idx; ++ __u8 five_minus_max_num_merge_cand; ++ __s8 slice_qp_delta; ++ __s8 slice_cb_qp_offset; ++ __s8 slice_cr_qp_offset; ++ __s8 slice_act_y_qp_offset; ++ __s8 slice_act_cb_qp_offset; ++ __s8 slice_act_cr_qp_offset; ++ __s8 slice_beta_offset_div2; ++ __s8 slice_tc_offset_div2; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ __u8 pic_struct; ++ ++ __u8 reserved0[3]; ++ /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ ++ __u32 slice_segment_addr; ++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u16 short_term_ref_pic_set_size; ++ __u16 long_term_ref_pic_set_size; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ ++ struct v4l2_hevc_pred_weight_table pred_weight_table; ++ ++ __u8 reserved1[2]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 ++ ++/** ++ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters ++ * ++ * @pic_order_cnt_val: picture order count ++ * @short_term_ref_pic_set_size: specifies the size of short-term reference ++ * pictures set included in the SPS of the first slice ++ * @long_term_ref_pic_set_size: specifies the size of long-term reference ++ * pictures set include in the SPS of the first slice ++ * @num_active_dpb_entries: the number of entries in dpb ++ * @num_poc_st_curr_before: the number of reference pictures in the short-term ++ * set that come before the current frame ++ * @num_poc_st_curr_after: the number of reference pictures in the short-term ++ * set that come after the current frame ++ * @num_poc_lt_curr: the number of reference pictures in the long-term set ++ * @poc_st_curr_before: provides the index of the short term before references ++ * in DPB array ++ * @poc_st_curr_after: provides the index of the short term after references ++ * in DPB array ++ * @poc_lt_curr: provides the index of the long term references in DPB array ++ * @reserved: padding field. Should be zeroed by applications. ++ * @dpb: the decoded picture buffer, for meta-data about reference frames ++ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_decode_params { ++ __s32 pic_order_cnt_val; ++ __u16 short_term_ref_pic_set_size; ++ __u16 long_term_ref_pic_set_size; ++ __u8 num_active_dpb_entries; ++ __u8 num_poc_st_curr_before; ++ __u8 num_poc_st_curr_after; ++ __u8 num_poc_lt_curr; ++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 reserved[4]; ++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u64 flags; ++}; ++ ++/** ++ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters ++ * ++ * @scaling_list_4x4: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_8x8: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_16x16: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_32x32: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process ++ * for transform coefficients. The values on each ++ * scaling list are expected in raster scan order. ++ * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process ++ * for transform coefficients. The values on each ++ * scaling list are expected in raster scan order. 
++ */ ++struct v4l2_ctrl_hevc_scaling_matrix { ++ __u8 scaling_list_4x4[6][16]; ++ __u8 scaling_list_8x8[6][64]; ++ __u8 scaling_list_16x16[6][64]; ++ __u8 scaling_list_32x32[2][64]; ++ __u8 scaling_list_dc_coef_16x16[6]; ++ __u8 scaling_list_dc_coef_32x32[2]; ++}; ++ ++#endif diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c index 463d352055..7feff43c28 100644 --- a/libavcodec/hevc_parser.c @@ -19039,8 +19423,67 @@ index 463d352055..7feff43c28 100644 if (ps->vps->vps_timing_info_present_flag) { num = ps->vps->vps_num_units_in_tick; den = ps->vps->vps_time_scale; +diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c +index 4f6d985ae6..eefae71275 100644 +--- a/libavcodec/hevc_refs.c ++++ b/libavcodec/hevc_refs.c +@@ -96,18 +96,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s) + if (!frame->rpl_buf) + goto fail; + +- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); +- if (!frame->tab_mvf_buf) +- goto fail; +- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; ++ if (s->tab_mvf_pool) { ++ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); ++ if (!frame->tab_mvf_buf) ++ goto fail; ++ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; ++ } + +- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); +- if (!frame->rpl_tab_buf) +- goto fail; +- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; +- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; +- for (j = 0; j < frame->ctb_count; j++) +- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; ++ if (s->rpl_tab_pool) { ++ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); ++ if (!frame->rpl_tab_buf) ++ goto fail; ++ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; ++ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; ++ for (j = 0; j < frame->ctb_count; j++) ++ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; ++ } + + frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD; + frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD); +@@ -276,14 +280,17 @@ static int init_slice_rpl(HEVCContext *s) + int ctb_count = frame->ctb_count; + int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; + int i; ++ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; + + if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab)) + return AVERROR_INVALIDDATA; + +- for (i = ctb_addr_ts; i < ctb_count; i++) +- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; ++ if (frame->rpl_tab) { ++ for (i = ctb_addr_ts; i < ctb_count; i++) ++ frame->rpl_tab[i] = tab; ++ } + +- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; ++ frame->refPicList = tab->refPicList; + + return 0; + } diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c -index 2231aed259..6d2d66dfdf 100644 +index 2231aed259..7b05b41441 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -333,6 +333,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps) @@ -19110,7 +19553,43 @@ index 2231aed259..6d2d66dfdf 100644 #endif break; case AV_PIX_FMT_YUV444P: -@@ -3327,7 +3355,14 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, +@@ -485,6 +513,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps, + if (!sps) + return 0; + ++ // If hwaccel then we don't need all 
the s/w decode helper arrays ++ if (s->avctx->hwaccel) { ++ export_stream_params(s, sps); ++ ++ s->avctx->pix_fmt = pix_fmt; ++ s->ps.sps = sps; ++ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data; ++ return 0; ++ } ++ + ret = pic_arrays_init(s, sps); + if (ret < 0) + goto fail; +@@ -2901,11 +2939,13 @@ static int hevc_frame_start(HEVCContext *s) + ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); + int ret; + +- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); +- memset(s->vertical_bs, 0, s->bs_width * s->bs_height); +- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); +- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); +- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); ++ if (s->horizontal_bs) { ++ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); ++ memset(s->vertical_bs, 0, s->bs_width * s->bs_height); ++ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); ++ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); ++ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); ++ } + + s->is_decoded = 0; + s->first_nal_type = s->nal_unit_type; +@@ -3327,7 +3367,14 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, s->ref = NULL; ret = decode_nal_units(s, avpkt->data, avpkt->size); if (ret < 0) @@ -19125,7 +19604,35 @@ index 2231aed259..6d2d66dfdf 100644 if (avctx->hwaccel) { if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { -@@ -3697,6 +3732,15 @@ AVCodec ff_hevc_decoder = { +@@ -3370,15 +3417,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src) + if (ret < 0) + return ret; + +- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); +- if (!dst->tab_mvf_buf) +- goto fail; +- dst->tab_mvf = src->tab_mvf; ++ if (src->tab_mvf_buf) { ++ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); ++ if (!dst->tab_mvf_buf) ++ goto fail; ++ dst->tab_mvf = src->tab_mvf; ++ } + +- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); +- if (!dst->rpl_tab_buf) +- goto fail; +- dst->rpl_tab = src->rpl_tab; ++ if (src->rpl_tab_buf) { ++ dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); ++ if (!dst->rpl_tab_buf) ++ goto fail; ++ dst->rpl_tab = src->rpl_tab; ++ } + + dst->rpl_buf = av_buffer_ref(src->rpl_buf); + if (!dst->rpl_buf) +@@ -3697,6 +3748,15 @@ AVCodec ff_hevc_decoder = { #if CONFIG_HEVC_NVDEC_HWACCEL HWACCEL_NVDEC(hevc), #endif @@ -49497,7 +50004,7 @@ index 0000000000..85c5b46d75 +}; + diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 4b2679eb38..6ca83cc21b 100644 +index 4b2679eb38..9ef2f40e39 100644 --- a/libavcodec/v4l2_buffers.c +++ b/libavcodec/v4l2_buffers.c @@ -21,6 +21,7 @@ @@ -49508,9 +50015,11 @@ index 4b2679eb38..6ca83cc21b 100644 #include #include #include -@@ -30,56 +31,68 @@ +@@ -29,57 +30,82 @@ + #include #include "libavcodec/avcodec.h" #include "libavcodec/internal.h" ++#include "libavutil/avassert.h" #include "libavutil/pixdesc.h" +#include "libavutil/hwcontext.h" #include "v4l2_context.h" @@ -49552,21 +50061,32 @@ index 4b2679eb38..6ca83cc21b 100644 } -static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) -+static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) ++static inline struct timeval tv_from_int(const int64_t t) { - int64_t v4l2_pts; -- ++ return (struct timeval){ ++ .tv_usec = t % USEC_PER_SEC, ++ .tv_sec = t / USEC_PER_SEC ++ }; ++} + - if (pts 
== AV_NOPTS_VALUE) - pts = 0; -- ++static inline int64_t int_from_tv(const struct timeval t) ++{ ++ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; ++} + ++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) ++{ /* convert pts to v4l2 timebase */ - v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); +- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; +- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; + const int64_t v4l2_pts = -+ out->context->no_pts_rescale ? pts : + pts == AV_NOPTS_VALUE ? 0 : + av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); - out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; - out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; ++ out->buf.timestamp = tv_from_int(v4l2_pts); } -static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) @@ -49574,18 +50094,20 @@ index 4b2679eb38..6ca83cc21b 100644 { - int64_t v4l2_pts; - ++ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); ++ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; ++#if 0 /* convert pts back to encoder timebase */ - v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + -+ const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + - avbuf->buf.timestamp.tv_usec; - -- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +- avbuf->buf.timestamp.tv_usec; + return + avbuf->context->no_pts_rescale ? v4l2_pts : + v4l2_pts == 0 ? AV_NOPTS_VALUE : + av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++#endif +} -+ + +- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) +{ + if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { @@ -49598,7 +50120,7 @@ index 4b2679eb38..6ca83cc21b 100644 } static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) -@@ -116,6 +129,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) +@@ -116,6 +142,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) return AVCOL_PRI_UNSPECIFIED; } @@ -49704,7 +50226,7 @@ index 4b2679eb38..6ca83cc21b 100644 static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) { enum v4l2_quantization qt; -@@ -134,6 +246,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) +@@ -134,6 +259,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) return AVCOL_RANGE_UNSPECIFIED; } @@ -49725,29 +50247,23 @@ index 4b2679eb38..6ca83cc21b 100644 static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) { enum v4l2_ycbcr_encoding ycbcr; -@@ -210,73 +336,165 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) +@@ -210,73 +349,165 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) return AVCOL_TRC_UNSPECIFIED; } -static void v4l2_free_buffer(void *opaque, uint8_t *unused) +static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) -+{ -+ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); -+} -+ -+static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) { - V4L2Buffer* avbuf = opaque; - V4L2m2mContext *s = buf_to_m2mctx(avbuf); -+ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; ++ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); +} - if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { - atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); -+static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) 
++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) +{ -+ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : -+ is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; ++ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; +} - if (s->reinit) { @@ -49761,11 +50277,18 @@ index 4b2679eb38..6ca83cc21b 100644 - else if (avbuf->context->streamon) - ff_v4l2_buffer_enqueue(avbuf); - } ++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) ++{ ++ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : ++ is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; ++} ++ +static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) +{ + AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; + AVDRMLayerDescriptor *layer; -+ + +- av_buffer_unref(&avbuf->context_ref); + /* fill the DRM frame descriptor */ + drm_desc->nb_objects = avbuf->num_planes; + drm_desc->nb_layers = 1; @@ -49777,7 +50300,7 @@ index 4b2679eb38..6ca83cc21b 100644 + layer->planes[i].object_index = i; + layer->planes[i].offset = 0; + layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; -+ } + } + + switch (avbuf->context->av_pix_fmt) { + case AV_PIX_FMT_YUYV422: @@ -49805,8 +50328,7 @@ index 4b2679eb38..6ca83cc21b 100644 + break; + + case AV_PIX_FMT_YUV420P: - -- av_buffer_unref(&avbuf->context_ref); ++ + layer->format = DRM_FORMAT_YUV420; + + if (avbuf->num_planes > 1) @@ -49829,7 +50351,7 @@ index 4b2679eb38..6ca83cc21b 100644 + default: + drm_desc->nb_layers = 0; + break; - } ++ } + + return (uint8_t *) drm_desc; } @@ -49858,7 +50380,7 @@ index 4b2679eb38..6ca83cc21b 100644 - in->status = V4L2BUF_RET_USER; - atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); -+ avbuf->status = V4L2BUF_AVAILABLE; ++ ff_v4l2_buffer_set_avail(avbuf); - return 0; + if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) { @@ -49934,7 +50456,7 @@ index 4b2679eb38..6ca83cc21b 100644 if (plane >= out->num_planes) return AVERROR(EINVAL); -@@ -284,32 +502,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i +@@ -284,32 +515,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i length = out->plane_info[plane].length; bytesused = FFMIN(size+offset, length); @@ -49989,7 +50511,7 @@ index 4b2679eb38..6ca83cc21b 100644 + frame->buf[0] = wrap_avbuf(avbuf); + if (frame->buf[0] == NULL) + return AVERROR(ENOMEM); - ++ + if (buf_to_m2mctx(avbuf)->output_drm) { + /* 1. get references to the actual data */ + frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf); @@ -49997,7 +50519,7 @@ index 4b2679eb38..6ca83cc21b 100644 + frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref); + return 0; + } -+ + + + /* 1. 
get references to the actual data */ + for (i = 0; i < avbuf->num_planes; i++) { @@ -50007,7 +50529,7 @@ index 4b2679eb38..6ca83cc21b 100644 } /* fixup special cases */ -@@ -318,17 +561,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -318,17 +574,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) case AV_PIX_FMT_NV21: if (avbuf->num_planes > 1) break; @@ -50031,7 +50553,7 @@ index 4b2679eb38..6ca83cc21b 100644 break; default: -@@ -338,68 +581,95 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -338,68 +594,127 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) return 0; } @@ -50053,6 +50575,38 @@ index 4b2679eb38..6ca83cc21b 100644 +{ + return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); +} ++ ++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++{ ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) ++ return AVERROR(EINVAL); ++ ++ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++ // Only currently cope with single buffer types ++ if (out->buf.length != 1) ++ return AVERROR_PATCHWELCOME; ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->planes[0].m.fd = src->objects[0].fd; ++ } ++ else { ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->buf.m.fd = src->objects[0].fd; ++ } ++ ++ // No need to copy src AVDescriptor and if we did then we may confuse ++ // fd close on free ++ out->ref_buf = av_buffer_ref(frame->buf[0]); ++ ++ return 0; ++} + static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) { @@ -50181,10 +50735,16 @@ index 4b2679eb38..6ca83cc21b 100644 return 0; } -@@ -411,7 +681,16 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +@@ -409,16 +724,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + * + ******************************************************************************/ - int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) { +- v4l2_set_pts(out, frame->pts); +- +- return v4l2_buffer_swframe_to_buf(frame, out); + out->buf.flags = frame->key_frame ? + (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : + (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); @@ -50193,12 +50753,17 @@ index 4b2679eb38..6ca83cc21b 100644 + v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); + v4l2_set_color_range(out, frame->color_range); + // PTS & interlace are buffer vars - v4l2_set_pts(out, frame->pts); ++ if (track_ts) ++ out->buf.timestamp = tv_from_int(track_ts); ++ else ++ v4l2_set_pts(out, frame->pts); + v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); - - return v4l2_buffer_swframe_to_buf(frame, out); ++ ++ return frame->format == AV_PIX_FMT_DRM_PRIME ? 
++ v4l2_buffer_primeframe_to_buf(frame, out) : ++ v4l2_buffer_swframe_to_buf(frame, out); } -@@ -419,6 +698,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) { int ret; @@ -50206,7 +50771,7 @@ index 4b2679eb38..6ca83cc21b 100644 av_frame_unref(frame); -@@ -429,17 +709,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -429,17 +759,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) /* 2. get frame information */ frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); @@ -50242,7 +50807,7 @@ index 4b2679eb38..6ca83cc21b 100644 /* 3. report errors upstream */ if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { -@@ -452,15 +747,14 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -452,15 +797,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) { @@ -50260,16 +50825,18 @@ index 4b2679eb38..6ca83cc21b 100644 pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; - pkt->data = pkt->buf->data; + pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; ++ pkt->flags = 0; if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) pkt->flags |= AV_PKT_FLAG_KEY; -@@ -475,31 +769,85 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) +@@ -475,31 +820,91 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) return 0; } -int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -+ const void *extdata, size_t extlen) ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp) { int ret; @@ -50285,7 +50852,11 @@ index 4b2679eb38..6ca83cc21b 100644 + if (ret && ret != AVERROR(ENOMEM)) return ret; - v4l2_set_pts(out, pkt->pts); +- v4l2_set_pts(out, pkt->pts); ++ if (timestamp) ++ out->buf.timestamp = tv_from_int(timestamp); ++ else ++ v4l2_set_pts(out, pkt->pts); - if (pkt->flags & AV_PKT_FLAG_KEY) - out->flags = V4L2_BUF_FLAG_KEYFRAME; @@ -50295,14 +50866,14 @@ index 4b2679eb38..6ca83cc21b 100644 - return 0; + return ret; ++} ++ ++int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) ++{ ++ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); } -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) -+int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) -+{ -+ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0); -+} -+ + +static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) +{ @@ -50320,13 +50891,15 @@ index 4b2679eb38..6ca83cc21b 100644 + close(avbuf->drm_frame.objects[i].fd); + } + ++ av_buffer_unref(&avbuf->ref_buf); ++ + ff_weak_link_unref(&avbuf->context_wl); + + av_free(avbuf); +} + + -+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx) ++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) { - V4L2Context *ctx = avbuf->context; int ret, i; @@ -50343,8 +50916,9 @@ index 4b2679eb38..6ca83cc21b 100644 + return AVERROR(ENOMEM); + } +- avbuf->buf.memory = V4L2_MEMORY_MMAP; + avbuf->context = ctx; - avbuf->buf.memory = V4L2_MEMORY_MMAP; ++ avbuf->buf.memory = mem; avbuf->buf.type = ctx->type; avbuf->buf.index = 
index; @@ -50357,7 +50931,7 @@ index 4b2679eb38..6ca83cc21b 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.length = VIDEO_MAX_PLANES; avbuf->buf.m.planes = avbuf->planes; -@@ -507,7 +855,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -507,7 +912,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); if (ret < 0) @@ -50366,7 +50940,16 @@ index 4b2679eb38..6ca83cc21b 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->num_planes = 0; -@@ -527,25 +875,33 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -520,6 +925,8 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) + avbuf->num_planes = 1; + + for (i = 0; i < avbuf->num_planes; i++) { ++ const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && ++ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); + + avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? + ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : +@@ -527,25 +934,29 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; @@ -50374,24 +50957,20 @@ index 4b2679eb38..6ca83cc21b 100644 - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); + -+ if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -+ !buf_to_m2mctx(avbuf)->output_drm) { ++ if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); -+ } } else { avbuf->plane_info[i].length = avbuf->buf.length; - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); + -+ if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -+ !buf_to_m2mctx(avbuf)->output_drm) { ++ if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); -+ } } - if (avbuf->plane_info[i].mm_addr == MAP_FAILED) @@ -50411,7 +50990,7 @@ index 4b2679eb38..6ca83cc21b 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.m.planes = avbuf->planes; avbuf->buf.length = avbuf->num_planes; -@@ -555,20 +911,51 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -555,20 +966,51 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->buf.length = avbuf->planes[0].length; } @@ -50468,10 +51047,10 @@ index 4b2679eb38..6ca83cc21b 100644 return 0; } diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 8dbc7fc104..7d5fadcd3d 100644 +index 8dbc7fc104..e64441ec9b 100644 --- a/libavcodec/v4l2_buffers.h +++ b/libavcodec/v4l2_buffers.h -@@ -27,25 +27,34 @@ +@@ -27,25 +27,38 @@ #include #include @@ -50508,10 +51087,14 @@ index 8dbc7fc104..7d5fadcd3d 100644 - atomic_uint context_refcount; + /* DRM descriptor */ + AVDRMFrameDescriptor drm_frame; ++ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we ++ * are done ++ */ ++ AVBufferRef * ref_buf; /* keep track of the mmap address and mmap length */ struct V4L2Plane_info { -@@ -60,7 +69,6 @@ typedef struct V4L2Buffer { +@@ -60,7 +73,6 @@ typedef struct V4L2Buffer { struct v4l2_buffer buf; struct v4l2_plane planes[VIDEO_MAX_PLANES]; @@ -50519,27 
+51102,50 @@ index 8dbc7fc104..7d5fadcd3d 100644 enum V4L2Buffer_status status; } V4L2Buffer; -@@ -98,6 +106,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); +@@ -98,6 +110,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); */ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -+ const void *extdata, size_t extlen); ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp); + /** * Extracts the data from an AVFrame to a V4L2Buffer * -@@ -116,7 +127,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); +@@ -106,7 +122,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); + * + * @returns 0 in case of success, a negative AVERROR code otherwise + */ +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); + + /** + * Initializes a V4L2Buffer +@@ -116,7 +132,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); * * @returns 0 in case of success, a negative AVERROR code otherwise */ -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); -+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx); ++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); /** * Enqueues a V4L2Buffer +@@ -127,5 +143,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); + */ + int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); + ++static inline void ++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) ++{ ++ avbuf->status = V4L2BUF_AVAILABLE; ++ av_buffer_unref(&avbuf->ref_buf); ++} ++ + + #endif // AVCODEC_V4L2_BUFFERS_H diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index ff1ea8e57b..c0d257e5d3 100644 +index ff1ea8e57b..0225f6ba64 100644 --- a/libavcodec/v4l2_context.c +++ b/libavcodec/v4l2_context.c @@ -27,11 +27,13 @@ @@ -50556,49 +51162,191 @@ index ff1ea8e57b..c0d257e5d3 100644 struct v4l2_format_update { uint32_t v4l2_fmt; -@@ -41,28 +43,18 @@ struct v4l2_format_update { +@@ -41,26 +43,168 @@ struct v4l2_format_update { int update_avfmt; }; -static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) -+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) ++ ++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) { - return V4L2_TYPE_IS_OUTPUT(ctx->type) ? - container_of(ctx, V4L2m2mContext, output) : - container_of(ctx, V4L2m2mContext, capture); +- return V4L2_TYPE_IS_OUTPUT(ctx->type) ? +- container_of(ctx, V4L2m2mContext, output) : +- container_of(ctx, V4L2m2mContext, capture); ++ return (int64_t)n; } -static inline AVCodecContext *logger(V4L2Context *ctx) -+static inline AVCodecContext *logger(const V4L2Context *ctx) ++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) { - return ctx_to_m2mctx(ctx)->avctx; +- return ctx_to_m2mctx(ctx)->avctx; ++ return (unsigned int)pts; ++} ++ ++// FFmpeg requires us to propagate a number of vars from the coded pkt into ++// the decoded frame. The only thing that tracks like that in V4L2 stateful ++// is timestamp. PTS maps to timestamp for this decode. 
FFmpeg makes no ++// guarantees about PTS being unique or specified for every frame so replace ++// the supplied PTS with a simple incrementing number and keep a circular ++// buffer of all the things we want preserved (including the original PTS) ++// indexed by the tracking no. ++static int64_t ++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pkt_size = avpkt->size, ++ .pts = avpkt->pts, ++ .dts = avpkt->dts, ++ .reordered_opaque = avctx->reordered_opaque, ++ .pkt_pos = avpkt->pos, ++ .pkt_duration = avpkt->duration, ++ .track_pts = track_pts ++ }; ++ return track_pts; ++} ++ ++static int64_t ++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pkt_size = 0, ++ .pts = frame->pts, ++ .dts = AV_NOPTS_VALUE, ++ .reordered_opaque = frame->reordered_opaque, ++ .pkt_pos = frame->pkt_pos, ++ .pkt_duration = frame->pkt_duration, ++ .track_pts = track_pts ++ }; ++ return track_pts; ++} ++ ++ ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_frame_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVFrame *const frame) ++{ ++ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) ++ { ++ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ frame->pts = AV_NOPTS_VALUE; ++ frame->pkt_dts = AV_NOPTS_VALUE; ++ frame->reordered_opaque = x->last_opaque; ++ frame->pkt_pos = -1; ++ frame->pkt_duration = 0; ++ frame->pkt_size = -1; ++ } ++ else if (!t->discard) ++ { ++ frame->pts = t->pending ? 
t->pts : AV_NOPTS_VALUE; ++ frame->pkt_dts = t->dts; ++ frame->reordered_opaque = t->reordered_opaque; ++ frame->pkt_pos = t->pkt_pos; ++ frame->pkt_duration = t->pkt_duration; ++ frame->pkt_size = t->pkt_size; ++ ++ x->last_opaque = x->track_els[n].reordered_opaque; ++ if (frame->pts != AV_NOPTS_VALUE) ++ x->last_pts = frame->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); ++ return 0; ++} ++ ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_pkt_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVPacket *const pkt) ++{ ++ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) ++ { ++ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ pkt->pts = AV_NOPTS_VALUE; ++ } ++ else if (!t->discard) ++ { ++ pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; ++ ++ x->last_opaque = x->track_els[n].reordered_opaque; ++ if (pkt->pts != AV_NOPTS_VALUE) ++ x->last_pts = pkt->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ // * Would like something much better than this...xlat(offset + out_count)? ++ pkt->dts = pkt->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ pkt->pts, t->track_pts, n); ++ return 0; } -static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) --{ -- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; --} -- --static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) --{ -- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; --} -- - static AVRational v4l2_get_sar(V4L2Context *ctx) ++ ++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) { - struct AVRational sar = { 0, 1 }; -@@ -81,21 +73,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? ++ container_of(ctx, V4L2m2mContext, output) : ++ container_of(ctx, V4L2m2mContext, capture); + } + +-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) ++static inline AVCodecContext *logger(const V4L2Context *ctx) + { +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++ return ctx_to_m2mctx(ctx)->avctx; + } + + static AVRational v4l2_get_sar(V4L2Context *ctx) +@@ -81,21 +225,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) return sar; } -static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) +static inline int ctx_buffers_alloced(const V4L2Context * const ctx) -+{ -+ return ctx->bufrefs != NULL; -+} -+ -+// Width/Height changed or we don't have an alloc in the first place? 
-+static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) { - struct v4l2_format *fmt1 = &ctx->format; - int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? @@ -50607,6 +51355,12 @@ index ff1ea8e57b..c0d257e5d3 100644 - : - fmt1->fmt.pix.width != fmt2->fmt.pix.width || - fmt1->fmt.pix.height != fmt2->fmt.pix.height; ++ return ctx->bufrefs != NULL; ++} ++ ++// Width/Height changed or we don't have an alloc in the first place? ++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) ++{ + const struct v4l2_format *fmt1 = &ctx->format; + int ret = !ctx_buffers_alloced(ctx) || + (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? @@ -50628,7 +51382,7 @@ index ff1ea8e57b..c0d257e5d3 100644 return ret; } -@@ -153,90 +153,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd +@@ -153,90 +305,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd } } @@ -50753,16 +51507,16 @@ index ff1ea8e57b..c0d257e5d3 100644 if (ret) { - av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n"); + av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n"); - return AVERROR(EINVAL); - } ++ return AVERROR(EINVAL); ++ } + + if (s->capture.width > ff_v4l2_get_format_width(&s->capture.format) || + s->capture.height > ff_v4l2_get_format_height(&s->capture.format)) { + av_log(avctx, AV_LOG_ERROR, "Format post reinit too small: wanted %dx%d > got %dx%d\n", + s->capture.width, s->capture.height, + ff_v4l2_get_format_width(&s->capture.format), ff_v4l2_get_format_height(&s->capture.format)); -+ return AVERROR(EINVAL); -+ } + return AVERROR(EINVAL); + } + + // Update pixel format - should only actually do something on initial change + s->capture.av_pix_fmt = @@ -50793,7 +51547,7 @@ index ff1ea8e57b..c0d257e5d3 100644 return 1; } -@@ -280,171 +300,275 @@ static int v4l2_stop_encode(V4L2Context *ctx) +@@ -280,171 +452,277 @@ static int v4l2_stop_encode(V4L2Context *ctx) return 0; } @@ -50879,16 +51633,18 @@ index ff1ea8e57b..c0d257e5d3 100644 } - ctx->done = 1; - return NULL; -+ } + } + atomic_fetch_sub(&ctx->q_count, 1); + + avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; -+ avbuf->status = V4L2BUF_AVAILABLE; ++ ff_v4l2_buffer_set_avail(avbuf); + avbuf->buf = buf; + if (is_mp) { + memcpy(avbuf->planes, planes, sizeof(planes)); + avbuf->buf.m.planes = avbuf->planes; - } ++ } ++ // Done with any attached buffer ++ av_buffer_unref(&avbuf->ref_buf); -start: - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) @@ -51191,7 +51947,7 @@ index ff1ea8e57b..c0d257e5d3 100644 } return NULL; -@@ -452,25 +576,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) +@@ -452,25 +730,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) static int v4l2_release_buffers(V4L2Context* ctx) { @@ -51221,16 +51977,16 @@ index ff1ea8e57b..c0d257e5d3 100644 + .type = ctx->type, + .count = 0, /* 0 -> unmap all buffers from the driver */ + }; -+ -+ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { -+ if (errno == EINTR) -+ continue; - for (j = 0; j < buffer->num_planes; j++) { - struct V4L2Plane_info *p = &buffer->plane_info[j]; - if (p->mm_addr && p->length) - if (munmap(p->mm_addr, p->length) < 0) - av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno))); ++ while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) { ++ if (errno == EINTR) ++ continue; ++ + ret = AVERROR(errno); + + av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n", @@ -51251,7 +52007,7 @@ 
index ff1ea8e57b..c0d257e5d3 100644 } static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) -@@ -499,6 +643,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm +@@ -499,6 +797,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) { @@ -51260,7 +52016,7 @@ index ff1ea8e57b..c0d257e5d3 100644 enum AVPixelFormat pixfmt = ctx->av_pix_fmt; struct v4l2_fmtdesc fdesc; int ret; -@@ -517,6 +663,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) +@@ -517,6 +817,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) if (ret) return AVERROR(EINVAL); @@ -51274,7 +52030,7 @@ index ff1ea8e57b..c0d257e5d3 100644 pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); ret = v4l2_try_raw_format(ctx, pixfmt); if (ret){ -@@ -569,18 +722,83 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) +@@ -569,30 +876,99 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) * *****************************************************************************/ @@ -51289,7 +52045,7 @@ index ff1ea8e57b..c0d257e5d3 100644 + for (i = 0; i < ctx->num_buffers; ++i) { + struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; + if (buf->status == V4L2BUF_IN_DRIVER) -+ buf->status = V4L2BUF_AVAILABLE; ++ ff_v4l2_buffer_set_avail(buf); + } + atomic_store(&ctx->q_count, 0); +} @@ -51349,6 +52105,8 @@ index ff1ea8e57b..c0d257e5d3 100644 + { + if (cmd == VIDIOC_STREAMOFF) + flush_all_buffers_status(ctx); ++ else ++ ctx->first_buf = 1; + + ctx->streamon = (cmd == VIDIOC_STREAMON); + av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name, @@ -51364,7 +52122,33 @@ index ff1ea8e57b..c0d257e5d3 100644 } int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) -@@ -608,7 +826,8 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ V4L2m2mContext *const s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; ++ int64_t track_ts; + V4L2Buffer* avbuf; + int ret; + + if (!frame) { + ret = v4l2_stop_encode(ctx); + if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); + s->draining= 1; + return 0; + } +@@ -601,23 +977,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) + if (!avbuf) + return AVERROR(EAGAIN); + +- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); ++ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); ++ ++ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); + if (ret) + return ret; + return ff_v4l2_buffer_enqueue(avbuf); } @@ -51373,25 +52157,29 @@ index ff1ea8e57b..c0d257e5d3 100644 + const void * extdata, size_t extlen) { V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; V4L2Buffer* avbuf; -@@ -616,8 +835,9 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) + int ret; ++ int64_t track_ts; if (!pkt->size) { ret = v4l2_stop_decode(ctx); + // Log but otherwise ignore stop failure if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); -+ av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); s->draining = 1; return 0; } -@@ 
-626,8 +846,11 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +@@ -626,8 +1008,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) if (!avbuf) return AVERROR(EAGAIN); - ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); - if (ret) -+ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen); ++ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); ++ ++ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); + if (ret == AVERROR(ENOMEM)) + av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", + __func__, pkt->size, avbuf->planes[0].length); @@ -51399,9 +52187,12 @@ index ff1ea8e57b..c0d257e5d3 100644 return ret; return ff_v4l2_buffer_enqueue(avbuf); -@@ -636,19 +859,10 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +@@ -635,42 +1022,36 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) + int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; @@ -51414,17 +52205,24 @@ index ff1ea8e57b..c0d257e5d3 100644 - if (!avbuf) { - if (ctx->done) - return AVERROR_EOF; -- ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) ++ return rv; ++ if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); + - return AVERROR(EAGAIN); - } -+ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) -+ return rv; - - return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); +- +- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); ++ return 0; } -@@ -656,19 +870,10 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) + int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; @@ -51440,12 +52238,19 @@ index ff1ea8e57b..c0d257e5d3 100644 - - return AVERROR(EAGAIN); - } -+ if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) -+ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) ++ return rv == AVERROR(ENOSPC) ? 
AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC ++ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); - return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); +- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); ++ return 0; } -@@ -702,78 +907,160 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) + + int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) +@@ -702,78 +1083,160 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) int ff_v4l2_context_set_format(V4L2Context* ctx) { @@ -51497,20 +52302,20 @@ index ff1ea8e57b..c0d257e5d3 100644 -int ff_v4l2_context_init(V4L2Context* ctx) + -+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers) ++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); struct v4l2_requestbuffers req; - int ret, i; -- ++ int ret; ++ int i; + - if (!v4l2_type_supported(ctx)) { - av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); - return AVERROR_PATCHWELCOME; - } -+ int ret; -+ int i; - +- - ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); - if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name); @@ -51518,8 +52323,9 @@ index ff1ea8e57b..c0d257e5d3 100644 memset(&req, 0, sizeof(req)); - req.count = ctx->num_buffers; +- req.memory = V4L2_MEMORY_MMAP; + req.count = req_buffers; - req.memory = V4L2_MEMORY_MMAP; ++ req.memory = mem; req.type = ctx->type; - ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); - if (ret < 0) { @@ -51554,7 +52360,7 @@ index ff1ea8e57b..c0d257e5d3 100644 + } + + for (i = 0; i < ctx->num_buffers; i++) { -+ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx); ++ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); + if (ret) { av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); - goto error; @@ -51580,7 +52386,8 @@ index ff1ea8e57b..c0d257e5d3 100644 + av_freep(&ctx->bufrefs); + return ret; +} -+ + +- av_freep(&ctx->buffers); +int ff_v4l2_context_init(V4L2Context* ctx) +{ + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); @@ -51616,8 +52423,7 @@ index ff1ea8e57b..c0d257e5d3 100644 + if (ret < 0) + goto fail_unref_hwframes; + } - -- av_freep(&ctx->buffers); ++ + ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format); + if (ret) { + ret = AVERROR(errno); @@ -51625,7 +52431,7 @@ index ff1ea8e57b..c0d257e5d3 100644 + goto fail_unref_hwframes; + } + -+ ret = create_buffers(ctx, ctx->num_buffers); ++ ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); + if (ret < 0) + goto fail_unref_hwframes; + @@ -51638,7 +52444,7 @@ index ff1ea8e57b..c0d257e5d3 100644 return ret; } diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 22a9532444..a56216e990 100644 +index 22a9532444..267a629925 100644 --- a/libavcodec/v4l2_context.h +++ b/libavcodec/v4l2_context.h @@ -31,6 +31,7 @@ @@ -51649,7 +52455,7 @@ index 22a9532444..a56216e990 100644 #include "v4l2_buffers.h" typedef struct V4L2Context { -@@ -70,11 +71,18 @@ typedef struct V4L2Context { +@@ -70,28 +71,57 @@ typedef struct V4L2Context { */ int width, height; AVRational sample_aspect_ratio; @@ -51670,18 +52476,35 @@ index 22a9532444..a56216e990 100644 /** * Readonly after init. 
-@@ -92,6 +100,21 @@ typedef struct V4L2Context { + */ + int num_buffers; + ++ /** ++ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF ++ */ ++ enum v4l2_memory buf_mem; ++ + /** + * Whether the stream has been started (VIDIOC_STREAMON has been sent). + */ + int streamon; + ++ /* 1st buffer after stream on */ ++ int first_buf; ++ + /** + * Either no more buffers available or an unrecoverable error was notified + * by the V4L2 kernel driver: once set the context has to be exited. */ int done; + int flag_last; + + /** -+ * PTS rescale not wanted -+ * If the PTS is just a dummy frame count then rescale is -+ * actively harmful ++ * If NZ then when Qing frame/pkt use this rather than the ++ * "real" PTS + */ -+ int no_pts_rescale; ++ uint64_t track_ts; + + AVBufferRef *frames_ref; + atomic_int q_count; @@ -51692,7 +52515,7 @@ index 22a9532444..a56216e990 100644 } V4L2Context; /** -@@ -156,7 +179,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); +@@ -156,7 +186,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); * @param[in] ctx The V4L2Context to dequeue from. * @param[inout] f The AVFrame to dequeue to. * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) @@ -51703,7 +52526,7 @@ index 22a9532444..a56216e990 100644 */ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); -@@ -170,7 +196,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); +@@ -170,7 +203,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); * @param[in] pkt A pointer to an AVPacket. * @return 0 in case of success, a negative error otherwise. */ @@ -51713,10 +52536,35 @@ index 22a9532444..a56216e990 100644 /** * Enqueues a buffer to a V4L2Context from an AVFrame diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index cdfd579810..f14ed0b708 100644 +index cdfd579810..77fe5fc4e3 100644 --- a/libavcodec/v4l2_m2m.c +++ b/libavcodec/v4l2_m2m.c -@@ -215,13 +215,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) +@@ -36,6 +36,14 @@ + #include "v4l2_fmt.h" + #include "v4l2_m2m.h" + ++static void ++xlat_init(xlat_track_t * const x) ++{ ++ memset(x, 0, sizeof(*x)); ++ x->last_pts = AV_NOPTS_VALUE; ++} ++ ++ + static inline int v4l2_splane_video(struct v4l2_capability *cap) + { + if (cap->capabilities & (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT) && +@@ -68,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) + + s->capture.done = s->output.done = 0; + s->capture.name = "capture"; ++ s->capture.buf_mem = V4L2_MEMORY_MMAP; + s->output.name = "output"; ++ s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + atomic_init(&s->refcount, 0); + sem_init(&s->refsync, 0, 0); + +@@ -215,13 +225,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); /* 2. unmap the capture buffers (v4l2 and ffmpeg): @@ -51730,7 +52578,7 @@ index cdfd579810..f14ed0b708 100644 ff_v4l2_context_release(&s->capture); /* 3. get the new capture format */ -@@ -240,7 +234,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) +@@ -240,7 +244,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) /* 5. 
complete reinit */ s->draining = 0; @@ -51738,7 +52586,7 @@ index cdfd579810..f14ed0b708 100644 return 0; } -@@ -274,7 +267,6 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s) +@@ -274,7 +277,6 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s) /* start again now that we know the stream dimensions */ s->draining = 0; @@ -51746,7 +52594,7 @@ index cdfd579810..f14ed0b708 100644 ret = ff_v4l2_context_get_format(&s->output, 0); if (ret) { -@@ -328,10 +320,14 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) +@@ -328,10 +330,14 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) ff_v4l2_context_release(&s->capture); sem_destroy(&s->refsync); @@ -51762,7 +52610,7 @@ index cdfd579810..f14ed0b708 100644 av_free(s); } -@@ -344,6 +340,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) +@@ -344,6 +350,11 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) if (!s) return 0; @@ -51774,7 +52622,7 @@ index cdfd579810..f14ed0b708 100644 if (s->fd >= 0) { ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF); if (ret) -@@ -356,7 +357,14 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) +@@ -356,7 +367,14 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) ff_v4l2_context_release(&s->output); @@ -51789,8 +52637,59 @@ index cdfd579810..f14ed0b708 100644 av_buffer_unref(&priv->context_ref); return 0; +@@ -400,35 +418,38 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv) + return v4l2_configure_contexts(s); + } + +-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) ++int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps) + { +- *s = av_mallocz(sizeof(V4L2m2mContext)); +- if (!*s) ++ V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext)); ++ ++ *pps = NULL; ++ if (!s) + return AVERROR(ENOMEM); + +- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext), ++ priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s), + &v4l2_m2m_destroy_context, NULL, 0); + if (!priv->context_ref) { +- av_freep(s); ++ av_free(s); + return AVERROR(ENOMEM); + } + + /* assign the context */ +- priv->context = *s; +- (*s)->priv = priv; ++ priv->context = s; ++ s->priv = priv; + + /* populate it */ +- priv->context->capture.num_buffers = priv->num_capture_buffers; +- priv->context->output.num_buffers = priv->num_output_buffers; +- priv->context->self_ref = priv->context_ref; +- priv->context->fd = -1; ++ s->capture.num_buffers = priv->num_capture_buffers; ++ s->output.num_buffers = priv->num_output_buffers; ++ s->self_ref = priv->context_ref; ++ s->fd = -1; ++ xlat_init(&s->xlat); + + priv->context->frame = av_frame_alloc(); + if (!priv->context->frame) { + av_buffer_unref(&priv->context_ref); +- *s = NULL; /* freed when unreferencing context_ref */ + return AVERROR(ENOMEM); + } + ++ *pps = s; + return 0; + } diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index b67b216331..19d618698d 100644 +index b67b216331..ee72beb052 100644 --- a/libavcodec/v4l2_m2m.h +++ b/libavcodec/v4l2_m2m.h @@ -30,6 +30,7 @@ @@ -51801,7 +52700,7 @@ index b67b216331..19d618698d 100644 #include "v4l2_context.h" #define container_of(ptr, type, member) ({ \ -@@ -38,7 +39,38 @@ +@@ -38,7 +39,37 @@ #define V4L_M2M_DEFAULT_OPTS \ { "num_output_buffers", "Number of buffers in the output context",\ @@ -51834,14 +52733,13 @@ index b67b216331..19d618698d 100644 +typedef struct xlat_track_s { + unsigned int track_no; + int64_t last_pts; -+ int64_t last_pkt_dts; + int64_t last_opaque; + V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; +} 
xlat_track_t; typedef struct V4L2m2mContext { char devname[PATH_MAX]; -@@ -52,7 +84,6 @@ typedef struct V4L2m2mContext { +@@ -52,7 +83,6 @@ typedef struct V4L2m2mContext { AVCodecContext *avctx; sem_t refsync; atomic_uint refcount; @@ -51849,7 +52747,7 @@ index b67b216331..19d618698d 100644 /* null frame/packet received */ int draining; -@@ -66,6 +97,33 @@ typedef struct V4L2m2mContext { +@@ -66,6 +96,36 @@ typedef struct V4L2m2mContext { /* reference back to V4L2m2mPriv */ void *priv; @@ -51859,6 +52757,9 @@ index b67b216331..19d618698d 100644 + /* generate DRM frames */ + int output_drm; + ++ /* input frames are drmprime */ ++ int input_drm; ++ + /* Frame tracking */ + xlat_track_t xlat; + int pending_hw; @@ -51883,7 +52784,7 @@ index b67b216331..19d618698d 100644 } V4L2m2mContext; typedef struct V4L2m2mPriv { -@@ -76,6 +134,7 @@ typedef struct V4L2m2mPriv { +@@ -76,6 +136,7 @@ typedef struct V4L2m2mPriv { int num_output_buffers; int num_capture_buffers; @@ -51891,7 +52792,7 @@ index b67b216331..19d618698d 100644 } V4L2m2mPriv; /** -@@ -129,4 +188,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); +@@ -129,4 +190,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); */ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); @@ -51919,7 +52820,7 @@ index b67b216331..19d618698d 100644 + #endif /* AVCODEC_V4L2_M2M_H */ diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index ab07c0a24a..dd383f31e5 100644 +index ab07c0a24a..545651e560 100644 --- a/libavcodec/v4l2_m2m_dec.c +++ b/libavcodec/v4l2_m2m_dec.c @@ -23,6 +23,10 @@ @@ -52092,94 +52993,16 @@ index ab07c0a24a..dd383f31e5 100644 return 0; } -@@ -133,58 +169,637 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) +@@ -133,58 +169,552 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) return 0; } -static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) -+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) -+{ -+ return (int64_t)n; -+} -+ -+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) -+{ -+ return (unsigned int)pts; -+} -+ -+// FFmpeg requires us to propagate a number of vars from the coded pkt into -+// the decoded frame. The only thing that tracks like that in V4L2 stateful -+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no -+// guarantees about PTS being unique or specified for every frame so replace -+// the supplied PTS with a simple incrementing number and keep a circular -+// buffer of all the things we want preserved (including the original PTS) -+// indexed by the tracking no. 
+static void -+xlat_pts_in(AVCodecContext *const avctx, xlat_track_t *const x, AVPacket *const avpkt) -+{ -+ int64_t track_pts; -+ -+ // Avoid 0 -+ if (++x->track_no == 0) -+ x->track_no = 1; -+ -+ track_pts = track_to_pts(avctx, x->track_no); -+ -+ av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); -+ x->last_pkt_dts = avpkt->dts; -+ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ -+ .discard = 0, -+ .pending = 1, -+ .pkt_size = avpkt->size, -+ .pts = avpkt->pts, -+ .dts = avpkt->dts, -+ .reordered_opaque = avctx->reordered_opaque, -+ .pkt_pos = avpkt->pos, -+ .pkt_duration = avpkt->duration, -+ .track_pts = track_pts -+ }; -+ avpkt->pts = track_pts; -+} -+ -+// Returns -1 if we should discard the frame -+static int -+xlat_pts_out(AVCodecContext *const avctx, -+ xlat_track_t * const x, ++set_best_effort_pts(AVCodecContext *const avctx, + pts_stats_t * const ps, + AVFrame *const frame) +{ -+ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; -+ V4L2m2mTrackEl *const t = x->track_els + n; -+ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) -+ { -+ av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ frame->pts = AV_NOPTS_VALUE; -+ frame->pkt_dts = x->last_pkt_dts; -+ frame->reordered_opaque = x->last_opaque; -+ frame->pkt_pos = -1; -+ frame->pkt_duration = 0; -+ frame->pkt_size = -1; -+ } -+ else if (!t->discard) -+ { -+ frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; -+ frame->pkt_dts = x->last_pkt_dts; -+ frame->reordered_opaque = t->reordered_opaque; -+ frame->pkt_pos = t->pkt_pos; -+ frame->pkt_duration = t->pkt_duration; -+ frame->pkt_size = t->pkt_size; -+ -+ x->last_opaque = x->track_els[n].reordered_opaque; -+ if (frame->pts != AV_NOPTS_VALUE) -+ x->last_pts = frame->pts; -+ t->pending = 0; -+ } -+ else -+ { -+ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ return -1; -+ } -+ + pts_stats_add(ps, frame->pts); + +#if FF_API_PKT_PTS @@ -52188,10 +53011,15 @@ index ab07c0a24a..dd383f31e5 100644 +FF_ENABLE_DEPRECATION_WARNINGS +#endif + frame->best_effort_timestamp = pts_stats_guess(ps); -+ frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? -+ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", -+ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); -+ return 0; ++ // If we can't guess from just PTS - try DTS ++ if (frame->best_effort_timestamp == AV_NOPTS_VALUE) ++ frame->best_effort_timestamp = frame->pkt_dts; ++ ++ // We can't emulate what s/w does in a useful manner and using the ++ // "correct" answer seems to just confuse things. 
++ frame->pkt_dts = frame->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts); +} + +static void @@ -52205,13 +53033,6 @@ index ab07c0a24a..dd383f31e5 100644 + x->last_pts = AV_NOPTS_VALUE; +} + -+static void -+xlat_init(xlat_track_t * const x) -+{ -+ memset(x, 0, sizeof(*x)); -+ x->last_pts = AV_NOPTS_VALUE; -+} -+ +static int +xlat_pending(const xlat_track_t * const x) +{ @@ -52358,38 +53179,36 @@ index ab07c0a24a..dd383f31e5 100644 + av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); return ret; + } -+ -+ xlat_pts_in(avctx, &s->xlat, &s->buf_pkt); -+ } -+ + } + +- if (s->draining) +- goto dequeue; + if (s->draining) { + if (s->buf_pkt.size) { + av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); + av_packet_unref(&s->buf_pkt); + } + return NQ_DRAINING; - } - -- if (s->draining) -- goto dequeue; ++ } ++ + if (!s->buf_pkt.size) + return NQ_NONE; -+ -+ if ((ret = check_output_streamon(avctx, s)) != 0) -+ return ret; - ret = ff_v4l2_context_enqueue_packet(output, &s->buf_pkt); - if (ret < 0 && ret != AVERROR(EAGAIN)) - goto fail; ++ if ((ret = check_output_streamon(avctx, s)) != 0) ++ return ret; + +- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ +- if (ret != AVERROR(EAGAIN)) + if (s->extdata_sent) + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); + else if (s->extdata_data) + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); + else + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, avctx->extradata, avctx->extradata_size); - -- /* if EAGAIN don't unref packet and try to enqueue in the next iteration */ -- if (ret != AVERROR(EAGAIN)) ++ + if (ret == AVERROR(EAGAIN)) { + // Out of input buffers - keep packet + ret = NQ_Q_FULL; @@ -52498,49 +53317,47 @@ index ab07c0a24a..dd383f31e5 100644 + prefer_dq ? 5 : + src_rv == NQ_Q_FULL ? -1 : 0; + -+ do { -+ // Dequeue frame will unref any previous contents of frame -+ // if it returns success so we don't need an explicit unref -+ // when discarding -+ // This returns AVERROR(EAGAIN) on timeout or if -+ // there is room in the input Q and timeout == -1 -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); ++ // Dequeue frame will unref any previous contents of frame ++ // if it returns success so we don't need an explicit unref ++ // when discarding ++ // This returns AVERROR(EAGAIN) on timeout or if ++ // there is room in the input Q and timeout == -1 ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + -+ // Failure due to no buffer in Q? -+ if (dst_rv == AVERROR(ENOSPC)) { -+ // Wait & retry -+ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); -+ } ++ // Failure due to no buffer in Q? 
++ if (dst_rv == AVERROR(ENOSPC)) { ++ // Wait & retry ++ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + } ++ } + -+ // Adjust dynamic pending threshold -+ if (dst_rv == 0) { -+ if (--s->pending_hw < PENDING_HW_MIN) -+ s->pending_hw = PENDING_HW_MIN; ++ // Adjust dynamic pending threshold ++ if (dst_rv == 0) { ++ if (--s->pending_hw < PENDING_HW_MIN) ++ s->pending_hw = PENDING_HW_MIN; ++ s->pending_n = 0; ++ ++ set_best_effort_pts(avctx, &s->pts_stat, frame); ++ } ++ else if (dst_rv == AVERROR(EAGAIN)) { ++ if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { ++ s->pending_hw = pending * 16 + PENDING_HW_OFFSET; + s->pending_n = 0; + } -+ else if (dst_rv == AVERROR(EAGAIN)) { -+ if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { -+ s->pending_hw = pending * 16 + PENDING_HW_OFFSET; -+ s->pending_n = 0; -+ } -+ } ++ } + -+ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { -+ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); -+ dst_rv = AVERROR_EOF; -+ s->capture.done = 1; -+ } -+ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) -+ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", -+ s->draining, s->capture.done); -+ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) -+ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", -+ s->draining, s->capture.done, dst_rv); -+ -+ // Go again if we got a frame that we need to discard -+ } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame)); ++ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { ++ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); ++ dst_rv = AVERROR_EOF; ++ s->capture.done = 1; ++ } ++ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) ++ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", ++ s->draining, s->capture.done); ++ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) ++ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", ++ s->draining, s->capture.done, dst_rv); + } + + ++i; @@ -52747,14 +53564,13 @@ index ab07c0a24a..dd383f31e5 100644 if (ret < 0) return ret; -+ xlat_init(&s->xlat); + pts_stats_init(&s->pts_stat, avctx, "decoder"); + s->pending_hw = PENDING_HW_MIN; + capture = &s->capture; output = &s->output; -@@ -192,14 +807,53 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) +@@ -192,14 +722,51 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) * by the v4l2 driver; this event will trigger a full pipeline reconfig and * the proper values will be retrieved from the kernel driver. 
*/ @@ -52768,12 +53584,10 @@ index ab07c0a24a..dd383f31e5 100644 output->av_codec_id = avctx->codec_id; output->av_pix_fmt = AV_PIX_FMT_NONE; + output->min_buf_size = max_coded_size(avctx); -+ output->no_pts_rescale = 1; capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; capture->av_pix_fmt = avctx->pix_fmt; + capture->min_buf_size = 0; -+ capture->no_pts_rescale = 1; + + /* the client requests the codec to generate DRM frames: + * - data[0] will therefore point to the returned AVDRMFrameDescriptor @@ -52810,7 +53624,7 @@ index ab07c0a24a..dd383f31e5 100644 s->avctx = avctx; ret = ff_v4l2_m2m_codec_init(priv); -@@ -208,12 +862,74 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) +@@ -208,12 +775,74 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) return ret; } @@ -52887,7 +53701,7 @@ index ab07c0a24a..dd383f31e5 100644 } #define OFFSET(x) offsetof(V4L2m2mPriv, x) -@@ -222,10 +938,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx) +@@ -222,10 +851,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx) static const AVOption options[] = { V4L_M2M_DEFAULT_OPTS, { "num_capture_buffers", "Number of buffers in the capture context", @@ -52905,7 +53719,7 @@ index ab07c0a24a..dd383f31e5 100644 #define M2MDEC_CLASS(NAME) \ static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ .class_name = #NAME "_v4l2m2m_decoder", \ -@@ -246,9 +968,15 @@ static const AVOption options[] = { +@@ -246,9 +881,15 @@ static const AVOption options[] = { .init = v4l2_decode_init, \ .receive_frame = v4l2_receive_frame, \ .close = v4l2_decode_close, \ @@ -52921,6 +53735,367 @@ index ab07c0a24a..dd383f31e5 100644 .wrapper_name = "v4l2m2m", \ } +diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c +index f644b50133..3195ec729b 100644 +--- a/libavcodec/v4l2_m2m_enc.c ++++ b/libavcodec/v4l2_m2m_enc.c +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++ + #include "encode.h" + #include "libavcodec/avcodec.h" + #include "libavcodec/internal.h" +@@ -38,6 +40,34 @@ + #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x + #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x + ++// P030 should be defined in drm_fourcc.h and hopefully will be sometime ++// in the future but until then... ++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') ++#endif ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++#ifndef V4L2_CID_CODEC_BASE ++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in videodev2.h hopefully will be sometime in the future but until then... 
++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ + static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) + { + struct v4l2_streamparm parm = { 0 }; +@@ -148,15 +178,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p) + static int v4l2_check_b_frame_support(V4L2m2mContext *s) + { + if (s->avctx->max_b_frames) +- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); ++ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); + +- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); ++ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); + v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); + if (s->avctx->max_b_frames == 0) + return 0; + + avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); +- + return AVERROR_PATCHWELCOME; + } + +@@ -271,13 +300,184 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s) + return 0; + } + ++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) ++{ ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ const uint32_t drm_fmt = src->layers[0].format; ++ // Treat INVALID as LINEAR ++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? ++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; ++ uint32_t pix_fmt = 0; ++ uint32_t w = 0; ++ uint32_t h = 0; ++ uint32_t bpl = src->layers[0].planes[0].pitch; ++ ++ // We really don't expect multiple layers ++ // All formats that we currently cope with are single object ++ ++ if (src->nb_layers != 1 || src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ switch (drm_fmt) { ++ case DRM_FORMAT_YUV420: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 3) ++ break; ++ pix_fmt = V4L2_PIX_FMT_YUV420; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ break; ++ ++ case DRM_FORMAT_NV12: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; ++ w = bpl; ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ case DRM_FORMAT_P030: ++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; ++ w = bpl / 2; // Matching lie to how we construct this ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!pix_fmt) ++ return AVERROR(EINVAL); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->plane_fmt[0].bytesperline = bpl; ++ pix->num_planes = 1; ++ } ++ else { ++ struct v4l2_pix_format *const pix = &format->fmt.pix; ++ ++ pix->width = w; ++ pix->height = 
h; ++ pix->pixelformat = pix_fmt; ++ pix->bytesperline = bpl; ++ } ++ ++ return 0; ++} ++ ++// Do we have similar enough formats to be usable? ++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) ++{ ++ if (a->type != b->type) ++ return 0; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { ++ const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; ++ const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; ++ unsigned int i; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->num_planes != pb->num_planes) ++ return 0; ++ for (i = 0; i != pa->num_planes; ++i) { ++ if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) ++ return 0; ++ } ++ } ++ else { ++ const struct v4l2_pix_format *const pa = &a->fmt.pix; ++ const struct v4l2_pix_format *const pb = &b->fmt.pix; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->bytesperline != pb->bytesperline) ++ return 0; ++ } ++ return 1; ++} ++ ++ + static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) + { + V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; + V4L2Context *const output = &s->output; + ++ // Signal EOF if needed ++ if (!frame) { ++ return ff_v4l2_context_enqueue_frame(output, frame); ++ } ++ ++ if (s->input_drm && !output->streamon) { ++ int rv; ++ struct v4l2_format req_format = {.type = output->format.type}; ++ ++ // Set format when we first get a buffer ++ if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); ++ return rv; ++ } ++ ++ ff_v4l2_context_release(output); ++ ++ output->format = req_format; ++ ++ if ((rv = ff_v4l2_context_set_format(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); ++ return rv; ++ } ++ ++ if (!fmt_eq(&req_format, &output->format)) { ++ av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ output->selection.top = frame->crop_top; ++ output->selection.left = frame->crop_left; ++ output->selection.width = av_frame_cropped_width(frame); ++ output->selection.height = av_frame_cropped_height(frame); ++ ++ if ((rv = ff_v4l2_context_init(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); ++ return rv; ++ } ++ ++ { ++ struct v4l2_selection selection = { ++ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, ++ .target = V4L2_SEL_TGT_CROP, ++ .r = output->selection ++ }; ++ if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top, ++ av_err2str(AVERROR(errno))); ++ } ++ av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top); ++ } ++ } ++ + #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME +- if (frame && frame->pict_type == AV_PICTURE_TYPE_I) ++ if (frame->pict_type == AV_PICTURE_TYPE_I) + v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); + #endif + +@@ -328,7 +528,70 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + } + + dequeue: +- return ff_v4l2_context_dequeue_packet(capture, avpkt); ++ if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0) ++ return ret; ++ ++ if (capture->first_buf == 1) { ++ uint8_t * data; ++ const int len = avpkt->size; ++ ++ // 1st buffer after streamon should be SPS/PPS ++ capture->first_buf = 2; ++ ++ // Clear both possible 
stores so there is no chance of confusion ++ av_freep(&s->extdata_data); ++ s->extdata_size = 0; ++ av_freep(&avctx->extradata); ++ avctx->extradata_size = 0; ++ ++ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) != NULL) ++ memcpy(data, avpkt->data, len); ++ ++ av_packet_unref(avpkt); ++ ++ if (data == NULL) ++ return AVERROR(ENOMEM); ++ ++ // We need to copy the header, but keep local if not global ++ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { ++ avctx->extradata = data; ++ avctx->extradata_size = len; ++ } ++ else { ++ s->extdata_data = data; ++ s->extdata_size = len; ++ } ++ ++ if ((ret = ff_v4l2_context_dequeue_packet(capture, avpkt)) != 0) ++ return ret; ++ } ++ ++ // First frame must be key so mark as such even if encoder forgot ++ if (capture->first_buf == 2) ++ avpkt->flags |= AV_PKT_FLAG_KEY; ++ ++ // Add SPS/PPS to the start of every key frame if non-global headers ++ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { ++ const size_t newlen = s->extdata_size + avpkt->size; ++ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); ++ ++ if (buf == NULL) { ++ av_packet_unref(avpkt); ++ return AVERROR(ENOMEM); ++ } ++ ++ memcpy(buf->data, s->extdata_data, s->extdata_size); ++ memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); ++ ++ av_buffer_unref(&avpkt->buf); ++ avpkt->buf = buf; ++ avpkt->data = buf->data; ++ avpkt->size = newlen; ++ } ++ ++// av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret); ++ capture->first_buf = 0; ++ return 0; + } + + static av_cold int v4l2_encode_init(AVCodecContext *avctx) +@@ -340,6 +603,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + uint32_t v4l2_fmt_output; + int ret; + ++ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); ++ + ret = ff_v4l2_m2m_create_context(priv, &s); + if (ret < 0) + return ret; +@@ -347,13 +612,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + capture = &s->capture; + output = &s->output; + ++ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); ++ + /* common settings output/capture */ + output->height = capture->height = avctx->height; + output->width = capture->width = avctx->width; + + /* output context */ + output->av_codec_id = AV_CODEC_ID_RAWVIDEO; +- output->av_pix_fmt = avctx->pix_fmt; ++ output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt : ++ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? 
avctx->sw_pix_fmt : ++ AV_PIX_FMT_YUV420P; + + /* capture context */ + capture->av_codec_id = avctx->codec_id; +@@ -372,7 +641,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + v4l2_fmt_output = output->format.fmt.pix.pixelformat; + + pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); +- if (pix_fmt_output != avctx->pix_fmt) { ++ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); + av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); + return AVERROR(EINVAL); diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c new file mode 100644 index 0000000000..5b3fb958fa @@ -53867,20 +55042,27 @@ index 0000000000..dcc8d95632 +#define HEVC_CTRLS_VERSION 3 +#include "v4l2_req_hevc_vx.c" + +diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c +new file mode 100644 +index 0000000000..c35579d8e0 +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v4.c +@@ -0,0 +1,3 @@ ++#define HEVC_CTRLS_VERSION 4 ++#include "v4l2_req_hevc_vx.c" ++ diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c new file mode 100644 -index 0000000000..55c41ae679 +index 0000000000..9ff5592e61 --- /dev/null +++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -0,0 +1,1228 @@ +@@ -0,0 +1,1365 @@ +// File included by v4l2_req_hevc_v* - not compiled on its own + +#include "decode.h" +#include "hevcdec.h" +#include "hwconfig.h" + -+#include "v4l2_request_hevc.h" -+ +#if HEVC_CTRLS_VERSION == 1 +#include "hevc-ctrls-v1.h" + @@ -53891,10 +55073,37 @@ index 0000000000..55c41ae679 +#include "hevc-ctrls-v2.h" +#elif HEVC_CTRLS_VERSION == 3 +#include "hevc-ctrls-v3.h" ++#elif HEVC_CTRLS_VERSION == 4 ++#include ++#if !defined(V4L2_CID_STATELESS_HEVC_SPS) ++#include "hevc-ctrls-v4.h" ++#endif +#else +#error Unknown HEVC_CTRLS_VERSION +#endif + ++#ifndef V4L2_CID_STATELESS_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS ++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX ++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE ++#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE ++ ++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED ++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED ++#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE ++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B ++#endif ++ ++// Should be in videodev2 but we might not have a good enough one ++#ifndef V4L2_PIX_FMT_HEVC_SLICE ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++#endif ++ ++#include "v4l2_request_hevc.h" ++ +#include "libavutil/hwcontext_drm.h" + +#include @@ -53930,11 +55139,16 @@ index 0000000000..55c41ae679 + struct v4l2_ctrl_hevc_slice_params * slice_params; + struct slice_info * slices; + ++ size_t num_offsets; ++ size_t alloced_offsets; ++ uint32_t *offsets; ++ +} V4L2MediaReqDescriptor; + +struct slice_info { + const uint8_t * ptr; + size_t len; // bytes ++ size_t n_offsets; +}; + +// Handy container 
for accumulating controls before setting @@ -54093,7 +55307,7 @@ index 0000000000..55c41ae679 + if (rd->num_slices >= rd->alloced_slices) { + struct v4l2_ctrl_hevc_slice_params * p2; + struct slice_info * s2; -+ size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2; ++ size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2; + + p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2)); + if (p2 == NULL) @@ -54111,6 +55325,23 @@ index 0000000000..55c41ae679 + return 0; +} + ++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets) ++{ ++ if (rd->num_offsets + n > rd->alloced_offsets) { ++ size_t n2 = rd->alloced_slices == 0 ? 128 : rd->alloced_slices * 2; ++ void * p2; ++ while (rd->num_offsets + n > n2) ++ n2 *= 2; ++ if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL) ++ return AVERROR(ENOMEM); ++ rd->offsets = p2; ++ rd->alloced_offsets = n2; ++ } ++ for (size_t i = 0; i != n; ++i) ++ rd->offsets[rd->num_offsets++] = offsets[i] - 1; ++ return 0; ++} ++ +static unsigned int +fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) +{ @@ -54132,9 +55363,13 @@ index 0000000000..55c41ae679 +#endif + entry->field_pic = frame->frame->interlaced_frame; + ++#if HEVC_CTRLS_VERSION <= 3 + /* TODO: Interleaved: Get the POC for each field. */ + entry->pic_order_cnt[0] = frame->poc; + entry->pic_order_cnt[1] = frame->poc; ++#else ++ entry->pic_order_cnt_val = frame->poc; ++#endif + } + } + return n; @@ -54160,8 +55395,11 @@ index 0000000000..55c41ae679 + + *slice_params = (struct v4l2_ctrl_hevc_slice_params) { + .bit_size = bit_size, ++#if HEVC_CTRLS_VERSION <= 3 + .data_bit_offset = bit_offset, -+ ++#else ++ .data_byte_offset = bit_offset / 8 + 1, ++#endif + /* ISO/IEC 23008-2, ITU-T Rec. 
H.265: General slice segment header */ + .slice_segment_addr = sh->slice_segment_addr, + @@ -54244,6 +55482,7 @@ index 0000000000..55c41ae679 + fill_pred_table(h, &slice_params->pred_weight_table); + + slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; ++#if HEVC_CTRLS_VERSION <= 3 + if (slice_params->num_entry_point_offsets > 256) { + slice_params->num_entry_point_offsets = 256; + av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); @@ -54251,6 +55490,7 @@ index 0000000000..55c41ae679 + + for (i = 0; i < slice_params->num_entry_point_offsets; i++) + slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; ++#endif +} + +#if HEVC_CTRLS_VERSION >= 2 @@ -54626,51 +55866,66 @@ index 0000000000..55c41ae679 +#if HEVC_CTRLS_VERSION >= 2 + struct v4l2_ctrl_hevc_decode_params * const dec, +#endif -+ struct v4l2_ctrl_hevc_slice_params * const slices, -+ const unsigned int slice_no, -+ const unsigned int slice_count) ++ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, ++ void * const offsets, const size_t offset_count) +{ + int rv; ++#if HEVC_CTRLS_VERSION >= 2 ++ unsigned int n = 3; ++#else ++ unsigned int n = 2; ++#endif + -+ struct v4l2_ext_control control[] = { ++ struct v4l2_ext_control control[6] = { + { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS, ++ .id = V4L2_CID_STATELESS_HEVC_SPS, + .ptr = &controls->sps, + .size = sizeof(controls->sps), + }, + { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS, ++ .id = V4L2_CID_STATELESS_HEVC_PPS, + .ptr = &controls->pps, + .size = sizeof(controls->pps), + }, +#if HEVC_CTRLS_VERSION >= 2 + { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS, ++ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, + .ptr = dec, + .size = sizeof(*dec), + }, +#endif -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, -+ .ptr = slices + slice_no, -+ .size = sizeof(*slices) * slice_count, -+ }, -+ // Optional -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX, -+ .ptr = &controls->scaling_matrix, -+ .size = sizeof(controls->scaling_matrix), -+ }, + }; + -+ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, -+ controls->has_scaling ? 
-+ FF_ARRAY_ELEMS(control) : -+ FF_ARRAY_ELEMS(control) - 1); ++ if (slices) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, ++ .ptr = slices, ++ .size = sizeof(*slices) * slice_count, ++ }; ++ ++ if (controls->has_scaling) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, ++ .ptr = &controls->scaling_matrix, ++ .size = sizeof(controls->scaling_matrix), ++ }; ++ ++#if HEVC_CTRLS_VERSION >= 4 ++ if (offsets) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, ++ .ptr = offsets, ++ .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count, ++ }; ++#endif ++ ++ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); + + return rv; +} + ++// This only works because we started out from a single coded frame buffer ++// that will remain intact until after end_frame +static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +{ + const HEVCContext * const h = avctx->priv_data; @@ -54679,18 +55934,45 @@ index 0000000000..55c41ae679 + int bcount = get_bits_count(&h->HEVClc->gb); + uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; + ++ const unsigned int n = rd->num_slices; ++ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; ++ + int rv; + struct slice_info * si; + ++ // This looks dodgy but we know that FFmpeg has parsed this from a buffer ++ // that contains the entire frame including the start code ++ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { ++ buffer -= 3; ++ size += 3; ++ boff += 24; ++ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { ++ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", ++ buffer[0], buffer[1], buffer[2]); ++ } ++ } ++ ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { ++ if (rd->slices == NULL) { ++ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) ++ return AVERROR(ENOMEM); ++ rd->slices->ptr = buffer; ++ rd->num_slices = 1; ++ } ++ rd->slices->len = buffer - rd->slices->ptr + size; ++ return 0; ++ } ++ + if ((rv = slice_add(rd)) != 0) + return rv; + -+ si = rd->slices + rd->num_slices - 1; ++ si = rd->slices + n; + si->ptr = buffer; + si->len = size; ++ si->n_offsets = rd->num_offsets; + -+ if (ctx->multi_slice && rd->num_slices > 1) { -+ struct slice_info *const si0 = rd->slices; ++ if (n != block_start) { ++ struct slice_info *const si0 = rd->slices + block_start; + const size_t offset = (buffer - si0->ptr); + boff += offset * 8; + size += offset; @@ -54698,12 +55980,15 @@ index 0000000000..55c41ae679 + } + +#if HEVC_CTRLS_VERSION >= 2 -+ if (rd->num_slices == 1) ++ if (n == 0) + fill_decode_params(h, &rd->dec); -+ fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff); ++ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); +#else -+ fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff); ++ fill_slice_params(h, rd->slice_params + n, size * 8, boff); +#endif ++ if (ctx->max_offsets != 0 && ++ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) ++ return rv; + + return 0; +} @@ -54729,10 +56014,13 @@ index 0000000000..55c41ae679 +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + ++ const int is_last = (j == rd->num_slices); + struct slice_info *const si = rd->slices + i; + 
struct media_request * req = NULL; + struct qent_src * src = NULL; + MediaBufsStatus stat; ++ void * offsets = rd->offsets + rd->slices[i].n_offsets; ++ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; + + if ((req = media_request_get(ctx->mpool)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); @@ -54744,8 +56032,8 @@ index 0000000000..55c41ae679 +#if HEVC_CTRLS_VERSION >= 2 + &rd->dec, +#endif -+ rd->slice_params, -+ i, j - i)) { ++ rd->slice_params + i, j - i, ++ offsets, n_offsets)) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); + goto fail1; + } @@ -54765,13 +56053,9 @@ index 0000000000..55c41ae679 + goto fail2; + } + -+#warning ANNEX_B start code -+// if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { -+// } -+ + stat = mediabufs_start_request(ctx->mbufs, &req, &src, + i == 0 ? rd->qe_dst : NULL, -+ j == rd->num_slices); ++ is_last); + + if (stat != MEDIABUFS_STATUS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); @@ -54836,18 +56120,11 @@ index 0000000000..55c41ae679 + } + + // Send as slices -+ if (ctx->multi_slice) -+ { -+ if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0) ++ for (i = 0; i < rd->num_slices; i += ctx->max_slices) { ++ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices); ++ if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0) + goto fail; + } -+ else -+ { -+ for (i = 0; i != rd->num_slices; ++i) { -+ if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0) -+ goto fail; -+ } -+ } + + // Set the drm_prime desriptor + drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); @@ -54862,6 +56139,12 @@ index 0000000000..55c41ae679 + return rv; +} + ++static inline int ++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) ++{ ++ return v >= c->minimum && v <= c->maximum; ++} ++ +// Initial check & init +static int +probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) @@ -54873,17 +56156,19 @@ index 0000000000..55c41ae679 + + // Check for var slice array + struct v4l2_query_ext_ctrl qc[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX }, ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_PPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, +#if HEVC_CTRLS_VERSION >= 2 -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS }, +#endif + }; + // Order & size must match! 
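The probe() sequence here leans on the VIDIOC_QUERY_EXT_CTRL ioctl (wrapped by mediabufs_ctl_query_ext_ctrls) both to discover whether the driver exposes a given stateless HEVC control and to confirm that the driver's element size matches the struct this code is about to send. A minimal standalone sketch of that query-and-check pattern follows; the device node "/dev/video10", the helper name query_ctrl_size and the use of V4L2_CID_MPEG_VIDEO_HEVC_SPS are illustrative assumptions, and the HEVC-specific part is guarded because that control only exists with the stateless HEVC uAPI headers.

#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>

/* Query one extended control and check that the driver's element size
 * matches the size of the payload we intend to send. */
static int query_ctrl_size(int fd, uint32_t id, size_t expected_size)
{
    struct v4l2_query_ext_ctrl qc = { .id = id };

    if (ioctl(fd, VIDIOC_QUERY_EXT_CTRL, &qc) != 0)
        return -errno;                 /* control not supported at all */
    if (qc.elem_size != expected_size)
        return -EINVAL;                /* kernel/header size mismatch  */
    return 0;
}

int main(void)
{
    int fd = open("/dev/video10", O_RDWR);   /* example device node only */
    if (fd < 0)
        return 1;

#ifdef V4L2_CID_MPEG_VIDEO_HEVC_SPS
    /* Mirrors the elem_size check that probe() performs against its ctrl_sizes[] table. */
    if (query_ctrl_size(fd, V4L2_CID_MPEG_VIDEO_HEVC_SPS,
                        sizeof(struct v4l2_ctrl_hevc_sps)) == 0)
        printf("driver exposes a compatible HEVC SPS control\n");
#endif

    close(fd);
    return 0;
}

The same struct v4l2_query_ext_ctrl also reports minimum/maximum, which is what the ctrl_valid() helper above tests before a decode mode or start code is chosen.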
+ static const size_t ctrl_sizes[] = { + sizeof(struct v4l2_ctrl_hevc_slice_params), ++ sizeof(int32_t), + sizeof(struct v4l2_ctrl_hevc_sps), + sizeof(struct v4l2_ctrl_hevc_pps), + sizeof(struct v4l2_ctrl_hevc_scaling_matrix), @@ -54901,11 +56186,22 @@ index 0000000000..55c41ae679 + return AVERROR(EINVAL); +#endif + -+ if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) { -+ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION); ++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); ++ i = 0; ++#if HEVC_CTRLS_VERSION >= 4 ++ // Skip slice check if no slice mode ++ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ i = 1; ++#else ++ // Fail frame mode silently for anything prior to V4 ++ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) + return AVERROR(EINVAL); -+ } -+ for (i = 0; i != noof_ctrls; ++i) { ++#endif ++ for (; i != noof_ctrls; ++i) { ++ if (qc[i].type == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id); ++ return AVERROR(EINVAL); ++ } + if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { + av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", + HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); @@ -54915,12 +56211,11 @@ index 0000000000..55c41ae679 + + fill_sps(&ctrl_sps, sps); + -+ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { ++ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { + av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); + return AVERROR(EINVAL); + } + -+ ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0; + return 0; +} + @@ -54931,38 +56226,63 @@ index 0000000000..55c41ae679 + int ret; + + struct v4l2_query_ext_ctrl querys[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, ++#if HEVC_CTRLS_VERSION >= 4 ++ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, ++#endif + }; + + struct v4l2_ext_control ctrls[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, + }; + + mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); + -+ ctx->decode_mode = querys[0].default_value; ++ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || ++ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? ++ 1 : querys[2].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); + -+ if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED && -+ ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode); ++#if HEVC_CTRLS_VERSION >= 4 ++ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? 
++ 0 : querys[3].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); ++#else ++ ctx->max_offsets = 0; ++#endif ++ ++ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || ++ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) ++ ctx->decode_mode = querys[0].default_value; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); + return AVERROR(EINVAL); + } + -+ ctx->start_code = querys[1].default_value; -+ if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE && -+ ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code); ++ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || ++ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) ++ ctx->start_code = querys[1].default_value; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); + return AVERROR(EINVAL); + } + -+ ctx->max_slices = querys[2].elems; -+ if (ctx->max_slices > MAX_SLICES) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices); -+ return AVERROR(EINVAL); -+ } ++ // If we are in slice mode & START_CODE_NONE supported then pick that ++ // as it doesn't require the slightly dodgy look backwards in our raw buffer ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && ++ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; + + ctrls[0].value = ctx->decode_mode; + ctrls[1].value = ctx->start_code; @@ -54986,6 +56306,7 @@ index 0000000000..55c41ae679 + + av_freep(&rd->slices); + av_freep(&rd->slice_params); ++ av_freep(&rd->offsets); + + av_free(rd); +} @@ -57289,10 +58610,10 @@ index 0000000000..cb4bd164b4 + diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c new file mode 100644 -index 0000000000..0ae14db90b +index 0000000000..27b1b8dd6d --- /dev/null +++ b/libavcodec/v4l2_request_hevc.c -@@ -0,0 +1,311 @@ +@@ -0,0 +1,315 @@ +/* + * This file is part of FFmpeg. 
+ * @@ -57504,7 +58825,11 @@ index 0000000000..0ae14db90b + goto fail4; + } + -+ if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { ++ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); ++ ctx->fns = &V2(ff_v4l2_req_hevc, 4); ++ } ++ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { + av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); + ctx->fns = &V2(ff_v4l2_req_hevc, 3); + } @@ -57606,10 +58931,10 @@ index 0000000000..0ae14db90b +}; diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h new file mode 100644 -index 0000000000..b2cb8c8584 +index 0000000000..830444bf92 --- /dev/null +++ b/libavcodec/v4l2_request_hevc.h -@@ -0,0 +1,102 @@ +@@ -0,0 +1,101 @@ +#ifndef AVCODEC_V4L2_REQUEST_HEVC_H +#define AVCODEC_V4L2_REQUEST_HEVC_H + @@ -57657,8 +58982,6 @@ index 0000000000..b2cb8c8584 +#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 +#endif + -+#define MAX_SLICES 128 -+ +#define VCAT(name, version) name##_v##version +#define V2(n,v) VCAT(n, v) +#define V(n) V2(n, HEVC_CTRLS_VERSION) @@ -57675,10 +58998,10 @@ index 0000000000..b2cb8c8584 + + unsigned int timestamp; // ?? maybe uint64_t + -+ int multi_slice; + int decode_mode; + int start_code; -+ int max_slices; ++ unsigned int max_slices; // 0 => not wanted (frame mode) ++ unsigned int max_offsets; // 0 => not wanted + + req_decode_q decode_q; + @@ -57710,6 +59033,7 @@ index 0000000000..b2cb8c8584 +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); + +#endif diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c @@ -61841,10 +63165,10 @@ index 5613813ba8..ab8bcfcf34 100644 + diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S new file mode 100644 -index 0000000000..cdcf71ee67 +index 0000000000..2f07d9674c --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -0,0 +1,676 @@ +@@ -0,0 +1,781 @@ +/* +Copyright (c) 2021 Michael Eiler + @@ -62095,228 +63419,6 @@ index 0000000000..cdcf71ee67 + ret +endfunc + -+//void ff_rpi_sand30_lines_to_planar_y16( -+// uint8_t * dest, // [x0] -+// unsigned int dst_stride, // [w1] -> assumed to be equal to _w -+// const uint8_t * src, // [x2] -+// unsigned int src_stride1, // [w3] -> 128 -+// unsigned int src_stride2, // [w4] -+// unsigned int _x, // [w5] -+// unsigned int y, // [w6] -+// unsigned int _w, // [w7] -+// unsigned int h); // [sp, #0] -+ -+function ff_rpi_sand30_lines_to_planar_y16, export=1 -+ stp x19, x20, [sp, #-48]! 
-+ stp x21, x22, [sp, #16] -+ stp x23, x24, [sp, #32] -+ -+ // w6 = argument h -+ ldr w6, [sp, #48] -+ -+ // slice_inc = ((stride2 - 1) * stride1) -+ mov w5, w4 -+ sub w5, w5, #1 -+ lsl w5, w5, #7 -+ -+ // total number of bytes per row = (width / 3) * 4 -+ mov w8, w7 -+ mov w9, #3 -+ udiv w8, w8, w9 -+ lsl w8, w8, #2 -+ -+ // number of full 128 byte blocks to be processed -+ mov w9, #96 -+ udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96 -+ -+ // w10 = number of full integers to process (4 bytes) -+ // w11 = remaning zero to two 10bit values still to copy over -+ mov w12, #96 -+ mul w12, w9, w12 -+ sub w12, w7, w12 // width - blocks*96 = remaining points per row -+ mov w11, #3 -+ udiv w10, w12, w11 // full integers to process = w12 / 3 -+ mul w11, w10, w11 // #integers *3 -+ sub w11, w12, w11 // remaining 0-2 points = remaining points - integers*3 -+ -+ // increase w9 by one if w10+w11 is not zero, and decrease the row count by one -+ // this is to efficiently copy incomplete blocks at the end of the rows -+ // the last row is handled explicitly to avoid writing out of bounds -+ add w22, w10, w11 -+ cmp w22, #0 -+ cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise -+ add w9, w9, w22 -+ sub w6, w6, #1 -+ -+ // store the number of bytes in w20 which we copy too much for every row -+ // when the width of the frame is not a multiple of 96 (128bytes storing 96 10bit values) -+ mov w20, #96*2 -+ mul w20, w20, w9 -+ sub w20, w1, w20 -+ -+ mov w23, #0 // flag to check whether the last line had already been processed -+ -+ // bitmask to clear the uppper 6bits of the result values -+ mov x19, #0x03ff03ff03ff03ff -+ dup v22.2d, x19 -+ -+ // row counter = 0 -+ eor w12, w12, w12 -+row_loop_y16: -+ cmp w12, w6 // jump to row_loop_y16_fin if we processed all rows -+ bge row_loop_y16_fin -+ -+ mov x13, x2 // row src -+ eor w14, w14, w14 // full block counter -+block_loop_y16: -+ cmp w14, w9 -+ bge block_loop_y16_fin -+ -+ // load 64 bytes -+ ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x13], #64 -+ -+ // process v0 and v1 -+ xtn v16.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v17.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v18.4h, v0.4s -+ -+ xtn2 v16.8h, v1.4s -+ and v16.16b, v16.16b, v22.16b -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v17.8h, v1.4s -+ and v17.16b, v17.16b, v22.16b -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v18.8h, v1.4s -+ and v18.16b, v18.16b, v22.16b -+ -+ st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -+ -+ // process v2 and v3 -+ xtn v23.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v24.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v25.4h, v2.4s -+ -+ xtn2 v23.8h, v3.4s -+ and v23.16b, v23.16b, v22.16b -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v24.8h, v3.4s -+ and v24.16b, v24.16b, v22.16b -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v25.8h, v3.4s -+ and v25.16b, v25.16b, v22.16b -+ -+ st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -+ -+ // load the second half of the block -> 64 bytes into registers v4-v7 -+ ld1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x13], #64 -+ -+ // process v4 and v5 -+ xtn v16.4h, v4.4s -+ ushr v4.4s, v4.4s, #10 -+ xtn v17.4h, v4.4s -+ ushr v4.4s, v4.4s, #10 -+ xtn v18.4h, v4.4s -+ -+ xtn2 v16.8h, v5.4s -+ and v16.16b, v16.16b, v22.16b -+ ushr v5.4s, v5.4s, #10 -+ xtn2 v17.8h, v5.4s -+ and v17.16b, v17.16b, v22.16b -+ ushr v5.4s, v5.4s, #10 -+ xtn2 v18.8h, v5.4s -+ and v18.16b, v18.16b, v22.16b -+ -+ st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -+ -+ // v6 and v7 -+ xtn v23.4h, v6.4s -+ ushr v6.4s, v6.4s, #10 -+ xtn v24.4h, v6.4s -+ ushr v6.4s, v6.4s, #10 -+ xtn v25.4h, v6.4s -+ -+ xtn2 v23.8h, v7.4s -+ and v23.16b, v23.16b, 
v22.16b -+ ushr v7.4s, v7.4s, #10 -+ xtn2 v24.8h, v7.4s -+ and v24.16b, v24.16b, v22.16b -+ ushr v7.4s, v7.4s, #10 -+ xtn2 v25.8h, v7.4s -+ and v25.16b, v25.16b, v22.16b -+ -+ st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -+ -+ add x13, x13, x5 // row src += slice_inc -+ add w14, w14, #1 -+ b block_loop_y16 -+block_loop_y16_fin: -+ -+ -+ -+ -+ add x2, x2, #128 // src += stride1 (start of the next row) -+ add x0, x0, w20, sxtw // subtract the bytes we copied too much from dst -+ add w12, w12, #1 -+ b row_loop_y16 -+row_loop_y16_fin: -+ -+ // check whether we have incomplete blocks at the end of every row -+ // in that case decrease row block count by one -+ // change height back to it's original value (meaning increase it by 1) -+ // and jump back to another iteration of row_loop_y16 -+ -+ cmp w23, #1 -+ beq row_loop_y16_fin2 // don't continue here if we already processed the last row -+ add w6, w6, #1 // increase height to the original value -+ sub w9, w9, w22 // block count - 1 or 0, depending on the remaining bytes count -+ mov w23, #1 -+ b row_loop_y16 -+row_loop_y16_fin2: -+ -+ sub x0, x0, w20, sxtw // with the last row we didn't actually move the dst ptr to far ahead, therefore readd the diference -+ -+ // now we've got to handle the last block in the last row -+ eor w12, w12, w12 // w12 = 0 = counter -+integer_loop_y16: -+ cmp w12, w10 -+ bge integer_loop_y16_fin -+ ldr w14, [x13], #4 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ add w12, w12, #1 -+ b integer_loop_y16 -+integer_loop_y16_fin: -+ -+final_values_y16: -+ // remaining point count = w11 -+ ldr w14, [x13], #4 -+ cmp w11, #0 -+ beq final_values_y16_fin -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ cmp w11, #1 -+ beq final_values_y16_fin -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+final_values_y16_fin: -+ -+ ldp x23, x24, [sp, #32] -+ ldp x21, x22, [sp, #16] -+ ldp x19, x20, [sp], #48 -+ ret -+endfunc -+ +//void ff_rpi_sand30_lines_to_planar_c16( +// uint8_t * dst_u, // [x0] +// unsigned int dst_stride_u, // [w1] == _w*2 @@ -62521,12 +63623,339 @@ index 0000000000..cdcf71ee67 +// unsigned int _w, +// unsigned int h); + ++// void ff_rpi_sand30_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK. 
However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y16, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ sub w1, w1, w7, lsl #1 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ // v0, v1 ++ ++ shrn v18.4h, v0.4s, #14 ++ xtn v16.4h, v0.4s ++ shrn v17.4h, v0.4s, #10 ++ ++ shrn2 v18.8h, v1.4s, #14 ++ xtn2 v16.8h, v1.4s ++ shrn2 v17.8h, v1.4s, #10 ++ ++ ushr v18.8h, v18.8h, #6 ++ bic v16.8h, #0xfc, lsl #8 ++ bic v17.8h, #0xfc, lsl #8 ++ ++ // v2, v3 ++ ++ shrn v21.4h, v2.4s, #14 ++ xtn v19.4h, v2.4s ++ shrn v20.4h, v2.4s, #10 ++ ++ shrn2 v21.8h, v3.4s, #14 ++ xtn2 v19.8h, v3.4s ++ shrn2 v20.8h, v3.4s, #10 ++ ++ ushr v21.8h, v21.8h, #6 ++ bic v19.8h, #0xfc, lsl #8 ++ bic v20.8h, #0xfc, lsl #8 ++ ++ // v4, v5 ++ ++ shrn v24.4h, v4.4s, #14 ++ xtn v22.4h, v4.4s ++ shrn v23.4h, v4.4s, #10 ++ ++ shrn2 v24.8h, v5.4s, #14 ++ xtn2 v22.8h, v5.4s ++ shrn2 v23.8h, v5.4s, #10 ++ ++ ushr v24.8h, v24.8h, #6 ++ bic v22.8h, #0xfc, lsl #8 ++ bic v23.8h, #0xfc, lsl #8 ++ ++ // v6, v7 ++ ++ shrn v27.4h, v6.4s, #14 ++ xtn v25.4h, v6.4s ++ shrn v26.4h, v6.4s, #10 ++ ++ shrn2 v27.8h, v7.4s, #14 ++ xtn2 v25.8h, v7.4s ++ shrn2 v26.8h, v7.4s, #10 ++ ++ ushr v27.8h, v27.8h, #6 ++ bic v25.8h, #0xfc, lsl #8 ++ bic v26.8h, #0xfc, lsl #8 ++ ++ blt 2f ++ ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 ++ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++ mov v19.16b, v25.16b ++ mov v20.16b, v26.16b ++ mov v21.16b, v27.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v19.16b ++ mov v17.16b, v20.16b ++ sub w5, w5, #24 ++ mov v18.16b, v21.16b ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 ++ beq 11b ++ mov v16.2d[0], v16.2d[1] ++ sub w5, w5, #12 ++ mov v17.2d[0], v17.2d[1] ++ mov v18.2d[0], v18.2d[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ st3 {v16.h, v17.h, v18.h}[1], [x0], #6 ++ beq 11b ++ mov v16.2s[0], v16.2s[1] ++ sub w5, w5, #6 ++ mov v17.2s[0], v17.2s[1] ++ mov v18.2s[0], v18.2s[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ beq 11b ++ mov v16.4h[0], v16.4h[1] ++ sub w5, w5, #3 ++ mov v17.4h[0], v17.4h[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.h, v17.h}[0], [x0], #4 ++ b 11b ++1: ++ st1 {v16.h}[0], [x0], #2 ++ b 11b ++ ++endfunc ++ ++// void ff_rpi_sand30_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK. 
However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ sub w1, w1, w7 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ // v0, v1 ++ ++ shrn v18.4h, v0.4s, #16 ++ xtn v16.4h, v0.4s ++ shrn v17.4h, v0.4s, #12 ++ ++ shrn2 v18.8h, v1.4s, #16 ++ xtn2 v16.8h, v1.4s ++ shrn2 v17.8h, v1.4s, #12 ++ ++ shrn v18.8b, v18.8h, #6 ++ shrn v16.8b, v16.8h, #2 ++ xtn v17.8b, v17.8h ++ ++ // v2, v3 ++ ++ shrn v21.4h, v2.4s, #16 ++ xtn v19.4h, v2.4s ++ shrn v20.4h, v2.4s, #12 ++ ++ shrn2 v21.8h, v3.4s, #16 ++ xtn2 v19.8h, v3.4s ++ shrn2 v20.8h, v3.4s, #12 ++ ++ shrn2 v18.16b, v21.8h, #6 ++ shrn2 v16.16b, v19.8h, #2 ++ xtn2 v17.16b, v20.8h ++ ++ // v4, v5 ++ ++ shrn v24.4h, v4.4s, #16 ++ xtn v22.4h, v4.4s ++ shrn v23.4h, v4.4s, #12 ++ ++ shrn2 v24.8h, v5.4s, #16 ++ xtn2 v22.8h, v5.4s ++ shrn2 v23.8h, v5.4s, #12 ++ ++ shrn v21.8b, v24.8h, #6 ++ shrn v19.8b, v22.8h, #2 ++ xtn v20.8b, v23.8h ++ ++ // v6, v7 ++ ++ shrn v27.4h, v6.4s, #16 ++ xtn v25.4h, v6.4s ++ shrn v26.4h, v6.4s, #12 ++ ++ shrn2 v27.8h, v7.4s, #16 ++ xtn2 v25.8h, v7.4s ++ shrn2 v26.8h, v7.4s, #12 ++ ++ shrn2 v21.16b, v27.8h, #6 ++ shrn2 v19.16b, v25.8h, #2 ++ xtn2 v20.16b, v26.8h ++ ++ blt 2f ++ ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 ++ beq 11b ++ mov v16.2d[0], v16.2d[1] ++ sub w5, w5, #24 ++ mov v17.2d[0], v17.2d[1] ++ mov v18.2d[0], v18.2d[1] ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[2], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[3], [x0], #3 ++ beq 11b ++ mov v16.2s[0], v16.2s[1] ++ sub w5, w5, #12 ++ mov v17.2s[0], v17.2s[1] ++ mov v18.2s[0], v18.2s[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 ++ beq 11b ++ mov v16.4h[0], v16.4h[1] ++ sub w5, w5, #6 ++ mov v17.4h[0], v17.4h[1] ++ mov v18.4h[0], v18.4h[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ beq 11b ++ mov v16.8b[0], v16.8b[1] ++ sub w5, w5, #3 ++ mov v17.8b[0], v17.8b[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.b, v17.b}[0], [x0], #2 ++ b 11b ++1: ++ st1 {v16.b}[0], [x0], #1 ++ b 11b ++ ++endfunc ++ diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h new file mode 100644 -index 0000000000..b3aa481ea4 +index 0000000000..2a56135bc3 --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.h -@@ -0,0 +1,55 @@ +@@ -0,0 +1,59 @@ +/* +Copyright (c) 2021 Michael Eiler + @@ -62578,6 +64007,10 @@ index 0000000000..b3aa481ea4 + uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, + unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + ++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned 
int _x, unsigned int y, unsigned int _w, unsigned int h); ++ +#ifdef __cplusplus +} +#endif @@ -62593,10 +64026,10 @@ index 5da44b0542..b74b7c4e2f 100644 + arm/rpi_sand_neon.o \ diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S new file mode 100644 -index 0000000000..80890fe985 +index 0000000000..60e697f681 --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.S -@@ -0,0 +1,768 @@ +@@ -0,0 +1,925 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. @@ -62959,7 +64392,6 @@ index 0000000000..80890fe985 + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 -+ vmov.u16 q15, #0x3ff + sub r3, #1 + lsl r3, #7 + sub r1, r1, r6, lsl #1 @@ -62975,37 +64407,33 @@ index 0000000000..80890fe985 + vldm r2!, {q10-q13} + add lr, #64 + -+ vshr.u32 q14, q10, #20 @ Cannot vshrn.u32 #20! ++ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! + ands lr, #127 + vshrn.u32 d2, q10, #10 + vmovn.u32 d0, q10 -+ vmovn.u32 d4, q14 + -+ vshr.u32 q14, q11, #20 ++ vshrn.u32 d5, q11, #14 + it eq + addeq r2, r3 + vshrn.u32 d3, q11, #10 + vmovn.u32 d1, q11 -+ vmovn.u32 d5, q14 + + subs r5, #48 -+ vand q0, q15 -+ vand q1, q15 -+ vand q2, q15 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 + -+ vshr.u32 q14, q12, #20 ++ vshrn.u32 d20, q12, #14 + vshrn.u32 d18, q12, #10 + vmovn.u32 d16, q12 -+ vmovn.u32 d20, q14 + -+ vshr.u32 q14, q13, #20 ++ vshrn.u32 d21, q13, #14 + vshrn.u32 d19, q13, #10 + vmovn.u32 d17, q13 -+ vmovn.u32 d21, q14 + -+ vand q8, q15 -+ vand q9, q15 -+ vand q10, q15 ++ vshr.u16 q10, #6 ++ vbic.u16 q8, #0xfc00 ++ vbic.u16 q9 , #0xfc00 + blt 2f + + vst3.16 {d0, d2, d4}, [r0], r12 @@ -63098,7 +64526,6 @@ index 0000000000..80890fe985 + ldr r7, [sp, #48] + ldr r9, [sp, #52] + mov r12, #48 -+ vmov.u16 q15, #0x3ff + sub r8, #1 + lsl r8, #7 + add r5, r5, r7, lsl #7 @@ -63114,48 +64541,44 @@ index 0000000000..80890fe985 + add lr, #64 + + @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 -+ vshr.u32 q14, q0, #20 -+ vshrn.u32 d16, q0, #10 ++ vshrn.u32 d20, q0, #14 + vmovn.u32 d18, q0 ++ vshrn.u32 d0, q0, #10 + ands lr, #127 -+ vmovn.u32 d20, q14 + -+ vshr.u32 q14, q1, #20 -+ vshrn.u32 d17, q1, #10 ++ vshrn.u32 d21, q1, #14 + vmovn.u32 d19, q1 -+ vmovn.u32 d21, q14 ++ vshrn.u32 d1, q1, #10 + -+ vshr.u32 q14, q2, #20 + vshrn.u32 d22, q2, #10 -+ vmovn.u32 d24, q2 -+ vmovn.u32 d26, q14 ++ vmovn.u32 d2, q2 ++ vshrn.u32 d4, q2, #14 + -+ vshr.u32 q14, q3, #20 -+ vshrn.u32 d23, q3, #10 -+ vmovn.u32 d25, q3 + add r10, r0, #24 -+ vmovn.u32 d27, q14 ++ vshrn.u32 d23, q3, #10 ++ vmovn.u32 d3, q3 ++ vshrn.u32 d5, q3, #14 + + it eq + addeq r4, r8 -+ vuzp.16 q8, q11 -+ vuzp.16 q9, q12 -+ vuzp.16 q10, q13 ++ vuzp.16 q0, q11 ++ vuzp.16 q9, q1 ++ vuzp.16 q10, q2 + -+ @ q8 V0, V3,.. -> q0 ++ @ q0 V0, V3,.. + @ q9 U0, U3... + @ q10 U1, U4... + @ q11 U2, U5,.. -+ @ q12 V1, V4,.. -> q1 -+ @ q13 V2, V5,.. -> q2 ++ @ q1 V1, V4, ++ @ q2 V2, V5,.. 
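The register map in the comment above falls out of the SAND30 packing: each little-endian 32-bit word holds three 10-bit samples in bits 0-9, 10-19 and 20-29, and in the chroma plane the samples alternate U,V,U,V... as in NV12, so successive words begin on alternating components. The following plain-C reference model shows the same unpacking; it is a simplified sketch that ignores the 128-byte stripe addressing and partial-width handling the NEON routine also deals with, and the function name is illustrative only.

#include <stdint.h>
#include <stddef.h>

/* Scalar model of the SAND30 chroma unpack: every 32-bit little-endian
 * word holds three 10-bit samples (bits 0-9, 10-19, 20-29) and the
 * chroma samples alternate U,V,U,V... exactly as in NV12.
 * 'src' is assumed to point at one row inside a single column stripe,
 * and 'pairs' is the number of U/V pairs wanted from it. */
static void sand30_row_to_planar_c16(uint16_t *du, uint16_t *dv,
                                     const uint32_t *src, size_t pairs)
{
    unsigned int phase = 0;            /* 0 => next sample is U, 1 => V */
    size_t done = 0;

    while (done < pairs) {
        uint32_t w = *src++;
        for (int i = 0; i < 3 && done < pairs; i++, w >>= 10) {
            uint16_t s = w & 0x3ff;    /* keep the low 10 bits */
            if (phase == 0) {
                *du++ = s;
            } else {
                *dv++ = s;
                done++;                /* a pair completes on the V sample */
            }
            phase ^= 1;
        }
    }
}

The phase flag plays the role of the vuzp de-interleave in the assembly above: it routes alternating samples to the U and V outputs.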
+ + subs r6, #24 -+ vand q11, q15 -+ vand q9, q15 -+ vand q10, q15 -+ vand q0, q8, q15 -+ vand q1, q12, q15 -+ vand q2, q13, q15 ++ vbic.u16 q11, #0xfc00 ++ vbic.u16 q9, #0xfc00 ++ vshr.u16 q10, #6 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 + + blt 2f + @@ -63364,13 +64787,180 @@ index 0000000000..80890fe985 +endfunc + + ++@ void ff_rpi_sand30_lines_to_planar_y8( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ lsl r3, #7 ++ sub r1, r1, r6 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++1: ++ vldm r2, {q8-q15} ++ ++ subs r5, #96 ++ ++ vmovn.u32 d0, q8 ++ vshrn.u32 d2, q8, #12 ++ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! ++ ++ add r2, r3 ++ ++ vmovn.u32 d1, q9 ++ vshrn.u32 d3, q9, #12 ++ vshrn.u32 d5, q9, #16 ++ ++ pld [r2, #0] ++ ++ vshrn.u16 d0, q0, #2 ++ vmovn.u16 d1, q1 ++ vshrn.u16 d2, q2, #6 ++ ++ vmovn.u32 d16, q10 ++ vshrn.u32 d18, q10, #12 ++ vshrn.u32 d20, q10, #16 ++ ++ vmovn.u32 d17, q11 ++ vshrn.u32 d19, q11, #12 ++ vshrn.u32 d21, q11, #16 ++ ++ pld [r2, #64] ++ ++ vshrn.u16 d4, q8, #2 ++ vmovn.u16 d5, q9 ++ vshrn.u16 d6, q10, #6 ++ ++ vmovn.u32 d16, q12 ++ vshrn.u32 d18, q12, #12 ++ vshrn.u32 d20, q12, #16 ++ ++ vmovn.u32 d17, q13 ++ vshrn.u32 d19, q13, #12 ++ vshrn.u32 d21, q13, #16 ++ ++ vshrn.u16 d16, q8, #2 ++ vmovn.u16 d17, q9 ++ vshrn.u16 d18, q10, #6 ++ ++ vmovn.u32 d20, q14 ++ vshrn.u32 d22, q14, #12 ++ vshrn.u32 d24, q14, #16 ++ ++ vmovn.u32 d21, q15 ++ vshrn.u32 d23, q15, #12 ++ vshrn.u32 d25, q15, #16 ++ ++ vshrn.u16 d20, q10, #2 ++ vmovn.u16 d21, q11 ++ vshrn.u16 d22, q12, #6 ++ ++ blt 2f ++ ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ vst3.8 {d16, d17, d18}, [r0], r12 ++ vst3.8 {d20, d21, d22}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #48-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ beq 11b ++ vmov q0, q8 ++ vmov q2, q10 ++ sub r5, #48 ++ vmov d2, d18 ++ vmov d6, d22 ++1: ++ cmp r5, #24-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0]! ++ beq 11b ++ vmov q0, q2 ++ sub r5, #24 ++ vmov d2, d6 ++1: ++ cmp r5, #12-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! ++ vst3.8 {d0[2], d1[2], d2[2]}, [r0]! ++ vst3.8 {d0[3], d1[3], d2[3]}, [r0]! ++ beq 11b ++ vmov s0, s1 ++ sub r5, #12 ++ vmov s2, s3 ++ vmov s4, s5 ++1: ++ cmp r5, #6-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! ++ add r0, #12 ++ beq 11b ++ vshr.u32 d0, #16 ++ sub r5, #6 ++ vshr.u32 d1, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #3-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #8 ++ vshr.u32 d1, #8 ++1: ++ cmp r5, #2-96 ++ blt 1f ++ vst2.8 {d0[0], d1[0]}, [r0]! 
++ b 11b ++1: ++ vst1.8 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ + diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h new file mode 100644 -index 0000000000..447f367bea +index 0000000000..d457c10870 --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.h -@@ -0,0 +1,99 @@ +@@ -0,0 +1,110 @@ +/* +Copyright (c) 2020 Raspberry Pi (Trading) Ltd. +All rights reserved. @@ -63468,6 +65058,17 @@ index 0000000000..447f367bea + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + ++void ff_rpi_sand30_lines_to_planar_y8( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ +#endif // AVUTIL_ARM_SAND_NEON_H + diff --git a/libavutil/frame.c b/libavutil/frame.c @@ -63528,7 +65129,7 @@ index 7d1f8e2935..a4e7dc915d 100644 * @} */ diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c -index 7a9fdbd263..2a498f9b50 100644 +index 7a9fdbd263..2f825b7e16 100644 --- a/libavutil/hwcontext_drm.c +++ b/libavutil/hwcontext_drm.c @@ -21,6 +21,7 @@ @@ -63603,13 +65204,23 @@ index 7a9fdbd263..2a498f9b50 100644 err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src, &drm_unmap_frame, map); -@@ -212,7 +240,15 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, - if (!pix_fmts) +@@ -206,16 +234,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) + { +- enum AVPixelFormat *pix_fmts; ++ enum AVPixelFormat *p; + +- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); +- if (!pix_fmts) ++ p = *formats = av_malloc_array(3, sizeof(*p)); ++ if (!p) return AVERROR(ENOMEM); - pix_fmts[0] = ctx->sw_format; +- pix_fmts[1] = AV_PIX_FMT_NONE; + // **** Offer native sand too ???? -+ pix_fmts[0] = ++ *p++ = +#if CONFIG_SAND + ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? 
+ AV_PIX_FMT_YUV420P : @@ -63617,10 +65228,19 @@ index 7a9fdbd263..2a498f9b50 100644 + AV_PIX_FMT_YUV420P10LE : +#endif + ctx->sw_format; - pix_fmts[1] = AV_PIX_FMT_NONE; ++ ++#if CONFIG_SAND ++ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || ++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) ++ *p++ = AV_PIX_FMT_NV12; ++#endif - *formats = pix_fmts; -@@ -231,18 +267,80 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, +- *formats = pix_fmts; ++ *p = AV_PIX_FMT_NONE; + return 0; + } + +@@ -231,18 +272,63 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, map = av_frame_alloc(); if (!map) return AVERROR(ENOMEM); @@ -63655,29 +65275,12 @@ index 7a9fdbd263..2a498f9b50 100644 + const unsigned int w = FFMIN(dst->width, map->width); + const unsigned int h = FFMIN(dst->height, map->height); + -+ if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) { -+ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], -+ map->data[0], -+ 128, stride2, -+ 0, 0, w, h); -+ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ map->data[1], -+ 128, stride2, -+ 0, 0, w / 2, h / 2); -+ } -+ else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) { -+ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], -+ map->data[0], -+ 128, stride2, -+ 0, 0, w, h); -+ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ map->data[1], -+ 128, stride2, -+ 0, 0, w / 2, h / 2); -+ } -+ else ++ map->crop_top = 0; ++ map->crop_bottom = 0; ++ map->crop_left = 0; ++ map->crop_right = 0; ++ ++ if (av_rpi_sand_to_planar_frame(dst, map) != 0) + { + av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); + err = AVERROR(EINVAL); @@ -63705,7 +65308,7 @@ index 7a9fdbd263..2a498f9b50 100644 err = 0; fail: -@@ -257,7 +355,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, +@@ -257,7 +343,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, int err; if (src->width > hwfc->width || src->height > hwfc->height) @@ -64011,10 +65614,10 @@ index 0000000000..0d5d203dc3 + diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c new file mode 100644 -index 0000000000..1f543e9357 +index 0000000000..b6071e2928 --- /dev/null +++ b/libavutil/rpi_sand_fns.c -@@ -0,0 +1,356 @@ +@@ -0,0 +1,445 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. 
@@ -64246,6 +65849,75 @@ index 0000000000..1f543e9357 + } +} + ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// single lose bottom 2 bits truncation ++// _x & _w in pixels, strides in bytes ++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word ++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 4; ++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0) { ++ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint8_t * d = dst; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3 = *p++; ++ ++ if (xskip0 == 1) ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3 = *p++; ++ *d++ = (p3 >> 2) & 0xff; ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3 = *p; ++ ++ *d++ = (p3 >> 2) & 0xff; ++ if (xrem1 == 2) ++ *d++ = (p3 >> 12) & 0xff; ++ } ++ } ++} ++ ++ + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, @@ -64327,6 +65999,16 @@ index 0000000000..1f543e9357 + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w/2, h/2); + break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w, h/2); ++ break; + default: + return -1; + } @@ -64361,6 +66043,16 @@ index 0000000000..1f543e9357 + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w/2, h/2); + break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w, h/2); ++ break; + default: + return -1; + } @@ -64373,10 +66065,10 @@ index 0000000000..1f543e9357 +} diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h new file mode 100644 -index 0000000000..634b55e800 +index 0000000000..462ccb8abd --- /dev/null +++ b/libavutil/rpi_sand_fns.h -@@ -0,0 +1,183 @@ +@@ -0,0 +1,188 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. 
@@ -64464,6 +66156,11 @@ index 0000000000..634b55e800 + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + ++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, @@ -66194,3 +67891,644 @@ index 0000000000..5935a11ca5 + + do_logparse(args.logfile) + +diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile +index 1827a4e134..08da4166ef 100644 +--- a/tests/checkasm/Makefile ++++ b/tests/checkasm/Makefile +@@ -9,8 +9,10 @@ AVCODECOBJS-$(CONFIG_G722DSP) += g722dsp.o + AVCODECOBJS-$(CONFIG_H264DSP) += h264dsp.o + AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o + AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o ++AVCODECOBJS-$(CONFIG_IDCTDSP) += idctdsp.o + AVCODECOBJS-$(CONFIG_LLVIDDSP) += llviddsp.o + AVCODECOBJS-$(CONFIG_LLVIDENCDSP) += llviddspenc.o ++AVCODECOBJS-$(CONFIG_VC1DSP) += vc1dsp.o + AVCODECOBJS-$(CONFIG_VP8DSP) += vp8dsp.o + AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o + +diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c +index 8338e8ff58..81ef182f04 100644 +--- a/tests/checkasm/checkasm.c ++++ b/tests/checkasm/checkasm.c +@@ -131,6 +131,9 @@ static const struct { + #if CONFIG_HUFFYUV_DECODER + { "huffyuvdsp", checkasm_check_huffyuvdsp }, + #endif ++ #if CONFIG_IDCTDSP ++ { "idctdsp", checkasm_check_idctdsp }, ++ #endif + #if CONFIG_JPEG2000_DECODER + { "jpeg2000dsp", checkasm_check_jpeg2000dsp }, + #endif +@@ -155,6 +158,9 @@ static const struct { + #if CONFIG_V210_ENCODER + { "v210enc", checkasm_check_v210enc }, + #endif ++ #if CONFIG_VC1DSP ++ { "vc1dsp", checkasm_check_vc1dsp }, ++ #endif + #if CONFIG_VP8DSP + { "vp8dsp", checkasm_check_vp8dsp }, + #endif +diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h +index ef6645e3a2..1a1e17d835 100644 +--- a/tests/checkasm/checkasm.h ++++ b/tests/checkasm/checkasm.h +@@ -70,6 +70,7 @@ void checkasm_check_hevc_epel_bi(void); + void checkasm_check_hevc_epel_bi_w(void); + void checkasm_check_hevc_sao(void); + void checkasm_check_huffyuvdsp(void); ++void checkasm_check_idctdsp(void); + void checkasm_check_jpeg2000dsp(void); + void checkasm_check_llviddsp(void); + void checkasm_check_llviddspenc(void); +@@ -83,6 +84,7 @@ void checkasm_check_sw_scale(void); + void checkasm_check_utvideodsp(void); + void checkasm_check_v210dec(void); + void checkasm_check_v210enc(void); ++void checkasm_check_vc1dsp(void); + void checkasm_check_vf_eq(void); + void checkasm_check_vf_gblur(void); + void checkasm_check_vf_hflip(void); +diff --git a/tests/checkasm/idctdsp.c b/tests/checkasm/idctdsp.c +new file mode 100644 +index 0000000000..02724536a7 +--- /dev/null ++++ b/tests/checkasm/idctdsp.c +@@ -0,0 +1,98 @@ ++/* ++ * Copyright (c) 2022 Ben Avison ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License along ++ * with FFmpeg; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#include ++ ++#include "checkasm.h" ++ ++#include "libavcodec/idctdsp.h" ++ ++#include "libavutil/common.h" ++#include "libavutil/internal.h" ++#include "libavutil/intreadwrite.h" ++#include "libavutil/mem_internal.h" ++ ++#define IDCTDSP_TEST(func) { #func, offsetof(IDCTDSPContext, func) }, ++ ++typedef struct { ++ const char *name; ++ size_t offset; ++} test; ++ ++#define RANDOMIZE_BUFFER16(name, size) \ ++ do { \ ++ int i; \ ++ for (i = 0; i < size; ++i) { \ ++ uint16_t r = rnd() % 0x201 - 0x100; \ ++ AV_WN16A(name##0 + i, r); \ ++ AV_WN16A(name##1 + i, r); \ ++ } \ ++ } while (0) ++ ++#define RANDOMIZE_BUFFER8(name, size) \ ++ do { \ ++ int i; \ ++ for (i = 0; i < size; ++i) { \ ++ uint8_t r = rnd(); \ ++ name##0[i] = r; \ ++ name##1[i] = r; \ ++ } \ ++ } while (0) ++ ++static void check_add_put_clamped(void) ++{ ++ /* Source buffers are only as big as needed, since any over-read won't affect results */ ++ LOCAL_ALIGNED_16(int16_t, src0, [64]); ++ LOCAL_ALIGNED_16(int16_t, src1, [64]); ++ /* Destination buffers have borders of one row above/below and 8 columns left/right to catch overflows */ ++ LOCAL_ALIGNED_8(uint8_t, dst0, [10 * 24]); ++ LOCAL_ALIGNED_8(uint8_t, dst1, [10 * 24]); ++ ++ AVCodecContext avctx = { 0 }; ++ IDCTDSPContext h; ++ ++ const test tests[] = { ++ IDCTDSP_TEST(add_pixels_clamped) ++ IDCTDSP_TEST(put_pixels_clamped) ++ IDCTDSP_TEST(put_signed_pixels_clamped) ++ }; ++ ++ ff_idctdsp_init(&h, &avctx); ++ ++ for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { ++ void (*func)(const int16_t *, uint8_t * ptrdiff_t) = *(void **)((intptr_t) &h + tests[t].offset); ++ if (check_func(func, "idctdsp.%s", tests[t].name)) { ++ declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *, uint8_t *, ptrdiff_t); ++ RANDOMIZE_BUFFER16(src, 64); ++ RANDOMIZE_BUFFER8(dst, 10 * 24); ++ call_ref(src0, dst0 + 24 + 8, 24); ++ call_new(src1, dst1 + 24 + 8, 24); ++ if (memcmp(dst0, dst1, 10 * 24)) ++ fail(); ++ bench_new(src1, dst1 + 24 + 8, 24); ++ } ++ } ++} ++ ++void checkasm_check_idctdsp(void) ++{ ++ check_add_put_clamped(); ++ report("idctdsp"); ++} +diff --git a/tests/checkasm/vc1dsp.c b/tests/checkasm/vc1dsp.c +new file mode 100644 +index 0000000000..52628d15e4 +--- /dev/null ++++ b/tests/checkasm/vc1dsp.c +@@ -0,0 +1,452 @@ ++/* ++ * Copyright (c) 2022 Ben Avison ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with FFmpeg; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
++ */
++
++#include <string.h>
++
++#include "checkasm.h"
++
++#include "libavcodec/vc1dsp.h"
++
++#include "libavutil/common.h"
++#include "libavutil/internal.h"
++#include "libavutil/intreadwrite.h"
++#include "libavutil/mem_internal.h"
++
++#define VC1DSP_TEST(func) { #func, offsetof(VC1DSPContext, func) },
++#define VC1DSP_SIZED_TEST(func, width, height) { #func, offsetof(VC1DSPContext, func), width, height },
++
++typedef struct {
++    const char *name;
++    size_t offset;
++    int width;
++    int height;
++} test;
++
++typedef struct matrix {
++    size_t width;
++    size_t height;
++    float d[];
++} matrix;
++
++static const matrix T8 = { 8, 8, {
++    12, 12, 12, 12, 12, 12, 12, 12,
++    16, 15, 9, 4, -4, -9, -15, -16,
++    16, 6, -6, -16, -16, -6, 6, 16,
++    15, -4, -16, -9, 9, 16, 4, -15,
++    12, -12, -12, 12, 12, -12, -12, 12,
++    9, -16, 4, 15, -15, -4, 16, -9,
++    6, -16, 16, -6, -6, 16, -16, 6,
++    4, -9, 15, -16, 16, -15, 9, -4
++} };
++
++static const matrix T4 = { 4, 4, {
++    17, 17, 17, 17,
++    22, 10, -10, -22,
++    17, -17, -17, 17,
++    10, -22, 22, -10
++} };
++
++static const matrix T8t = { 8, 8, {
++    12, 16, 16, 15, 12, 9, 6, 4,
++    12, 15, 6, -4, -12, -16, -16, -9,
++    12, 9, -6, -16, -12, 4, 16, 15,
++    12, 4, -16, -9, 12, 15, -6, -16,
++    12, -4, -16, 9, 12, -15, -6, 16,
++    12, -9, -6, 16, -12, -4, 16, -15,
++    12, -15, 6, 4, -12, 16, -16, 9,
++    12, -16, 16, -15, 12, -9, 6, -4
++} };
++
++static const matrix T4t = { 4, 4, {
++    17, 22, 17, 10,
++    17, 10, -17, -22,
++    17, -10, -17, 22,
++    17, -22, 17, -10
++} };
++
++static matrix *new_matrix(size_t width, size_t height)
++{
++    matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float));
++    if (out == NULL) {
++        fprintf(stderr, "Memory allocation failure\n");
++        exit(EXIT_FAILURE);
++    }
++    out->width = width;
++    out->height = height;
++    return out;
++}
++
++static matrix *multiply(const matrix *a, const matrix *b)
++{
++    matrix *out;
++    if (a->width != b->height) {
++        fprintf(stderr, "Incompatible multiplication\n");
++        exit(EXIT_FAILURE);
++    }
++    out = new_matrix(b->width, a->height);
++    for (int j = 0; j < out->height; ++j)
++        for (int i = 0; i < out->width; ++i) {
++            float sum = 0;
++            for (int k = 0; k < a->width; ++k)
++                sum += a->d[j * a->width + k] * b->d[k * b->width + i];
++            out->d[j * out->width + i] = sum;
++        }
++    return out;
++}
++
++static void normalise(matrix *a)
++{
++    for (int j = 0; j < a->height; ++j)
++        for (int i = 0; i < a->width; ++i) {
++            float *p = a->d + j * a->width + i;
++            *p *= 64;
++            if (a->height == 4)
++                *p /= (const unsigned[]) { 289, 292, 289, 292 } [j];
++            else
++                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j];
++            if (a->width == 4)
++                *p /= (const unsigned[]) { 289, 292, 289, 292 } [i];
++            else
++                *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i];
++        }
++}
++
++static void divide_and_round_nearest(matrix *a, float by)
++{
++    for (int j = 0; j < a->height; ++j)
++        for (int i = 0; i < a->width; ++i) {
++            float *p = a->d + j * a->width + i;
++            *p = rintf(*p / by);
++        }
++}
++
++static void tweak(matrix *a)
++{
++    for (int j = 4; j < a->height; ++j)
++        for (int i = 0; i < a->width; ++i) {
++            float *p = a->d + j * a->width + i;
++            *p += 1;
++        }
++}
++
++/* The VC-1 spec places restrictions on the values permitted at three
++ * different stages:
++ * - D: the input coefficients in frequency domain
++ * - E: the intermediate coefficients, inverse-transformed only horizontally
++ * - R: the fully inverse-transformed coefficients
++ *
++ * To fully cater for the ranges specified requires various intermediate
++ * values to be held to 17-bit precision; yet these conditions do not appear
++ * to be utilised in real-world streams. At least some assembly
++ * implementations have chosen to restrict these values to 16-bit precision,
++ * to accelerate the decoding of real-world streams at the cost of strict
++ * adherence to the spec. To avoid our test marking these as failures,
++ * reduce our random inputs.
++ */
++#define ATTENUATION 4
++
++static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height)
++{
++    matrix *raw, *tmp, *D, *E, *R;
++    raw = new_matrix(width, height);
++    for (int i = 0; i < width * height; ++i)
++        raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION;
++    tmp = multiply(height == 8 ? &T8 : &T4, raw);
++    D = multiply(tmp, width == 8 ? &T8t : &T4t);
++    normalise(D);
++    divide_and_round_nearest(D, 1);
++    for (int i = 0; i < width * height; ++i) {
++        if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) {
++            /* Rare, so simply try again */
++            av_free(raw);
++            av_free(tmp);
++            av_free(D);
++            return generate_inverse_quantized_transform_coefficients(width, height);
++        }
++    }
++    E = multiply(D, width == 8 ? &T8 : &T4);
++    divide_and_round_nearest(E, 8);
++    for (int i = 0; i < width * height; ++i)
++        if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) {
++            /* Rare, so simply try again */
++            av_free(raw);
++            av_free(tmp);
++            av_free(D);
++            av_free(E);
++            return generate_inverse_quantized_transform_coefficients(width, height);
++        }
++    R = multiply(height == 8 ? &T8t : &T4t, E);
++    tweak(R);
++    divide_and_round_nearest(R, 128);
++    for (int i = 0; i < width * height; ++i)
++        if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) {
++            /* Rare, so simply try again */
++            av_free(raw);
++            av_free(tmp);
++            av_free(D);
++            av_free(E);
++            av_free(R);
++            return generate_inverse_quantized_transform_coefficients(width, height);
++        }
++    av_free(raw);
++    av_free(tmp);
++    av_free(E);
++    av_free(R);
++    return D;
++}
++
++#define RANDOMIZE_BUFFER16(name, size) \
++    do { \
++        int i; \
++        for (i = 0; i < size; ++i) { \
++            uint16_t r = rnd(); \
++            AV_WN16A(name##0 + i, r); \
++            AV_WN16A(name##1 + i, r); \
++        } \
++    } while (0)
++
++#define RANDOMIZE_BUFFER8(name, size) \
++    do { \
++        int i; \
++        for (i = 0; i < size; ++i) { \
++            uint8_t r = rnd(); \
++            name##0[i] = r; \
++            name##1[i] = r; \
++        } \
++    } while (0)
++
++#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size) \
++    do { \
++        uint8_t *p##0 = name##0, *p##1 = name##1; \
++        int i = (size); \
++        while (i-- > 0) { \
++            int x = 0x80 | (rnd() & 0x7F); \
++            x >>= rnd() % 9; \
++            if (rnd() & 1) \
++                x = -x; \
++            *p##1++ = *p##0++ = 0x80 + x; \
++        } \
++    } while (0)
++
++static void check_inv_trans_inplace(void)
++{
++    /* Inverse transform input coefficients are stored in a 16-bit buffer
++     * with row stride of 8 coefficients irrespective of transform size.
++     * vc1_inv_trans_8x8 differs from the others in two ways: coefficients
++     * are stored in column-major order, and the outputs are written back
++     * to the input buffer, so we oversize it slightly to catch overruns. */
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]);
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]);
++
++    VC1DSPContext h;
++
++    ff_vc1dsp_init(&h);
++
++    if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) {
++        matrix *coeffs;
++        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *);
++        RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8);
++        coeffs = generate_inverse_quantized_transform_coefficients(8, 8);
++        for (int j = 0; j < 8; ++j)
++            for (int i = 0; i < 8; ++i) {
++                int idx = 8 + i * 8 + j;
++                inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i];
++            }
++        call_ref(inv_trans_in0 + 8);
++        call_new(inv_trans_in1 + 8);
++        if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t)))
++            fail();
++        bench_new(inv_trans_in1 + 8);
++        av_free(coeffs);
++    }
++}
++
++static void check_inv_trans_adding(void)
++{
++    /* Inverse transform input coefficients are stored in a 16-bit buffer
++     * with row stride of 8 coefficients irrespective of transform size. */
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]);
++    LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]);
++
++    /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and
++     * added with saturation to an array of unsigned 8-bit values. Oversize
++     * this by 8 samples left and right and one row above and below. */
++    LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]);
++    LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]);
++
++    VC1DSPContext h;
++
++    const test tests[] = {
++        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8)
++        VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4)
++    };
++
++    ff_vc1dsp_init(&h);
++
++    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
++        void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset);
++        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
++            matrix *coeffs;
++            declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *);
++            RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8);
++            RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24);
++            coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height);
++            for (int j = 0; j < tests[t].height; ++j)
++                for (int i = 0; i < tests[t].width; ++i) {
++                    int idx = j * 8 + i;
++                    inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i];
++                }
++            call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0);
++            call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1);
++            if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24))
++                fail();
++            bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8);
++            av_free(coeffs);
++        }
++    }
++}
++
++static void check_loop_filter(void)
++{
++    /* Deblocking filter buffers are big enough to hold a 16x16 block,
++     * plus 16 columns left and 4 rows above to hold filter inputs
++     * (depending on whether v or h neighbouring block edge, oversized
++     * horizontally to maintain 16-byte alignment) plus 16 columns and
++     * 4 rows below to catch write overflows */
++    LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]);
++    LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]);
++
++    VC1DSPContext h;
++
++    const test tests[] = {
++        VC1DSP_TEST(vc1_v_loop_filter4)
++        VC1DSP_TEST(vc1_h_loop_filter4)
++        VC1DSP_TEST(vc1_v_loop_filter8)
++        VC1DSP_TEST(vc1_h_loop_filter8)
++        VC1DSP_TEST(vc1_v_loop_filter16)
++        VC1DSP_TEST(vc1_h_loop_filter16)
++    };
++
++    ff_vc1dsp_init(&h);
++
++    for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) {
++        void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset);
++        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int);
++        if (check_func(func, "vc1dsp.%s", tests[t].name)) {
++            for (int count = 1000; count > 0; --count) {
++                int pq = rnd() % 31 + 1;
++                RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48);
++                call_ref(filter_buf0 + 4 * 48 + 16, 48, pq);
++                call_new(filter_buf1 + 4 * 48 + 16, 48, pq);
++                if (memcmp(filter_buf0, filter_buf1, 24 * 48))
++                    fail();
++            }
++        }
++        for (int j = 0; j < 24; ++j)
++            for (int i = 0; i < 48; ++i)
++                filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4);
++        if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name))
++            bench_new(filter_buf1 + 4 * 48 + 16, 48, 1);
++        if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name))
++            bench_new(filter_buf1 + 4 * 48 + 16, 48, 31);
++    }
++}
++
++#define TEST_UNESCAPE \
++    do { \
++        for (int count = 100; count > 0; --count) { \
++            escaped_offset = rnd() & 7; \
++            unescaped_offset = rnd() & 7; \
++            escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7); \
++            RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE); \
++            len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \
++            len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \
++            if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE)) \
++                fail(); \
++        } \
++    } while (0)
++
++static void check_unescape(void)
++{
++    /* This appears to be a typical length of buffer in use */
++#define LOG2_UNESCAPE_BUF_SIZE 17
++#define UNESCAPE_BUF_SIZE (1u<