diff --git a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch index 773f84a3cd..89bc51da80 100644 --- a/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch +++ b/packages/multimedia/ffmpeg/patches/rpi/ffmpeg-001-rpi.patch @@ -375,7 +375,7 @@ index 2eb4e1c973..ffbfa9accf 100644 "write program-readable progress information", "url" }, { "stdin", OPT_BOOL | OPT_EXPERT, { &stdin_interaction }, diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 5a6ea59715..e8631eaa4a 100644 +index 5a6ea59715..40249fe5c9 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -19,6 +19,7 @@ HEADERS = ac3_parser.h \ @@ -428,7 +428,7 @@ index 5a6ea59715..e8631eaa4a 100644 +OBJS-$(CONFIG_HEVC_RPI4_8_HWACCEL) += rpivid_hevc.o +OBJS-$(CONFIG_HEVC_RPI4_10_HWACCEL) += rpivid_hevc.o +OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL) += v4l2_request_hevc.o v4l2_req_decode_q.o\ -+ v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o ++ v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o v4l2_req_hevc_v3.o v4l2_req_hevc_v4.o OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL) += vaapi_hevc.o h265_profile_level.o OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL) += vdpau_hevc.o OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL) += nvdec_mjpeg.o @@ -465,19 +465,10 @@ index 5a6ea59715..e8631eaa4a 100644 +$(SUBDIR)rpi_hevcdec.o $(SUBDIR)rpi_shader_template.o $(SUBDIR)rpi_qpu.o: $(SUBDIR)rpi_hevc_shader.h +endif diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile -index f6434e40da..b48a1e4401 100644 +index f6434e40da..7fa6611e8f 100644 --- a/libavcodec/aarch64/Makefile +++ b/libavcodec/aarch64/Makefile -@@ -35,6 +35,8 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o - - # subsystems - NEON-OBJS-$(CONFIG_AAC_DECODER) += aarch64/sbrdsp_neon.o -+NEON-OBJS-$(CONFIG_BLOCKDSP) += aarch64/blockdsp_init_aarch64.o \ -+ aarch64/blockdsp_neon.o - NEON-OBJS-$(CONFIG_FFT) += aarch64/fft_neon.o - NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o - NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o -@@ -44,10 +46,12 @@ NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o +@@ -44,10 +44,12 @@ NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \ aarch64/hpeldsp_neon.o NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o @@ -491,103 +482,6 @@ index f6434e40da..b48a1e4401 100644 NEON-OBJS-$(CONFIG_VP8DSP) += aarch64/vp8dsp_neon.o # decoders/encoders -diff --git a/libavcodec/aarch64/blockdsp_init_aarch64.c b/libavcodec/aarch64/blockdsp_init_aarch64.c -new file mode 100644 -index 0000000000..9f3280f007 ---- /dev/null -+++ b/libavcodec/aarch64/blockdsp_init_aarch64.c -@@ -0,0 +1,42 @@ -+/* -+ * AArch64 NEON optimised block operations -+ * -+ * Copyright (c) 2022 Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include -+ -+#include "libavutil/attributes.h" -+#include "libavutil/cpu.h" -+#include "libavutil/arm/cpu.h" -+#include "libavcodec/avcodec.h" -+#include "libavcodec/blockdsp.h" -+ -+void ff_clear_block_neon(int16_t *block); -+void ff_clear_blocks_neon(int16_t *blocks); -+ -+av_cold void ff_blockdsp_init_aarch64(BlockDSPContext *c) -+{ -+ int cpu_flags = av_get_cpu_flags(); -+ -+ if (have_neon(cpu_flags)) { -+ c->clear_block = ff_clear_block_neon; -+ c->clear_blocks = ff_clear_blocks_neon; -+ } -+} -diff --git a/libavcodec/aarch64/blockdsp_neon.S b/libavcodec/aarch64/blockdsp_neon.S -new file mode 100644 -index 0000000000..e4a4959ccc ---- /dev/null -+++ b/libavcodec/aarch64/blockdsp_neon.S -@@ -0,0 +1,43 @@ -+/* -+ * AArch64 NEON optimised block operations -+ * -+ * Copyright (c) 2022 Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/aarch64/asm.S" -+ -+function ff_clear_block_neon, export=1 -+ movi v0.16b, #0 -+ movi v1.16b, #0 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ st1 {v0.16b, v1.16b}, [x0] -+ ret -+endfunc -+ -+function ff_clear_blocks_neon, export=1 -+ movi v0.16b, #0 -+ movi v1.16b, #0 -+ .rept 23 -+ st1 {v0.16b, v1.16b}, [x0], #32 -+ .endr -+ st1 {v0.16b, v1.16b}, [x0] -+ ret -+endfunc diff --git a/libavcodec/aarch64/idctdsp_init_aarch64.c b/libavcodec/aarch64/idctdsp_init_aarch64.c index 742a3372e3..eec21aa5a2 100644 --- a/libavcodec/aarch64/idctdsp_init_aarch64.c @@ -767,7 +661,7 @@ index 0000000000..7f47611206 + ret +endfunc diff --git a/libavcodec/aarch64/vc1dsp_init_aarch64.c b/libavcodec/aarch64/vc1dsp_init_aarch64.c -index 13dfd74940..161d5a972b 100644 +index 13dfd74940..a7976fd596 100644 --- a/libavcodec/aarch64/vc1dsp_init_aarch64.c +++ b/libavcodec/aarch64/vc1dsp_init_aarch64.c @@ -21,10 +21,28 @@ @@ -789,12 +683,12 @@ index 13dfd74940..161d5a972b 100644 +void ff_vc1_inv_trans_4x8_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); +void ff_vc1_inv_trans_4x4_dc_neon(uint8_t *dest, ptrdiff_t stride, int16_t *block); + -+void ff_vc1_v_loop_filter4_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_h_loop_filter4_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_v_loop_filter8_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_h_loop_filter8_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_v_loop_filter16_neon(uint8_t *src, int stride, int pq); -+void ff_vc1_h_loop_filter16_neon(uint8_t *src, int stride, int pq); ++void ff_vc1_v_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_h_loop_filter4_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_v_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_h_loop_filter8_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_v_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq); ++void ff_vc1_h_loop_filter16_neon(uint8_t *src, ptrdiff_t stride, int pq); + void ff_put_vc1_chroma_mc8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int h, int x, int y); @@ -892,10 +786,10 @@ index 13dfd74940..161d5a972b 100644 } diff --git a/libavcodec/aarch64/vc1dsp_neon.S b/libavcodec/aarch64/vc1dsp_neon.S new file mode 100644 -index 0000000000..529c21d285 +index 0000000000..9a96c2523c --- /dev/null +++ b/libavcodec/aarch64/vc1dsp_neon.S -@@ -0,0 +1,1552 @@ +@@ -0,0 +1,1546 @@ +/* + * VC1 AArch64 NEON optimisations + * @@ -1605,11 +1499,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 4 pixel pairs at boundary of vertically-neighbouring blocks +// On entry: +// x0 -> top-left pel of lower block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_v_loop_filter4_neon, export=1 + sub x3, x0, w1, sxtw #2 -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.s}[0], [x0], x1 // P5 + ld1 {v2.s}[0], [x3], x1 // P1 @@ -1678,11 +1571,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 4 pixel pairs at boundary of horizontally-neighbouring blocks +// On entry: +// x0 -> top-left pel of right block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_h_loop_filter4_neon, export=1 + sub x3, x0, #4 // where to start reading -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x3], x1 + sub x0, x0, #1 // where to start writing @@ -1752,11 +1644,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 8 pixel pairs at boundary of vertically-neighbouring blocks +// On entry: +// x0 -> top-left pel of lower block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_v_loop_filter8_neon, export=1 + sub x3, x0, w1, sxtw #2 -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x0], x1 // P5 + movi v2.2d, #0x0000ffff00000000 @@ -1830,11 +1721,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 8 pixel pairs at boundary of horizontally-neighbouring blocks +// On entry: +// x0 -> top-left pel of right block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_h_loop_filter8_neon, export=1 + sub x3, x0, #4 // where to start reading -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x3], x1 // P1[0], P2[0]... + sub x0, x0, #1 // where to start writing @@ -1939,11 +1829,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 16 pixel pairs at boundary of vertically-neighbouring blocks +// On entry: +// x0 -> top-left pel of lower block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_v_loop_filter16_neon, export=1 + sub x3, x0, w1, sxtw #2 -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.16b}, [x0], x1 // P5 + movi v2.2d, #0x0000ffff00000000 @@ -2071,11 +1960,10 @@ index 0000000000..529c21d285 +// VC-1 in-loop deblocking filter for 16 pixel pairs at boundary of horizontally-neighbouring blocks +// On entry: +// x0 -> top-left pel of right block -+// w1 = row stride, bytes ++// x1 = row stride, bytes +// w2 = PQUANT bitstream parameter +function ff_vc1_h_loop_filter16_neon, export=1 + sub x3, x0, #4 // where to start reading -+ sxtw x1, w1 // technically, stride is signed int + ldr d0, .Lcoeffs + ld1 {v1.8b}, [x3], x1 // P1[0], P2[0]... + sub x0, x0, #1 // where to start writing @@ -17404,7 +17292,7 @@ index 2cca784f5a..48cb816b70 100644 + dsp->vc1_unescape_buffer = vc1_unescape_buffer_neon; } diff --git a/libavcodec/arm/vc1dsp_neon.S b/libavcodec/arm/vc1dsp_neon.S -index 93f043bf08..8e97bc5e58 100644 +index 93f043bf08..96014fbebc 100644 --- a/libavcodec/arm/vc1dsp_neon.S +++ b/libavcodec/arm/vc1dsp_neon.S @@ -1161,3 +1161,764 @@ function ff_vc1_inv_trans_4x4_dc_neon, export=1 @@ -17560,17 +17448,17 @@ index 93f043bf08..8e97bc5e58 100644 +function ff_vc1_v_loop_filter8_neon, export=1 + sub r3, r0, r1, lsl #2 + vldr d0, .Lcoeffs -+ vld1.32 {d1}, [r0], r1 @ P5 -+ vld1.32 {d2}, [r3], r1 @ P1 -+ vld1.32 {d3}, [r3], r1 @ P2 -+ vld1.32 {d4}, [r0], r1 @ P6 -+ vld1.32 {d5}, [r3], r1 @ P3 -+ vld1.32 {d6}, [r0], r1 @ P7 ++ vld1.32 {d1}, [r0 :64], r1 @ P5 ++ vld1.32 {d2}, [r3 :64], r1 @ P1 ++ vld1.32 {d3}, [r3 :64], r1 @ P2 ++ vld1.32 {d4}, [r0 :64], r1 @ P6 ++ vld1.32 {d5}, [r3 :64], r1 @ P3 ++ vld1.32 {d6}, [r0 :64], r1 @ P7 + vshll.u8 q8, d1, #1 @ 2*P5 + vshll.u8 q9, d2, #1 @ 2*P1 -+ vld1.32 {d7}, [r3] @ P4 ++ vld1.32 {d7}, [r3 :64] @ P4 + vmovl.u8 q1, d3 @ P2 -+ vld1.32 {d20}, [r0] @ P8 ++ vld1.32 {d20}, [r0 :64] @ P8 + vmovl.u8 q11, d4 @ P6 + vdup.16 q12, r2 @ pq + vmovl.u8 q13, d5 @ P3 @@ -17625,8 +17513,8 @@ index 93f043bf08..8e97bc5e58 100644 + vmla.i16 q1, q0, q2 @ invert d depending on clip_sign & a0_sign, or zero it if they match, and accumulate into P5 + vqmovun.s16 d0, q3 + vqmovun.s16 d1, q1 -+ vst1.32 {d0}, [r3], r1 -+ vst1.32 {d1}, [r3] ++ vst1.32 {d0}, [r3 :64], r1 ++ vst1.32 {d1}, [r3 :64] +1: bx lr +endfunc + @@ -17741,17 +17629,17 @@ index 93f043bf08..8e97bc5e58 100644 + vpush {d8-d15} + sub r3, r0, r1, lsl #2 + vldr d0, .Lcoeffs -+ vld1.64 {q1}, [r0], r1 @ P5 -+ vld1.64 {q2}, [r3], r1 @ P1 -+ vld1.64 {q3}, [r3], r1 @ P2 -+ vld1.64 {q4}, [r0], r1 @ P6 -+ vld1.64 {q5}, [r3], r1 @ P3 -+ vld1.64 {q6}, [r0], r1 @ P7 ++ vld1.64 {q1}, [r0 :128], r1 @ P5 ++ vld1.64 {q2}, [r3 :128], r1 @ P1 ++ vld1.64 {q3}, [r3 :128], r1 @ P2 ++ vld1.64 {q4}, [r0 :128], r1 @ P6 ++ vld1.64 {q5}, [r3 :128], r1 @ P3 ++ vld1.64 {q6}, [r0 :128], r1 @ P7 + vshll.u8 q7, d2, #1 @ 2*P5[0..7] + vshll.u8 q8, d4, #1 @ 2*P1[0..7] -+ vld1.64 {q9}, [r3] @ P4 ++ vld1.64 {q9}, [r3 :128] @ P4 + vmovl.u8 q10, d6 @ P2[0..7] -+ vld1.64 {q11}, [r0] @ P8 ++ vld1.64 {q11}, [r0 :128] @ P8 + vmovl.u8 q12, d8 @ P6[0..7] + vdup.16 q13, r2 @ pq + vshll.u8 q2, d5, #1 @ 2*P1[8..15] @@ -17861,8 +17749,8 @@ index 93f043bf08..8e97bc5e58 100644 + vqmovun.s16 d0, q6 + vqmovun.s16 d5, q9 + vqmovun.s16 d1, q1 -+ vst1.64 {q2}, [r3], r1 -+ vst1.64 {q0}, [r3] ++ vst1.64 {q2}, [r3 :128], r1 ++ vst1.64 {q0}, [r3 :128] +1: vpop {d8-d15} + bx lr +endfunc @@ -18194,31 +18082,6 @@ index c91b2fd169..003079cdc6 100644 } AVHWAccel; /** -diff --git a/libavcodec/blockdsp.c b/libavcodec/blockdsp.c -index c7efe7e77b..46766244b8 100644 ---- a/libavcodec/blockdsp.c -+++ b/libavcodec/blockdsp.c -@@ -65,6 +65,8 @@ av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx) - c->fill_block_tab[0] = fill_block16_c; - c->fill_block_tab[1] = fill_block8_c; - -+ if (ARCH_AARCH64) -+ ff_blockdsp_init_aarch64(c); - if (ARCH_ALPHA) - ff_blockdsp_init_alpha(c); - if (ARCH_ARM) -diff --git a/libavcodec/blockdsp.h b/libavcodec/blockdsp.h -index 26fc2ea13b..fe539491da 100644 ---- a/libavcodec/blockdsp.h -+++ b/libavcodec/blockdsp.h -@@ -41,6 +41,7 @@ typedef struct BlockDSPContext { - - void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx); - -+void ff_blockdsp_init_aarch64(BlockDSPContext *c); - void ff_blockdsp_init_alpha(BlockDSPContext *c); - void ff_blockdsp_init_arm(BlockDSPContext *c); - void ff_blockdsp_init_ppc(BlockDSPContext *c); diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 1bf1c620d6..ccfa991f60 100644 --- a/libavcodec/cabac.h @@ -19020,6 +18883,536 @@ index 0000000000..4e35bd583d +#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (V4L2_CID_CODEC_HANTRO_BASE + 0) + +#endif +diff --git a/libavcodec/hevc-ctrls-v4.h b/libavcodec/hevc-ctrls-v4.h +new file mode 100644 +index 0000000000..c02fdbe5a8 +--- /dev/null ++++ b/libavcodec/hevc-ctrls-v4.h +@@ -0,0 +1,524 @@ ++/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */ ++/* ++ * Video for Linux Two controls header file ++ * ++ * Copyright (C) 1999-2012 the contributors ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * Alternatively you can redistribute this file under the terms of the ++ * BSD license as stated below: ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in ++ * the documentation and/or other materials provided with the ++ * distribution. ++ * 3. The names of its contributors may not be used to endorse or promote ++ * products derived from this software without specific prior written ++ * permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT ++ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED ++ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ++ * The contents of this header was split off from videodev2.h. All control ++ * definitions should be added to this header, which is included by ++ * videodev2.h. ++ */ ++ ++#ifndef AVCODEC_HEVC_CTRLS_V4_H ++#define AVCODEC_HEVC_CTRLS_V4_H ++ ++#include ++#include ++ ++#ifndef V4L2_CTRL_CLASS_CODEC_STATELESS ++#define V4L2_CTRL_CLASS_CODEC_STATELESS 0x00a40000 /* Stateless codecs controls */ ++#endif ++#ifndef V4L2_CID_CODEC_STATELESS_BASE ++#define V4L2_CID_CODEC_STATELESS_BASE (V4L2_CTRL_CLASS_CODEC_STATELESS | 0x900) ++#endif ++ ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++ ++#define V4L2_CID_STATELESS_HEVC_SPS (V4L2_CID_CODEC_STATELESS_BASE + 400) ++#define V4L2_CID_STATELESS_HEVC_PPS (V4L2_CID_CODEC_STATELESS_BASE + 401) ++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 402) ++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX (V4L2_CID_CODEC_STATELESS_BASE + 403) ++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS (V4L2_CID_CODEC_STATELESS_BASE + 404) ++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE (V4L2_CID_CODEC_STATELESS_BASE + 405) ++#define V4L2_CID_STATELESS_HEVC_START_CODE (V4L2_CID_CODEC_STATELESS_BASE + 406) ++#define V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS (V4L2_CID_CODEC_STATELESS_BASE + 407) ++ ++enum v4l2_stateless_hevc_decode_mode { ++ V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED, ++ V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED, ++}; ++ ++enum v4l2_stateless_hevc_start_code { ++ V4L2_STATELESS_HEVC_START_CODE_NONE, ++ V4L2_STATELESS_HEVC_START_CODE_ANNEX_B, ++}; ++ ++#define V4L2_HEVC_SLICE_TYPE_B 0 ++#define V4L2_HEVC_SLICE_TYPE_P 1 ++#define V4L2_HEVC_SLICE_TYPE_I 2 ++ ++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE (1ULL << 0) ++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED (1ULL << 1) ++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET (1ULL << 3) ++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED (1ULL << 4) ++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED (1ULL << 5) ++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT (1ULL << 6) ++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED (1ULL << 7) ++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED (1ULL << 8) ++ ++/** ++ * struct v4l2_ctrl_hevc_sps - ITU-T Rec. H.265: Sequence parameter set ++ * ++ * @video_parameter_set_id: specifies the value of the ++ * vps_video_parameter_set_id of the active VPS ++ * @seq_parameter_set_id: provides an identifier for the SPS for ++ * reference by other syntax elements ++ * @pic_width_in_luma_samples: specifies the width of each decoded picture ++ * in units of luma samples ++ * @pic_height_in_luma_samples: specifies the height of each decoded picture ++ * in units of luma samples ++ * @bit_depth_luma_minus8: this value plus 8specifies the bit depth of the ++ * samples of the luma array ++ * @bit_depth_chroma_minus8: this value plus 8 specifies the bit depth of the ++ * samples of the chroma arrays ++ * @log2_max_pic_order_cnt_lsb_minus4: this value plus 4 specifies the value of ++ * the variable MaxPicOrderCntLsb ++ * @sps_max_dec_pic_buffering_minus1: this value plus 1 specifies the maximum ++ * required size of the decoded picture ++ * buffer for the codec video sequence ++ * @sps_max_num_reorder_pics: indicates the maximum allowed number of pictures ++ * @sps_max_latency_increase_plus1: not equal to 0 is used to compute the ++ * value of SpsMaxLatencyPictures array ++ * @log2_min_luma_coding_block_size_minus3: plus 3 specifies the minimum ++ * luma coding block size ++ * @log2_diff_max_min_luma_coding_block_size: specifies the difference between ++ * the maximum and minimum luma ++ * coding block size ++ * @log2_min_luma_transform_block_size_minus2: plus 2 specifies the minimum luma ++ * transform block size ++ * @log2_diff_max_min_luma_transform_block_size: specifies the difference between ++ * the maximum and minimum luma ++ * transform block size ++ * @max_transform_hierarchy_depth_inter: specifies the maximum hierarchy ++ * depth for transform units of ++ * coding units coded in inter ++ * prediction mode ++ * @max_transform_hierarchy_depth_intra: specifies the maximum hierarchy ++ * depth for transform units of ++ * coding units coded in intra ++ * prediction mode ++ * @pcm_sample_bit_depth_luma_minus1: this value plus 1 specifies the number of ++ * bits used to represent each of PCM sample ++ * values of the luma component ++ * @pcm_sample_bit_depth_chroma_minus1: this value plus 1 specifies the number ++ * of bits used to represent each of PCM ++ * sample values of the chroma components ++ * @log2_min_pcm_luma_coding_block_size_minus3: this value plus 3 specifies the ++ * minimum size of coding blocks ++ * @log2_diff_max_min_pcm_luma_coding_block_size: specifies the difference between ++ * the maximum and minimum size of ++ * coding blocks ++ * @num_short_term_ref_pic_sets: specifies the number of st_ref_pic_set() ++ * syntax structures included in the SPS ++ * @num_long_term_ref_pics_sps: specifies the number of candidate long-term ++ * reference pictures that are specified in the SPS ++ * @chroma_format_idc: specifies the chroma sampling ++ * @sps_max_sub_layers_minus1: this value plus 1 specifies the maximum number ++ * of temporal sub-layers ++ * @reserved: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_SPS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_sps { ++ __u8 video_parameter_set_id; ++ __u8 seq_parameter_set_id; ++ __u16 pic_width_in_luma_samples; ++ __u16 pic_height_in_luma_samples; ++ __u8 bit_depth_luma_minus8; ++ __u8 bit_depth_chroma_minus8; ++ __u8 log2_max_pic_order_cnt_lsb_minus4; ++ __u8 sps_max_dec_pic_buffering_minus1; ++ __u8 sps_max_num_reorder_pics; ++ __u8 sps_max_latency_increase_plus1; ++ __u8 log2_min_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_luma_coding_block_size; ++ __u8 log2_min_luma_transform_block_size_minus2; ++ __u8 log2_diff_max_min_luma_transform_block_size; ++ __u8 max_transform_hierarchy_depth_inter; ++ __u8 max_transform_hierarchy_depth_intra; ++ __u8 pcm_sample_bit_depth_luma_minus1; ++ __u8 pcm_sample_bit_depth_chroma_minus1; ++ __u8 log2_min_pcm_luma_coding_block_size_minus3; ++ __u8 log2_diff_max_min_pcm_luma_coding_block_size; ++ __u8 num_short_term_ref_pic_sets; ++ __u8 num_long_term_ref_pics_sps; ++ __u8 chroma_format_idc; ++ __u8 sps_max_sub_layers_minus1; ++ ++ __u8 reserved[6]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED (1ULL << 0) ++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT (1ULL << 1) ++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED (1ULL << 2) ++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT (1ULL << 3) ++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED (1ULL << 4) ++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED (1ULL << 5) ++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED (1ULL << 6) ++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT (1ULL << 7) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED (1ULL << 8) ++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED (1ULL << 9) ++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED (1ULL << 10) ++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED (1ULL << 11) ++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED (1ULL << 12) ++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED (1ULL << 13) ++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED (1ULL << 15) ++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER (1ULL << 16) ++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT (1ULL << 17) ++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18) ++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT (1ULL << 19) ++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING (1ULL << 20) ++ ++/** ++ * struct v4l2_ctrl_hevc_pps - ITU-T Rec. H.265: Picture parameter set ++ * ++ * @pic_parameter_set_id: identifies the PPS for reference by other ++ * syntax elements ++ * @num_extra_slice_header_bits: specifies the number of extra slice header ++ * bits that are present in the slice header RBSP ++ * for coded pictures referring to the PPS. ++ * @num_ref_idx_l0_default_active_minus1: this value plus 1 specifies the ++ * inferred value of num_ref_idx_l0_active_minus1 ++ * @num_ref_idx_l1_default_active_minus1: this value plus 1 specifies the ++ * inferred value of num_ref_idx_l1_active_minus1 ++ * @init_qp_minus26: this value plus 26 specifies the initial value of SliceQp Y for ++ * each slice referring to the PPS ++ * @diff_cu_qp_delta_depth: specifies the difference between the luma coding ++ * tree block size and the minimum luma coding block ++ * size of coding units that convey cu_qp_delta_abs ++ * and cu_qp_delta_sign_flag ++ * @pps_cb_qp_offset: specify the offsets to the luma quantization parameter Cb ++ * @pps_cr_qp_offset: specify the offsets to the luma quantization parameter Cr ++ * @num_tile_columns_minus1: this value plus 1 specifies the number of tile columns ++ * partitioning the picture ++ * @num_tile_rows_minus1: this value plus 1 specifies the number of tile rows partitioning ++ * the picture ++ * @column_width_minus1: this value plus 1 specifies the width of the each tile column in ++ * units of coding tree blocks ++ * @row_height_minus1: this value plus 1 specifies the height of the each tile row in ++ * units of coding tree blocks ++ * @pps_beta_offset_div2: specify the default deblocking parameter offsets for ++ * beta divided by 2 ++ * @pps_tc_offset_div2: specify the default deblocking parameter offsets for tC ++ * divided by 2 ++ * @log2_parallel_merge_level_minus2: this value plus 2 specifies the value of ++ * the variable Log2ParMrgLevel ++ * @reserved: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_PPS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_pps { ++ __u8 pic_parameter_set_id; ++ __u8 num_extra_slice_header_bits; ++ __u8 num_ref_idx_l0_default_active_minus1; ++ __u8 num_ref_idx_l1_default_active_minus1; ++ __s8 init_qp_minus26; ++ __u8 diff_cu_qp_delta_depth; ++ __s8 pps_cb_qp_offset; ++ __s8 pps_cr_qp_offset; ++ __u8 num_tile_columns_minus1; ++ __u8 num_tile_rows_minus1; ++ __u8 column_width_minus1[20]; ++ __u8 row_height_minus1[22]; ++ __s8 pps_beta_offset_div2; ++ __s8 pps_tc_offset_div2; ++ __u8 log2_parallel_merge_level_minus2; ++ __u8 reserved; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE 0x01 ++ ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME 0 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_FIELD 1 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_FIELD 2 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM 3 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP 4 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_BOTTOM_TOP 5 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM 6 ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_DOUBLING 7 ++#define V4L2_HEVC_SEI_PIC_STRUCT_FRAME_TRIPLING 8 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_PREVIOUS_BOTTOM 9 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_PREVIOUS_TOP 10 ++#define V4L2_HEVC_SEI_PIC_STRUCT_TOP_PAIRED_NEXT_BOTTOM 11 ++#define V4L2_HEVC_SEI_PIC_STRUCT_BOTTOM_PAIRED_NEXT_TOP 12 ++ ++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX 16 ++ ++/** ++ * struct v4l2_hevc_dpb_entry - HEVC decoded picture buffer entry ++ * ++ * @timestamp: timestamp of the V4L2 capture buffer to use as reference. ++ * @flags: long term flag for the reference frame ++ * @field_pic: whether the reference is a field picture or a frame. ++ * @reserved: padding field. Should be zeroed by applications. ++ * @pic_order_cnt_val: the picture order count of the current picture. ++ */ ++struct v4l2_hevc_dpb_entry { ++ __u64 timestamp; ++ __u8 flags; ++ __u8 field_pic; ++ __u16 reserved; ++ __s32 pic_order_cnt_val; ++}; ++ ++/** ++ * struct v4l2_hevc_pred_weight_table - HEVC weighted prediction parameters ++ * ++ * @delta_luma_weight_l0: the difference of the weighting factor applied ++ * to the luma prediction value for list 0 ++ * @luma_offset_l0: the additive offset applied to the luma prediction value ++ * for list 0 ++ * @delta_chroma_weight_l0: the difference of the weighting factor applied ++ * to the chroma prediction values for list 0 ++ * @chroma_offset_l0: the difference of the additive offset applied to ++ * the chroma prediction values for list 0 ++ * @delta_luma_weight_l1: the difference of the weighting factor applied ++ * to the luma prediction value for list 1 ++ * @luma_offset_l1: the additive offset applied to the luma prediction value ++ * for list 1 ++ * @delta_chroma_weight_l1: the difference of the weighting factor applied ++ * to the chroma prediction values for list 1 ++ * @chroma_offset_l1: the difference of the additive offset applied to ++ * the chroma prediction values for list 1 ++ * @luma_log2_weight_denom: the base 2 logarithm of the denominator for ++ * all luma weighting factors ++ * @delta_chroma_log2_weight_denom: the difference of the base 2 logarithm ++ * of the denominator for all chroma ++ * weighting factors ++ */ ++struct v4l2_hevc_pred_weight_table { ++ __s8 delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __s8 delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __s8 delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ __s8 chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2]; ++ ++ __u8 luma_log2_weight_denom; ++ __s8 delta_chroma_log2_weight_denom; ++}; ++ ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA (1ULL << 0) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA (1ULL << 1) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED (1ULL << 2) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO (1ULL << 3) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT (1ULL << 4) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0 (1ULL << 5) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV (1ULL << 6) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8) ++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT (1ULL << 9) ++ ++/** ++ * struct v4l2_ctrl_hevc_slice_params - HEVC slice parameters ++ * ++ * This control is a dynamically sized 1-dimensional array, ++ * V4L2_CTRL_FLAG_DYNAMIC_ARRAY flag must be set when using it. ++ * ++ * @bit_size: size (in bits) of the current slice data ++ * @data_byte_offset: offset (in bytes) to the video data in the current slice data ++ * @num_entry_point_offsets: specifies the number of entry point offset syntax ++ * elements in the slice header. ++ * @nal_unit_type: specifies the coding type of the slice (B, P or I) ++ * @nuh_temporal_id_plus1: minus 1 specifies a temporal identifier for the NAL unit ++ * @slice_type: see V4L2_HEVC_SLICE_TYPE_{} ++ * @colour_plane_id: specifies the colour plane associated with the current slice ++ * @slice_pic_order_cnt: specifies the picture order count ++ * @num_ref_idx_l0_active_minus1: this value plus 1 specifies the maximum ++ * reference index for reference picture list 0 ++ * that may be used to decode the slice ++ * @num_ref_idx_l1_active_minus1: this value plus 1 specifies the maximum ++ * reference index for reference picture list 1 ++ * that may be used to decode the slice ++ * @collocated_ref_idx: specifies the reference index of the collocated picture used ++ * for temporal motion vector prediction ++ * @five_minus_max_num_merge_cand: specifies the maximum number of merging ++ * motion vector prediction candidates supported in ++ * the slice subtracted from 5 ++ * @slice_qp_delta: specifies the initial value of QpY to be used for the coding ++ * blocks in the slice ++ * @slice_cb_qp_offset: specifies a difference to be added to the value of pps_cb_qp_offset ++ * @slice_cr_qp_offset: specifies a difference to be added to the value of pps_cr_qp_offset ++ * @slice_act_y_qp_offset: screen content extension parameters ++ * @slice_act_cb_qp_offset: screen content extension parameters ++ * @slice_act_cr_qp_offset: screen content extension parameters ++ * @slice_beta_offset_div2: specify the deblocking parameter offsets for beta divided by 2 ++ * @slice_tc_offset_div2: specify the deblocking parameter offsets for tC divided by 2 ++ * @pic_struct: indicates whether a picture should be displayed as a frame or as one or ++ * more fields ++ * @reserved0: padding field. Should be zeroed by applications. ++ * @slice_segment_addr: specifies the address of the first coding tree block in ++ * the slice segment ++ * @ref_idx_l0: the list of L0 reference elements as indices in the DPB ++ * @ref_idx_l1: the list of L1 reference elements as indices in the DPB ++ * @short_term_ref_pic_set_size: specifies the size of short-term reference ++ * pictures set included in the SPS ++ * @long_term_ref_pic_set_size: specifies the size of long-term reference ++ * pictures set include in the SPS ++ * @pred_weight_table: the prediction weight coefficients for inter-picture ++ * prediction ++ * @reserved1: padding field. Should be zeroed by applications. ++ * @flags: see V4L2_HEVC_SLICE_PARAMS_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_slice_params { ++ __u32 bit_size; ++ __u32 data_byte_offset; ++ __u32 num_entry_point_offsets; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */ ++ __u8 nal_unit_type; ++ __u8 nuh_temporal_id_plus1; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u8 slice_type; ++ __u8 colour_plane_id; ++ __s32 slice_pic_order_cnt; ++ __u8 num_ref_idx_l0_active_minus1; ++ __u8 num_ref_idx_l1_active_minus1; ++ __u8 collocated_ref_idx; ++ __u8 five_minus_max_num_merge_cand; ++ __s8 slice_qp_delta; ++ __s8 slice_cb_qp_offset; ++ __s8 slice_cr_qp_offset; ++ __s8 slice_act_y_qp_offset; ++ __s8 slice_act_cb_qp_offset; ++ __s8 slice_act_cr_qp_offset; ++ __s8 slice_beta_offset_div2; ++ __s8 slice_tc_offset_div2; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */ ++ __u8 pic_struct; ++ ++ __u8 reserved0[3]; ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ ++ __u32 slice_segment_addr; ++ __u8 ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u16 short_term_ref_pic_set_size; ++ __u16 long_term_ref_pic_set_size; ++ ++ /* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */ ++ struct v4l2_hevc_pred_weight_table pred_weight_table; ++ ++ __u8 reserved1[2]; ++ __u64 flags; ++}; ++ ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC 0x1 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC 0x2 ++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR 0x4 ++ ++/** ++ * struct v4l2_ctrl_hevc_decode_params - HEVC decode parameters ++ * ++ * @pic_order_cnt_val: picture order count ++ * @short_term_ref_pic_set_size: specifies the size of short-term reference ++ * pictures set included in the SPS of the first slice ++ * @long_term_ref_pic_set_size: specifies the size of long-term reference ++ * pictures set include in the SPS of the first slice ++ * @num_active_dpb_entries: the number of entries in dpb ++ * @num_poc_st_curr_before: the number of reference pictures in the short-term ++ * set that come before the current frame ++ * @num_poc_st_curr_after: the number of reference pictures in the short-term ++ * set that come after the current frame ++ * @num_poc_lt_curr: the number of reference pictures in the long-term set ++ * @poc_st_curr_before: provides the index of the short term before references ++ * in DPB array ++ * @poc_st_curr_after: provides the index of the short term after references ++ * in DPB array ++ * @poc_lt_curr: provides the index of the long term references in DPB array ++ * @reserved: padding field. Should be zeroed by applications. ++ * @dpb: the decoded picture buffer, for meta-data about reference frames ++ * @flags: see V4L2_HEVC_DECODE_PARAM_FLAG_{} ++ */ ++struct v4l2_ctrl_hevc_decode_params { ++ __s32 pic_order_cnt_val; ++ __u16 short_term_ref_pic_set_size; ++ __u16 long_term_ref_pic_set_size; ++ __u8 num_active_dpb_entries; ++ __u8 num_poc_st_curr_before; ++ __u8 num_poc_st_curr_after; ++ __u8 num_poc_lt_curr; ++ __u8 poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u8 reserved[4]; ++ struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]; ++ __u64 flags; ++}; ++ ++/** ++ * struct v4l2_ctrl_hevc_scaling_matrix - HEVC scaling lists parameters ++ * ++ * @scaling_list_4x4: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_8x8: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_16x16: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_32x32: scaling list is used for the scaling process for ++ * transform coefficients. The values on each scaling ++ * list are expected in raster scan order ++ * @scaling_list_dc_coef_16x16: scaling list is used for the scaling process ++ * for transform coefficients. The values on each ++ * scaling list are expected in raster scan order. ++ * @scaling_list_dc_coef_32x32: scaling list is used for the scaling process ++ * for transform coefficients. The values on each ++ * scaling list are expected in raster scan order. ++ */ ++struct v4l2_ctrl_hevc_scaling_matrix { ++ __u8 scaling_list_4x4[6][16]; ++ __u8 scaling_list_8x8[6][64]; ++ __u8 scaling_list_16x16[6][64]; ++ __u8 scaling_list_32x32[2][64]; ++ __u8 scaling_list_dc_coef_16x16[6]; ++ __u8 scaling_list_dc_coef_32x32[2]; ++}; ++ ++#endif diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c index 5af4b788d5..c7314a6af8 100644 --- a/libavcodec/hevc_parser.c @@ -19044,8 +19437,67 @@ index 5af4b788d5..c7314a6af8 100644 if (ps->vps->vps_timing_info_present_flag) { num = ps->vps->vps_num_units_in_tick; den = ps->vps->vps_time_scale; +diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c +index 4f6d985ae6..eefae71275 100644 +--- a/libavcodec/hevc_refs.c ++++ b/libavcodec/hevc_refs.c +@@ -96,18 +96,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s) + if (!frame->rpl_buf) + goto fail; + +- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); +- if (!frame->tab_mvf_buf) +- goto fail; +- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; ++ if (s->tab_mvf_pool) { ++ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool); ++ if (!frame->tab_mvf_buf) ++ goto fail; ++ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data; ++ } + +- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); +- if (!frame->rpl_tab_buf) +- goto fail; +- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; +- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; +- for (j = 0; j < frame->ctb_count; j++) +- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; ++ if (s->rpl_tab_pool) { ++ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool); ++ if (!frame->rpl_tab_buf) ++ goto fail; ++ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data; ++ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height; ++ for (j = 0; j < frame->ctb_count; j++) ++ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data; ++ } + + frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD; + frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD); +@@ -276,14 +280,17 @@ static int init_slice_rpl(HEVCContext *s) + int ctb_count = frame->ctb_count; + int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr]; + int i; ++ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; + + if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab)) + return AVERROR_INVALIDDATA; + +- for (i = ctb_addr_ts; i < ctb_count; i++) +- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx; ++ if (frame->rpl_tab) { ++ for (i = ctb_addr_ts; i < ctb_count; i++) ++ frame->rpl_tab[i] = tab; ++ } + +- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts]; ++ frame->refPicList = tab->refPicList; + + return 0; + } diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c -index 1eaeaf72f1..b6871ff2e2 100644 +index 1eaeaf72f1..ffef145b15 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -332,6 +332,19 @@ static void export_stream_params(HEVCContext *s, const HEVCSPS *sps) @@ -19115,7 +19567,43 @@ index 1eaeaf72f1..b6871ff2e2 100644 #endif break; case AV_PIX_FMT_YUV444P: -@@ -3230,7 +3258,14 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, +@@ -459,6 +487,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps, + if (!sps) + return 0; + ++ // If hwaccel then we don't need all the s/w decode helper arrays ++ if (s->avctx->hwaccel) { ++ export_stream_params(s, sps); ++ ++ s->avctx->pix_fmt = pix_fmt; ++ s->ps.sps = sps; ++ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data; ++ return 0; ++ } ++ + ret = pic_arrays_init(s, sps); + if (ret < 0) + goto fail; +@@ -2809,11 +2847,13 @@ static int hevc_frame_start(HEVCContext *s) + ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1); + int ret; + +- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); +- memset(s->vertical_bs, 0, s->bs_width * s->bs_height); +- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); +- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); +- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); ++ if (s->horizontal_bs) { ++ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height); ++ memset(s->vertical_bs, 0, s->bs_width * s->bs_height); ++ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height); ++ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1)); ++ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address)); ++ } + + s->is_decoded = 0; + s->first_nal_type = s->nal_unit_type; +@@ -3230,7 +3270,14 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output, s->ref = NULL; ret = decode_nal_units(s, avpkt->data, avpkt->size); if (ret < 0) @@ -19130,7 +19618,35 @@ index 1eaeaf72f1..b6871ff2e2 100644 if (avctx->hwaccel) { if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) { -@@ -3585,6 +3620,15 @@ AVCodec ff_hevc_decoder = { +@@ -3273,15 +3320,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src) + if (ret < 0) + return ret; + +- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); +- if (!dst->tab_mvf_buf) +- goto fail; +- dst->tab_mvf = src->tab_mvf; ++ if (src->tab_mvf_buf) { ++ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf); ++ if (!dst->tab_mvf_buf) ++ goto fail; ++ dst->tab_mvf = src->tab_mvf; ++ } + +- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); +- if (!dst->rpl_tab_buf) +- goto fail; +- dst->rpl_tab = src->rpl_tab; ++ if (src->rpl_tab_buf) { ++ dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf); ++ if (!dst->rpl_tab_buf) ++ goto fail; ++ dst->rpl_tab = src->rpl_tab; ++ } + + dst->rpl_buf = av_buffer_ref(src->rpl_buf); + if (!dst->rpl_buf) +@@ -3585,6 +3636,15 @@ AVCodec ff_hevc_decoder = { #if CONFIG_HEVC_NVDEC_HWACCEL HWACCEL_NVDEC(hevc), #endif @@ -49502,7 +50018,7 @@ index 0000000000..85c5b46d75 +}; + diff --git a/libavcodec/v4l2_buffers.c b/libavcodec/v4l2_buffers.c -index 02f23d954b..6ca83cc21b 100644 +index 02f23d954b..7f82ca3fa0 100644 --- a/libavcodec/v4l2_buffers.c +++ b/libavcodec/v4l2_buffers.c @@ -21,6 +21,7 @@ @@ -49513,9 +50029,11 @@ index 02f23d954b..6ca83cc21b 100644 #include #include #include -@@ -30,56 +31,68 @@ +@@ -29,57 +30,82 @@ + #include #include "libavcodec/avcodec.h" #include "libavcodec/internal.h" ++#include "libavutil/avassert.h" #include "libavutil/pixdesc.h" +#include "libavutil/hwcontext.h" #include "v4l2_context.h" @@ -49557,21 +50075,32 @@ index 02f23d954b..6ca83cc21b 100644 } -static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts) -+static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) ++static inline struct timeval tv_from_int(const int64_t t) { - int64_t v4l2_pts; -- ++ return (struct timeval){ ++ .tv_usec = t % USEC_PER_SEC, ++ .tv_sec = t / USEC_PER_SEC ++ }; ++} + - if (pts == AV_NOPTS_VALUE) - pts = 0; -- ++static inline int64_t int_from_tv(const struct timeval t) ++{ ++ return (int64_t)t.tv_sec * USEC_PER_SEC + t.tv_usec; ++} + ++static inline void v4l2_set_pts(V4L2Buffer * const out, const int64_t pts) ++{ /* convert pts to v4l2 timebase */ - v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); +- out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; +- out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; + const int64_t v4l2_pts = -+ out->context->no_pts_rescale ? pts : + pts == AV_NOPTS_VALUE ? 0 : + av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase); - out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC; - out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC; ++ out->buf.timestamp = tv_from_int(v4l2_pts); } -static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf) @@ -49579,18 +50108,20 @@ index 02f23d954b..6ca83cc21b 100644 { - int64_t v4l2_pts; - ++ const int64_t v4l2_pts = int_from_tv(avbuf->buf.timestamp); ++ return v4l2_pts != 0 ? v4l2_pts : AV_NOPTS_VALUE; ++#if 0 /* convert pts back to encoder timebase */ - v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + -+ const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC + - avbuf->buf.timestamp.tv_usec; - -- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +- avbuf->buf.timestamp.tv_usec; + return + avbuf->context->no_pts_rescale ? v4l2_pts : + v4l2_pts == 0 ? AV_NOPTS_VALUE : + av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); ++#endif +} -+ + +- return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf)); +static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length) +{ + if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { @@ -49603,15 +50134,17 @@ index 02f23d954b..6ca83cc21b 100644 } static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) -@@ -116,6 +129,105 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) +@@ -116,49 +142,176 @@ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf) return AVCOL_PRI_UNSPECIFIED; } +-static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) +static void v4l2_set_color(V4L2Buffer *buf, + const enum AVColorPrimaries avcp, + const enum AVColorSpace avcs, + const enum AVColorTransferCharacteristic avxc) -+{ + { +- enum v4l2_quantization qt; + enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; + enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; + enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; @@ -49647,7 +50180,10 @@ index 02f23d954b..6ca83cc21b 100644 + default: + break; + } -+ + +- qt = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? +- buf->context->format.fmt.pix_mp.quantization : +- buf->context->format.fmt.pix.quantization; + switch (avcs) { + case AVCOL_SPC_RGB: + cs = V4L2_COLORSPACE_SRGB; @@ -49677,7 +50213,10 @@ index 02f23d954b..6ca83cc21b 100644 + default: + break; + } -+ + +- switch (qt) { +- case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG; +- case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG; + switch (xfer) { + case AVCOL_TRC_BT709: + xfer = V4L2_XFER_FUNC_709; @@ -49691,10 +50230,11 @@ index 02f23d954b..6ca83cc21b 100644 + case AVCOL_TRC_SMPTE2084: + xfer = V4L2_XFER_FUNC_SMPTE2084; + break; -+ default: -+ break; -+ } -+ + default: + break; + } + +- return AVCOL_RANGE_UNSPECIFIED; + if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) { + buf->context->format.fmt.pix_mp.colorspace = cs; + buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr; @@ -49704,15 +50244,58 @@ index 02f23d954b..6ca83cc21b 100644 + buf->context->format.fmt.pix.ycbcr_enc = ycbcr; + buf->context->format.fmt.pix.xfer_func = xfer; + } -+} -+ - static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) - { - enum v4l2_quantization qt; -@@ -134,6 +246,20 @@ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) - return AVCOL_RANGE_UNSPECIFIED; } +-static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) ++static inline enum v4l2_quantization ++buf_quantization(const V4L2Buffer * const buf) + { +- enum v4l2_ycbcr_encoding ycbcr; +- enum v4l2_colorspace cs; ++ return V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? ++ buf->context->format.fmt.pix_mp.quantization : ++ buf->context->format.fmt.pix.quantization; ++} + +- cs = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? ++static inline enum v4l2_colorspace ++buf_colorspace(const V4L2Buffer * const buf) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? + buf->context->format.fmt.pix_mp.colorspace : + buf->context->format.fmt.pix.colorspace; ++} + +- ycbcr = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? ++static inline enum v4l2_ycbcr_encoding ++buf_ycbcr_enc(const V4L2Buffer * const buf) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? + buf->context->format.fmt.pix_mp.ycbcr_enc: + buf->context->format.fmt.pix.ycbcr_enc; ++} + +- switch(cs) { +- case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB; ++static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf) ++{ ++ switch (buf_quantization(buf)) { ++ case V4L2_QUANTIZATION_LIM_RANGE: ++ return AVCOL_RANGE_MPEG; ++ case V4L2_QUANTIZATION_FULL_RANGE: ++ return AVCOL_RANGE_JPEG; ++ case V4L2_QUANTIZATION_DEFAULT: ++ // If YUV (which we assume for all video decode) then, from the header ++ // comments, range is limited unless CS is JPEG ++ return buf_colorspace(buf) == V4L2_COLORSPACE_JPEG ? ++ AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG; ++ default: ++ break; ++ } ++ ++ return AVCOL_RANGE_UNSPECIFIED; ++} ++ +static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr) +{ + const enum v4l2_quantization q = @@ -49727,32 +50310,64 @@ index 02f23d954b..6ca83cc21b 100644 + } +} + - static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) ++static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) ++{ ++ switch (buf_colorspace(buf)) { ++ case V4L2_COLORSPACE_JPEG: // JPEG -> SRGB ++ case V4L2_COLORSPACE_SRGB: ++ return AVCOL_SPC_RGB; + case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709; + case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC; + case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG; + case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M; + case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M; + case V4L2_COLORSPACE_BT2020: +- if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) +- return AVCOL_SPC_BT2020_CL; +- else +- return AVCOL_SPC_BT2020_NCL; ++ return buf_ycbcr_enc(buf) == V4L2_YCBCR_ENC_BT2020_CONST_LUM ? ++ AVCOL_SPC_BT2020_CL : AVCOL_SPC_BT2020_NCL; + default: + break; + } +@@ -168,17 +321,9 @@ static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf) + + static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) { - enum v4l2_ycbcr_encoding ycbcr; -@@ -210,73 +336,165 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) +- enum v4l2_ycbcr_encoding ycbcr; ++ const enum v4l2_ycbcr_encoding ycbcr = buf_ycbcr_enc(buf); + enum v4l2_xfer_func xfer; +- enum v4l2_colorspace cs; +- +- cs = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? +- buf->context->format.fmt.pix_mp.colorspace : +- buf->context->format.fmt.pix.colorspace; +- +- ycbcr = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? +- buf->context->format.fmt.pix_mp.ycbcr_enc: +- buf->context->format.fmt.pix.ycbcr_enc; ++ const enum v4l2_colorspace cs = buf_colorspace(buf); + + xfer = V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type) ? + buf->context->format.fmt.pix_mp.xfer_func: +@@ -210,73 +355,165 @@ static enum AVColorTransferCharacteristic v4l2_get_color_trc(V4L2Buffer *buf) return AVCOL_TRC_UNSPECIFIED; } -static void v4l2_free_buffer(void *opaque, uint8_t *unused) +static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf) -+{ -+ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); -+} -+ -+static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) { - V4L2Buffer* avbuf = opaque; - V4L2m2mContext *s = buf_to_m2mctx(avbuf); -+ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; ++ return V4L2_FIELD_IS_INTERLACED(buf->buf.field); +} - if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) { - atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel); -+static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) ++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf) +{ -+ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : -+ is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; ++ return buf->buf.field == V4L2_FIELD_INTERLACED_TB; +} - if (s->reinit) { @@ -49766,6 +50381,12 @@ index 02f23d954b..6ca83cc21b 100644 - else if (avbuf->context->streamon) - ff_v4l2_buffer_enqueue(avbuf); - } ++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff) ++{ ++ buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE : ++ is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT; ++} ++ +static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf) +{ + AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; @@ -49791,7 +50412,8 @@ index 02f23d954b..6ca83cc21b 100644 + layer->nb_planes = 1; + + break; -+ + +- av_buffer_unref(&avbuf->context_ref); + case AV_PIX_FMT_NV12: + case AV_PIX_FMT_NV21: + @@ -49810,8 +50432,7 @@ index 02f23d954b..6ca83cc21b 100644 + break; + + case AV_PIX_FMT_YUV420P: - -- av_buffer_unref(&avbuf->context_ref); ++ + layer->format = DRM_FORMAT_YUV420; + + if (avbuf->num_planes > 1) @@ -49863,7 +50484,7 @@ index 02f23d954b..6ca83cc21b 100644 - in->status = V4L2BUF_RET_USER; - atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed); -+ avbuf->status = V4L2BUF_AVAILABLE; ++ ff_v4l2_buffer_set_avail(avbuf); - return 0; + if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) { @@ -49940,7 +50561,7 @@ index 02f23d954b..6ca83cc21b 100644 if (plane >= out->num_planes) return AVERROR(EINVAL); -@@ -284,32 +502,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i +@@ -284,32 +521,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, i length = out->plane_info[plane].length; bytesused = FFMIN(size+offset, length); @@ -50013,7 +50634,7 @@ index 02f23d954b..6ca83cc21b 100644 } /* fixup special cases */ -@@ -318,17 +561,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -318,17 +580,17 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) case AV_PIX_FMT_NV21: if (avbuf->num_planes > 1) break; @@ -50037,7 +50658,7 @@ index 02f23d954b..6ca83cc21b 100644 break; default: -@@ -338,68 +581,95 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -338,68 +600,127 @@ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf) return 0; } @@ -50059,6 +50680,38 @@ index 02f23d954b..6ca83cc21b 100644 +{ + return i != 0 && !(i == num_planes - 1 && (desc->flags & AV_PIX_FMT_FLAG_ALPHA)); +} ++ ++static int v4l2_buffer_primeframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++{ ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ if (frame->format != AV_PIX_FMT_DRM_PRIME || !src) ++ return AVERROR(EINVAL); ++ ++ av_assert0(out->buf.memory == V4L2_MEMORY_DMABUF); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) { ++ // Only currently cope with single buffer types ++ if (out->buf.length != 1) ++ return AVERROR_PATCHWELCOME; ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->planes[0].m.fd = src->objects[0].fd; ++ } ++ else { ++ if (src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ out->buf.m.fd = src->objects[0].fd; ++ } ++ ++ // No need to copy src AVDescriptor and if we did then we may confuse ++ // fd close on free ++ out->ref_buf = av_buffer_ref(frame->buf[0]); ++ ++ return 0; ++} + static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) { @@ -50187,10 +50840,16 @@ index 02f23d954b..6ca83cc21b 100644 return 0; } -@@ -411,7 +681,16 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +@@ -409,16 +730,31 @@ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + * + ******************************************************************************/ - int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts) { +- v4l2_set_pts(out, frame->pts); +- +- return v4l2_buffer_swframe_to_buf(frame, out); + out->buf.flags = frame->key_frame ? + (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : + (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME); @@ -50199,12 +50858,17 @@ index 02f23d954b..6ca83cc21b 100644 + v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc); + v4l2_set_color_range(out, frame->color_range); + // PTS & interlace are buffer vars - v4l2_set_pts(out, frame->pts); ++ if (track_ts) ++ out->buf.timestamp = tv_from_int(track_ts); ++ else ++ v4l2_set_pts(out, frame->pts); + v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first); - - return v4l2_buffer_swframe_to_buf(frame, out); ++ ++ return frame->format == AV_PIX_FMT_DRM_PRIME ? ++ v4l2_buffer_primeframe_to_buf(frame, out) : ++ v4l2_buffer_swframe_to_buf(frame, out); } -@@ -419,6 +698,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out) + int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) { int ret; @@ -50212,7 +50876,7 @@ index 02f23d954b..6ca83cc21b 100644 av_frame_unref(frame); -@@ -429,17 +709,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -429,17 +765,32 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) /* 2. get frame information */ frame->key_frame = !!(avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME); @@ -50248,7 +50912,7 @@ index 02f23d954b..6ca83cc21b 100644 /* 3. report errors upstream */ if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) { -@@ -452,15 +747,14 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) +@@ -452,15 +803,15 @@ int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf) int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) { @@ -50266,16 +50930,18 @@ index 02f23d954b..6ca83cc21b 100644 pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused; - pkt->data = pkt->buf->data; + pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset; ++ pkt->flags = 0; if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME) pkt->flags |= AV_PKT_FLAG_KEY; -@@ -475,31 +769,85 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) +@@ -475,31 +826,91 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf) return 0; } -int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -+ const void *extdata, size_t extlen) ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp) { int ret; @@ -50291,7 +50957,11 @@ index 02f23d954b..6ca83cc21b 100644 + if (ret && ret != AVERROR(ENOMEM)) return ret; - v4l2_set_pts(out, pkt->pts); +- v4l2_set_pts(out, pkt->pts); ++ if (timestamp) ++ out->buf.timestamp = tv_from_int(timestamp); ++ else ++ v4l2_set_pts(out, pkt->pts); - if (pkt->flags & AV_PKT_FLAG_KEY) - out->flags = V4L2_BUF_FLAG_KEYFRAME; @@ -50301,12 +50971,11 @@ index 02f23d954b..6ca83cc21b 100644 - return 0; + return ret; - } - --int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ++} ++ +int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out) +{ -+ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0); ++ return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); +} + + @@ -50326,19 +50995,23 @@ index 02f23d954b..6ca83cc21b 100644 + close(avbuf->drm_frame.objects[i].fd); + } + ++ av_buffer_unref(&avbuf->ref_buf); ++ + ff_weak_link_unref(&avbuf->context_wl); + + av_free(avbuf); -+} + } + +-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) + -+ -+int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx) ++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx, enum v4l2_memory mem) { - V4L2Context *ctx = avbuf->context; int ret, i; + V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf)); + AVBufferRef * bufref; -+ + +- avbuf->buf.memory = V4L2_MEMORY_MMAP; + *pbufref = NULL; + if (avbuf == NULL) + return AVERROR(ENOMEM); @@ -50348,9 +51021,9 @@ index 02f23d954b..6ca83cc21b 100644 + av_free(avbuf); + return AVERROR(ENOMEM); + } - ++ + avbuf->context = ctx; - avbuf->buf.memory = V4L2_MEMORY_MMAP; ++ avbuf->buf.memory = mem; avbuf->buf.type = ctx->type; avbuf->buf.index = index; @@ -50363,7 +51036,7 @@ index 02f23d954b..6ca83cc21b 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.length = VIDEO_MAX_PLANES; avbuf->buf.m.planes = avbuf->planes; -@@ -507,7 +855,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -507,7 +918,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf); if (ret < 0) @@ -50372,7 +51045,16 @@ index 02f23d954b..6ca83cc21b 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->num_planes = 0; -@@ -527,25 +875,33 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -520,6 +931,8 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) + avbuf->num_planes = 1; + + for (i = 0; i < avbuf->num_planes; i++) { ++ const int want_mmap = avbuf->buf.memory == V4L2_MEMORY_MMAP && ++ (V4L2_TYPE_IS_OUTPUT(ctx->type) || !buf_to_m2mctx(avbuf)->output_drm); + + avbuf->plane_info[i].bytesperline = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? + ctx->format.fmt.pix_mp.plane_fmt[i].bytesperline : +@@ -527,25 +940,29 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length; @@ -50380,24 +51062,20 @@ index 02f23d954b..6ca83cc21b 100644 - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); + -+ if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -+ !buf_to_m2mctx(avbuf)->output_drm) { ++ if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset); -+ } } else { avbuf->plane_info[i].length = avbuf->buf.length; - avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, - PROT_READ | PROT_WRITE, MAP_SHARED, - buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); + -+ if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) || -+ !buf_to_m2mctx(avbuf)->output_drm) { ++ if (want_mmap) + avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length, + PROT_READ | PROT_WRITE, MAP_SHARED, + buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset); -+ } } - if (avbuf->plane_info[i].mm_addr == MAP_FAILED) @@ -50417,7 +51095,7 @@ index 02f23d954b..6ca83cc21b 100644 if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { avbuf->buf.m.planes = avbuf->planes; avbuf->buf.length = avbuf->num_planes; -@@ -555,20 +911,51 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) +@@ -555,20 +972,51 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index) avbuf->buf.length = avbuf->planes[0].length; } @@ -50474,10 +51152,10 @@ index 02f23d954b..6ca83cc21b 100644 return 0; } diff --git a/libavcodec/v4l2_buffers.h b/libavcodec/v4l2_buffers.h -index 8dbc7fc104..7d5fadcd3d 100644 +index 8dbc7fc104..e64441ec9b 100644 --- a/libavcodec/v4l2_buffers.h +++ b/libavcodec/v4l2_buffers.h -@@ -27,25 +27,34 @@ +@@ -27,25 +27,38 @@ #include #include @@ -50514,10 +51192,14 @@ index 8dbc7fc104..7d5fadcd3d 100644 - atomic_uint context_refcount; + /* DRM descriptor */ + AVDRMFrameDescriptor drm_frame; ++ /* For DRM_PRIME encode - need to keep a ref to the source buffer till we ++ * are done ++ */ ++ AVBufferRef * ref_buf; /* keep track of the mmap address and mmap length */ struct V4L2Plane_info { -@@ -60,7 +69,6 @@ typedef struct V4L2Buffer { +@@ -60,7 +73,6 @@ typedef struct V4L2Buffer { struct v4l2_buffer buf; struct v4l2_plane planes[VIDEO_MAX_PLANES]; @@ -50525,27 +51207,50 @@ index 8dbc7fc104..7d5fadcd3d 100644 enum V4L2Buffer_status status; } V4L2Buffer; -@@ -98,6 +106,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); +@@ -98,6 +110,10 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *buf); */ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); -+int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out, -+ const void *extdata, size_t extlen); ++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket * const pkt, V4L2Buffer * const out, ++ const void *extdata, size_t extlen, ++ const int64_t timestamp); + /** * Extracts the data from an AVFrame to a V4L2Buffer * -@@ -116,7 +127,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); +@@ -106,7 +122,7 @@ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out); + * + * @returns 0 in case of success, a negative AVERROR code otherwise + */ +-int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); ++int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out, const int64_t track_ts); + + /** + * Initializes a V4L2Buffer +@@ -116,7 +132,7 @@ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out); * * @returns 0 in case of success, a negative AVERROR code otherwise */ -int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); -+int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx); ++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx, enum v4l2_memory mem); /** * Enqueues a V4L2Buffer +@@ -127,5 +143,12 @@ int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index); + */ + int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf); + ++static inline void ++ff_v4l2_buffer_set_avail(V4L2Buffer* const avbuf) ++{ ++ avbuf->status = V4L2BUF_AVAILABLE; ++ av_buffer_unref(&avbuf->ref_buf); ++} ++ + + #endif // AVCODEC_V4L2_BUFFERS_H diff --git a/libavcodec/v4l2_context.c b/libavcodec/v4l2_context.c -index 29b144ed73..077c5223af 100644 +index 29b144ed73..c94b09b60f 100644 --- a/libavcodec/v4l2_context.c +++ b/libavcodec/v4l2_context.c @@ -27,11 +27,13 @@ @@ -50562,49 +51267,191 @@ index 29b144ed73..077c5223af 100644 struct v4l2_format_update { uint32_t v4l2_fmt; -@@ -41,28 +43,18 @@ struct v4l2_format_update { +@@ -41,26 +43,168 @@ struct v4l2_format_update { int update_avfmt; }; -static inline V4L2m2mContext *ctx_to_m2mctx(V4L2Context *ctx) -+static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) ++ ++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) { - return V4L2_TYPE_IS_OUTPUT(ctx->type) ? - container_of(ctx, V4L2m2mContext, output) : - container_of(ctx, V4L2m2mContext, capture); +- return V4L2_TYPE_IS_OUTPUT(ctx->type) ? +- container_of(ctx, V4L2m2mContext, output) : +- container_of(ctx, V4L2m2mContext, capture); ++ return (int64_t)n; } -static inline AVCodecContext *logger(V4L2Context *ctx) -+static inline AVCodecContext *logger(const V4L2Context *ctx) ++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) { - return ctx_to_m2mctx(ctx)->avctx; +- return ctx_to_m2mctx(ctx)->avctx; ++ return (unsigned int)pts; ++} ++ ++// FFmpeg requires us to propagate a number of vars from the coded pkt into ++// the decoded frame. The only thing that tracks like that in V4L2 stateful ++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no ++// guarantees about PTS being unique or specified for every frame so replace ++// the supplied PTS with a simple incrementing number and keep a circular ++// buffer of all the things we want preserved (including the original PTS) ++// indexed by the tracking no. ++static int64_t ++xlat_pts_pkt_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVPacket *const avpkt) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In pkt PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pkt_size = avpkt->size, ++ .pts = avpkt->pts, ++ .dts = avpkt->dts, ++ .reordered_opaque = avctx->reordered_opaque, ++ .pkt_pos = avpkt->pos, ++ .pkt_duration = avpkt->duration, ++ .track_pts = track_pts ++ }; ++ return track_pts; ++} ++ ++static int64_t ++xlat_pts_frame_in(AVCodecContext *const avctx, xlat_track_t *const x, const AVFrame *const frame) ++{ ++ int64_t track_pts; ++ ++ // Avoid 0 ++ if (++x->track_no == 0) ++ x->track_no = 1; ++ ++ track_pts = track_to_pts(avctx, x->track_no); ++ ++ av_log(avctx, AV_LOG_TRACE, "In frame PTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", frame->pts, track_pts, x->track_no); ++ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ ++ .discard = 0, ++ .pending = 1, ++ .pkt_size = 0, ++ .pts = frame->pts, ++ .dts = AV_NOPTS_VALUE, ++ .reordered_opaque = frame->reordered_opaque, ++ .pkt_pos = frame->pkt_pos, ++ .pkt_duration = frame->pkt_duration, ++ .track_pts = track_pts ++ }; ++ return track_pts; ++} ++ ++ ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_frame_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVFrame *const frame) ++{ ++ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) ++ { ++ av_log(avctx, frame->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Frame tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ frame->pts = AV_NOPTS_VALUE; ++ frame->pkt_dts = AV_NOPTS_VALUE; ++ frame->reordered_opaque = x->last_opaque; ++ frame->pkt_pos = -1; ++ frame->pkt_duration = 0; ++ frame->pkt_size = -1; ++ } ++ else if (!t->discard) ++ { ++ frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; ++ frame->pkt_dts = t->dts; ++ frame->reordered_opaque = t->reordered_opaque; ++ frame->pkt_pos = t->pkt_pos; ++ frame->pkt_duration = t->pkt_duration; ++ frame->pkt_size = t->pkt_size; ++ ++ x->last_opaque = x->track_els[n].reordered_opaque; ++ if (frame->pts != AV_NOPTS_VALUE) ++ x->last_pts = frame->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ av_log(avctx, AV_LOG_TRACE, "Out frame PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); ++ return 0; } -static inline unsigned int v4l2_get_width(struct v4l2_format *fmt) --{ -- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; --} -- --static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) --{ -- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; --} -- - static AVRational v4l2_get_sar(V4L2Context *ctx) ++// Returns -1 if we should discard the frame ++static int ++xlat_pts_pkt_out(AVCodecContext *const avctx, ++ xlat_track_t * const x, ++ AVPacket *const pkt) { - struct AVRational sar = { 0, 1 }; -@@ -81,21 +73,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++ unsigned int n = pts_to_track(avctx, pkt->pts) % FF_V4L2_M2M_TRACK_SIZE; ++ V4L2m2mTrackEl *const t = x->track_els + n; ++ if (pkt->pts == AV_NOPTS_VALUE || pkt->pts != t->track_pts) ++ { ++ av_log(avctx, pkt->pts == AV_NOPTS_VALUE ? AV_LOG_DEBUG : AV_LOG_WARNING, ++ "Pkt tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ pkt->pts = AV_NOPTS_VALUE; ++ } ++ else if (!t->discard) ++ { ++ pkt->pts = t->pending ? t->pts : AV_NOPTS_VALUE; ++ ++ x->last_opaque = x->track_els[n].reordered_opaque; ++ if (pkt->pts != AV_NOPTS_VALUE) ++ x->last_pts = pkt->pts; ++ t->pending = 0; ++ } ++ else ++ { ++ av_log(avctx, AV_LOG_DEBUG, "Discard packet (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", pkt->pts, n, t->track_pts); ++ return -1; ++ } ++ ++ // * Would like something much better than this...xlat(offset + out_count)? ++ pkt->dts = pkt->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out pkt PTS=%" PRId64 ", track=%"PRId64", n=%d\n", ++ pkt->pts, t->track_pts, n); ++ return 0; + } + +-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt) ++ ++static inline V4L2m2mContext *ctx_to_m2mctx(const V4L2Context *ctx) + { +- return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++ return V4L2_TYPE_IS_OUTPUT(ctx->type) ? ++ container_of(ctx, V4L2m2mContext, output) : ++ container_of(ctx, V4L2m2mContext, capture); ++} ++ ++static inline AVCodecContext *logger(const V4L2Context *ctx) ++{ ++ return ctx_to_m2mctx(ctx)->avctx; + } + + static AVRational v4l2_get_sar(V4L2Context *ctx) +@@ -81,21 +225,29 @@ static AVRational v4l2_get_sar(V4L2Context *ctx) return sar; } -static inline unsigned int v4l2_resolution_changed(V4L2Context *ctx, struct v4l2_format *fmt2) +static inline int ctx_buffers_alloced(const V4L2Context * const ctx) -+{ -+ return ctx->bufrefs != NULL; -+} -+ -+// Width/Height changed or we don't have an alloc in the first place? -+static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) { - struct v4l2_format *fmt1 = &ctx->format; - int ret = V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? @@ -50613,6 +51460,12 @@ index 29b144ed73..077c5223af 100644 - : - fmt1->fmt.pix.width != fmt2->fmt.pix.width || - fmt1->fmt.pix.height != fmt2->fmt.pix.height; ++ return ctx->bufrefs != NULL; ++} ++ ++// Width/Height changed or we don't have an alloc in the first place? ++static int ctx_resolution_changed(const V4L2Context *ctx, const struct v4l2_format *fmt2) ++{ + const struct v4l2_format *fmt1 = &ctx->format; + int ret = !ctx_buffers_alloced(ctx) || + (V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? @@ -50634,7 +51487,7 @@ index 29b144ed73..077c5223af 100644 return ret; } -@@ -153,90 +153,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd +@@ -153,90 +305,110 @@ static inline void v4l2_save_to_context(V4L2Context* ctx, struct v4l2_format_upd } } @@ -50799,7 +51652,7 @@ index 29b144ed73..077c5223af 100644 return 1; } -@@ -280,171 +300,275 @@ static int v4l2_stop_encode(V4L2Context *ctx) +@@ -280,171 +452,282 @@ static int v4l2_stop_encode(V4L2Context *ctx) return 0; } @@ -50885,16 +51738,18 @@ index 29b144ed73..077c5223af 100644 } - ctx->done = 1; - return NULL; -+ } + } + atomic_fetch_sub(&ctx->q_count, 1); + + avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data; -+ avbuf->status = V4L2BUF_AVAILABLE; ++ ff_v4l2_buffer_set_avail(avbuf); + avbuf->buf = buf; + if (is_mp) { + memcpy(avbuf->planes, planes, sizeof(planes)); + avbuf->buf.m.planes = avbuf->planes; - } ++ } ++ // Done with any attached buffer ++ av_buffer_unref(&avbuf->ref_buf); -start: - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) @@ -51129,13 +51984,6 @@ index 29b144ed73..077c5223af 100644 - ctx->done = 1; -#endif + continue; -+ } -+ -+ if ((pfd.revents & poll_cap) != 0) { -+ ret = dq_buf(ctx, ppavbuf); -+ if (ret == AVERROR(EPIPE)) -+ continue; -+ return ret; } - avbuf = &ctx->buffers[buf.index]; @@ -51144,6 +51992,13 @@ index 29b144ed73..077c5223af 100644 - if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) { - memcpy(avbuf->planes, planes, sizeof(planes)); - avbuf->buf.m.planes = avbuf->planes; ++ if ((pfd.revents & poll_cap) != 0) { ++ ret = dq_buf(ctx, ppavbuf); ++ if (ret == AVERROR(EPIPE)) ++ continue; ++ return ret; ++ } ++ + if ((pfd.revents & poll_out) != 0) { + if (is_cap) + return AVERROR(EAGAIN); @@ -51171,6 +52026,15 @@ index 29b144ed73..077c5223af 100644 + buf->sequence = 0; + + return avbuf; ++} ++ ++void ++ff_v4l2_dq_all(V4L2Context *const ctx) ++{ ++ V4L2Buffer * avbuf; ++ do { ++ get_qbuf(ctx, &avbuf, 0); ++ } while (avbuf); } static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) @@ -51179,14 +52043,12 @@ index 29b144ed73..077c5223af 100644 int i; /* get back as many output buffers as possible */ - if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { +- if (V4L2_TYPE_IS_OUTPUT(ctx->type)) { - do { - } while (v4l2_dequeue_v4l2buf(ctx, timeout)); -+ V4L2Buffer * avbuf; -+ do { -+ get_qbuf(ctx, &avbuf, 0); -+ } while (avbuf); - } +- } ++ if (V4L2_TYPE_IS_OUTPUT(ctx->type)) ++ ff_v4l2_dq_all(ctx); for (i = 0; i < ctx->num_buffers; i++) { - if (ctx->buffers[i].status == V4L2BUF_AVAILABLE) @@ -51197,7 +52059,7 @@ index 29b144ed73..077c5223af 100644 } return NULL; -@@ -452,25 +576,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) +@@ -452,25 +735,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(V4L2Context *ctx) static int v4l2_release_buffers(V4L2Context* ctx) { @@ -51257,7 +52119,7 @@ index 29b144ed73..077c5223af 100644 } static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt) -@@ -499,6 +643,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm +@@ -499,6 +802,8 @@ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfm static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) { @@ -51266,7 +52128,7 @@ index 29b144ed73..077c5223af 100644 enum AVPixelFormat pixfmt = ctx->av_pix_fmt; struct v4l2_fmtdesc fdesc; int ret; -@@ -517,6 +663,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) +@@ -517,6 +822,13 @@ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p) if (ret) return AVERROR(EINVAL); @@ -51280,7 +52142,7 @@ index 29b144ed73..077c5223af 100644 pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO); ret = v4l2_try_raw_format(ctx, pixfmt); if (ret){ -@@ -569,18 +722,83 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) +@@ -569,30 +881,99 @@ static int v4l2_get_coded_format(V4L2Context* ctx, uint32_t *p) * *****************************************************************************/ @@ -51295,7 +52157,7 @@ index 29b144ed73..077c5223af 100644 + for (i = 0; i < ctx->num_buffers; ++i) { + struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data; + if (buf->status == V4L2BUF_IN_DRIVER) -+ buf->status = V4L2BUF_AVAILABLE; ++ ff_v4l2_buffer_set_avail(buf); + } + atomic_store(&ctx->q_count, 0); +} @@ -51355,6 +52217,8 @@ index 29b144ed73..077c5223af 100644 + { + if (cmd == VIDIOC_STREAMOFF) + flush_all_buffers_status(ctx); ++ else ++ ctx->first_buf = 1; + + ctx->streamon = (cmd == VIDIOC_STREAMON); + av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name, @@ -51370,7 +52234,33 @@ index 29b144ed73..077c5223af 100644 } int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) -@@ -608,7 +826,8 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) + { +- V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ V4L2m2mContext *const s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; ++ int64_t track_ts; + V4L2Buffer* avbuf; + int ret; + + if (!frame) { + ret = v4l2_stop_encode(ctx); + if (ret) +- av_log(logger(ctx), AV_LOG_ERROR, "%s stop_encode\n", ctx->name); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_encode\n", ctx->name); + s->draining= 1; + return 0; + } +@@ -601,23 +982,29 @@ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame) + if (!avbuf) + return AVERROR(ENOMEM); + +- ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf); ++ track_ts = xlat_pts_frame_in(avctx, &s->xlat, frame); ++ ++ ret = ff_v4l2_buffer_avframe_to_buf(frame, avbuf, track_ts); + if (ret) + return ret; + return ff_v4l2_buffer_enqueue(avbuf); } @@ -51379,25 +52269,29 @@ index 29b144ed73..077c5223af 100644 + const void * extdata, size_t extlen) { V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; V4L2Buffer* avbuf; -@@ -616,8 +835,9 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) + int ret; ++ int64_t track_ts; if (!pkt->size) { ret = v4l2_stop_decode(ctx); + // Log but otherwise ignore stop failure if (ret) - av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name); -+ av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); ++ av_log(avctx, AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret); s->draining = 1; return 0; } -@@ -626,8 +846,11 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +@@ -626,8 +1013,13 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) if (!avbuf) return AVERROR(EAGAIN); - ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf); - if (ret) -+ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen); ++ track_ts = xlat_pts_pkt_in(avctx, &s->xlat, pkt); ++ ++ ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, track_ts); + if (ret == AVERROR(ENOMEM)) + av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n", + __func__, pkt->size, avbuf->planes[0].length); @@ -51405,9 +52299,12 @@ index 29b144ed73..077c5223af 100644 return ret; return ff_v4l2_buffer_enqueue(avbuf); -@@ -636,19 +859,10 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) +@@ -635,42 +1027,36 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt) + int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; @@ -51420,17 +52317,24 @@ index 29b144ed73..077c5223af 100644 - if (!avbuf) { - if (ctx->done) - return AVERROR_EOF; -- ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) ++ return rv; ++ if ((rv = ff_v4l2_buffer_buf_to_avframe(frame, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_frame_out(avctx, &s->xlat, frame) != 0); + - return AVERROR(EAGAIN); - } -+ if ((rv = get_qbuf(ctx, &avbuf, timeout)) != 0) -+ return rv; - - return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); +- +- return ff_v4l2_buffer_buf_to_avframe(frame, avbuf); ++ return 0; } -@@ -656,19 +870,10 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout) + int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt) { ++ V4L2m2mContext *s = ctx_to_m2mctx(ctx); ++ AVCodecContext *const avctx = s->avctx; V4L2Buffer *avbuf; + int rv; @@ -51443,15 +52347,22 @@ index 29b144ed73..077c5223af 100644 - if (!avbuf) { - if (ctx->done) - return AVERROR_EOF; -- ++ do { ++ if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) ++ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC ++ if ((rv = ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf)) != 0) ++ return rv; ++ } while (xlat_pts_pkt_out(avctx, &s->xlat, pkt) != 0); + - return AVERROR(EAGAIN); - } -+ if ((rv = get_qbuf(ctx, &avbuf, -1)) != 0) -+ return rv == AVERROR(ENOSPC) ? AVERROR(EAGAIN) : rv; // Caller not currently expecting ENOSPC - - return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); +- +- return ff_v4l2_buffer_buf_to_avpkt(pkt, avbuf); ++ return 0; } -@@ -702,78 +907,160 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) + + int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) +@@ -702,78 +1088,179 @@ int ff_v4l2_context_get_format(V4L2Context* ctx, int probe) int ff_v4l2_context_set_format(V4L2Context* ctx) { @@ -51503,7 +52414,7 @@ index 29b144ed73..077c5223af 100644 -int ff_v4l2_context_init(V4L2Context* ctx) + -+static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers) ++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers, const enum v4l2_memory mem) { - V4L2m2mContext *s = ctx_to_m2mctx(ctx); + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); @@ -51524,8 +52435,9 @@ index 29b144ed73..077c5223af 100644 memset(&req, 0, sizeof(req)); - req.count = ctx->num_buffers; +- req.memory = V4L2_MEMORY_MMAP; + req.count = req_buffers; - req.memory = V4L2_MEMORY_MMAP; ++ req.memory = mem; req.type = ctx->type; - ret = ioctl(s->fd, VIDIOC_REQBUFS, &req); - if (ret < 0) { @@ -51560,7 +52472,7 @@ index 29b144ed73..077c5223af 100644 + } + + for (i = 0; i < ctx->num_buffers; i++) { -+ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx); ++ ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx, mem); + if (ret) { av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret)); - goto error; @@ -51589,13 +52501,13 @@ index 29b144ed73..077c5223af 100644 + +int ff_v4l2_context_init(V4L2Context* ctx) +{ ++ struct v4l2_queryctrl qctrl; + V4L2m2mContext * const s = ctx_to_m2mctx(ctx); + int ret; + + // It is not valid to reinit a context without a previous release + av_assert0(ctx->bufrefs == NULL); - -- av_freep(&ctx->buffers); ++ + if (!v4l2_type_supported(ctx)) { + av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type); + return AVERROR_PATCHWELCOME; @@ -51604,7 +52516,8 @@ index 29b144ed73..077c5223af 100644 + ff_mutex_init(&ctx->lock, NULL); + pthread_cond_init(&ctx->cond, NULL); + atomic_init(&ctx->q_count, 0); -+ + +- av_freep(&ctx->buffers); + if (s->output_drm) { + AVHWFramesContext *hwframes; + @@ -51631,7 +52544,25 @@ index 29b144ed73..077c5223af 100644 + goto fail_unref_hwframes; + } + -+ ret = create_buffers(ctx, ctx->num_buffers); ++ memset(&qctrl, 0, sizeof(qctrl)); ++ qctrl.id = V4L2_CID_MIN_BUFFERS_FOR_OUTPUT; ++ if (ioctl(s->fd, VIDIOC_QUERYCTRL, &qctrl) != 0) { ++ ret = AVERROR(errno); ++ if (ret != AVERROR(EINVAL)) { ++ av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_QUERCTRL failed: %s\n", ctx->name, av_err2str(ret)); ++ goto fail_unref_hwframes; ++ } ++ // Control unsupported - set default if wanted ++ if (ctx->num_buffers < 2) ++ ctx->num_buffers = 4; ++ } ++ else { ++ if (ctx->num_buffers < 2) ++ ctx->num_buffers = qctrl.minimum + 2; ++ ctx->num_buffers = av_clip(ctx->num_buffers, qctrl.minimum, qctrl.maximum); ++ } ++ ++ ret = create_buffers(ctx, ctx->num_buffers, ctx->buf_mem); + if (ret < 0) + goto fail_unref_hwframes; + @@ -51644,7 +52575,7 @@ index 29b144ed73..077c5223af 100644 return ret; } diff --git a/libavcodec/v4l2_context.h b/libavcodec/v4l2_context.h -index 22a9532444..a56216e990 100644 +index 22a9532444..311b6f10a4 100644 --- a/libavcodec/v4l2_context.h +++ b/libavcodec/v4l2_context.h @@ -31,6 +31,7 @@ @@ -51655,7 +52586,7 @@ index 22a9532444..a56216e990 100644 #include "v4l2_buffers.h" typedef struct V4L2Context { -@@ -70,11 +71,18 @@ typedef struct V4L2Context { +@@ -70,28 +71,57 @@ typedef struct V4L2Context { */ int width, height; AVRational sample_aspect_ratio; @@ -51676,18 +52607,35 @@ index 22a9532444..a56216e990 100644 /** * Readonly after init. -@@ -92,6 +100,21 @@ typedef struct V4L2Context { + */ + int num_buffers; + ++ /** ++ * Buffer memory type V4L2_MEMORY_MMAP or V4L2_MEMORY_DMABUF ++ */ ++ enum v4l2_memory buf_mem; ++ + /** + * Whether the stream has been started (VIDIOC_STREAMON has been sent). + */ + int streamon; + ++ /* 1st buffer after stream on */ ++ int first_buf; ++ + /** + * Either no more buffers available or an unrecoverable error was notified + * by the V4L2 kernel driver: once set the context has to be exited. */ int done; + int flag_last; + + /** -+ * PTS rescale not wanted -+ * If the PTS is just a dummy frame count then rescale is -+ * actively harmful ++ * If NZ then when Qing frame/pkt use this rather than the ++ * "real" PTS + */ -+ int no_pts_rescale; ++ uint64_t track_ts; + + AVBufferRef *frames_ref; + atomic_int q_count; @@ -51698,7 +52646,7 @@ index 22a9532444..a56216e990 100644 } V4L2Context; /** -@@ -156,7 +179,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); +@@ -156,7 +186,10 @@ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt); * @param[in] ctx The V4L2Context to dequeue from. * @param[inout] f The AVFrame to dequeue to. * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds) @@ -51709,7 +52657,7 @@ index 22a9532444..a56216e990 100644 */ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); -@@ -170,7 +196,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); +@@ -170,7 +203,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout); * @param[in] pkt A pointer to an AVPacket. * @return 0 in case of success, a negative error otherwise. */ @@ -51718,11 +52666,43 @@ index 22a9532444..a56216e990 100644 /** * Enqueues a buffer to a V4L2Context from an AVFrame +@@ -183,4 +216,6 @@ int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt); + */ + int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* f); + ++void ff_v4l2_dq_all(V4L2Context *const ctx); ++ + #endif // AVCODEC_V4L2_CONTEXT_H diff --git a/libavcodec/v4l2_m2m.c b/libavcodec/v4l2_m2m.c -index e48b3a8ccf..32288e5a99 100644 +index e48b3a8ccf..2a9fe32776 100644 --- a/libavcodec/v4l2_m2m.c +++ b/libavcodec/v4l2_m2m.c -@@ -215,13 +215,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) +@@ -36,6 +36,14 @@ + #include "v4l2_fmt.h" + #include "v4l2_m2m.h" + ++static void ++xlat_init(xlat_track_t * const x) ++{ ++ memset(x, 0, sizeof(*x)); ++ x->last_pts = AV_NOPTS_VALUE; ++} ++ ++ + static inline int v4l2_splane_video(struct v4l2_capability *cap) + { + if (cap->capabilities & (V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT) && +@@ -68,7 +76,9 @@ static int v4l2_prepare_contexts(V4L2m2mContext *s, int probe) + + s->capture.done = s->output.done = 0; + s->capture.name = "capture"; ++ s->capture.buf_mem = V4L2_MEMORY_MMAP; + s->output.name = "output"; ++ s->output.buf_mem = s->input_drm ? V4L2_MEMORY_DMABUF : V4L2_MEMORY_MMAP; + atomic_init(&s->refcount, 0); + sem_init(&s->refsync, 0, 0); + +@@ -215,13 +225,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n"); /* 2. unmap the capture buffers (v4l2 and ffmpeg): @@ -51736,7 +52716,7 @@ index e48b3a8ccf..32288e5a99 100644 ff_v4l2_context_release(&s->capture); /* 3. get the new capture format */ -@@ -240,7 +234,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) +@@ -240,7 +244,6 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *s) /* 5. complete reinit */ s->draining = 0; @@ -51744,7 +52724,7 @@ index e48b3a8ccf..32288e5a99 100644 return 0; } -@@ -274,7 +267,6 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s) +@@ -274,7 +277,6 @@ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *s) /* start again now that we know the stream dimensions */ s->draining = 0; @@ -51752,7 +52732,7 @@ index e48b3a8ccf..32288e5a99 100644 ret = ff_v4l2_context_get_format(&s->output, 0); if (ret) { -@@ -328,7 +320,13 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) +@@ -328,7 +330,13 @@ static void v4l2_m2m_destroy_context(void *opaque, uint8_t *context) ff_v4l2_context_release(&s->capture); sem_destroy(&s->refsync); @@ -51767,7 +52747,7 @@ index e48b3a8ccf..32288e5a99 100644 av_free(s); } -@@ -338,17 +336,34 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) +@@ -338,17 +346,34 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *priv) V4L2m2mContext *s = priv->context; int ret; @@ -51808,8 +52788,53 @@ index e48b3a8ccf..32288e5a99 100644 av_buffer_unref(&priv->context_ref); return 0; +@@ -392,28 +417,33 @@ int ff_v4l2_m2m_codec_init(V4L2m2mPriv *priv) + return v4l2_configure_contexts(s); + } + +-int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **s) ++int ff_v4l2_m2m_create_context(V4L2m2mPriv *priv, V4L2m2mContext **pps) + { +- *s = av_mallocz(sizeof(V4L2m2mContext)); +- if (!*s) ++ V4L2m2mContext * const s = av_mallocz(sizeof(V4L2m2mContext)); ++ ++ *pps = NULL; ++ if (!s) + return AVERROR(ENOMEM); + +- priv->context_ref = av_buffer_create((uint8_t *) *s, sizeof(V4L2m2mContext), ++ priv->context_ref = av_buffer_create((uint8_t *)s, sizeof(*s), + &v4l2_m2m_destroy_context, NULL, 0); + if (!priv->context_ref) { +- av_freep(s); ++ av_free(s); + return AVERROR(ENOMEM); + } + + /* assign the context */ +- priv->context = *s; +- (*s)->priv = priv; ++ priv->context = s; ++ s->priv = priv; + + /* populate it */ +- priv->context->capture.num_buffers = priv->num_capture_buffers; +- priv->context->output.num_buffers = priv->num_output_buffers; +- priv->context->self_ref = priv->context_ref; +- priv->context->fd = -1; ++ s->capture.num_buffers = priv->num_capture_buffers; ++ s->output.num_buffers = priv->num_output_buffers; ++ s->self_ref = priv->context_ref; ++ s->fd = -1; ++ ++ xlat_init(&s->xlat); + ++ *pps = s; + return 0; + } diff --git a/libavcodec/v4l2_m2m.h b/libavcodec/v4l2_m2m.h -index 456281f48c..8544b528e1 100644 +index 456281f48c..ea39b0a757 100644 --- a/libavcodec/v4l2_m2m.h +++ b/libavcodec/v4l2_m2m.h @@ -30,6 +30,7 @@ @@ -51820,7 +52845,7 @@ index 456281f48c..8544b528e1 100644 #include "v4l2_context.h" #define container_of(ptr, type, member) ({ \ -@@ -38,7 +39,38 @@ +@@ -38,7 +39,37 @@ #define V4L_M2M_DEFAULT_OPTS \ { "num_output_buffers", "Number of buffers in the output context",\ @@ -51853,14 +52878,13 @@ index 456281f48c..8544b528e1 100644 +typedef struct xlat_track_s { + unsigned int track_no; + int64_t last_pts; -+ int64_t last_pkt_dts; + int64_t last_opaque; + V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE]; +} xlat_track_t; typedef struct V4L2m2mContext { char devname[PATH_MAX]; -@@ -52,7 +84,6 @@ typedef struct V4L2m2mContext { +@@ -52,7 +83,6 @@ typedef struct V4L2m2mContext { AVCodecContext *avctx; sem_t refsync; atomic_uint refcount; @@ -51868,7 +52892,7 @@ index 456281f48c..8544b528e1 100644 /* null frame/packet received */ int draining; -@@ -63,6 +94,33 @@ typedef struct V4L2m2mContext { +@@ -63,6 +93,36 @@ typedef struct V4L2m2mContext { /* reference back to V4L2m2mPriv */ void *priv; @@ -51878,6 +52902,9 @@ index 456281f48c..8544b528e1 100644 + /* generate DRM frames */ + int output_drm; + ++ /* input frames are drmprime */ ++ int input_drm; ++ + /* Frame tracking */ + xlat_track_t xlat; + int pending_hw; @@ -51891,7 +52918,7 @@ index 456281f48c..8544b528e1 100644 + /* Ext data sent */ + int extdata_sent; + /* Ext data sent in packet - overrides ctx */ -+ uint8_t * extdata_data; ++ void * extdata_data; + size_t extdata_size; + +#define FF_V4L2_QUIRK_REINIT_ALWAYS 1 @@ -51902,7 +52929,7 @@ index 456281f48c..8544b528e1 100644 } V4L2m2mContext; typedef struct V4L2m2mPriv { -@@ -73,6 +131,7 @@ typedef struct V4L2m2mPriv { +@@ -73,6 +133,7 @@ typedef struct V4L2m2mPriv { int num_output_buffers; int num_capture_buffers; @@ -51910,7 +52937,7 @@ index 456281f48c..8544b528e1 100644 } V4L2m2mPriv; /** -@@ -126,4 +185,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); +@@ -126,4 +187,26 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mContext *ctx); */ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx); @@ -51938,7 +52965,7 @@ index 456281f48c..8544b528e1 100644 + #endif /* AVCODEC_V4L2_M2M_H */ diff --git a/libavcodec/v4l2_m2m_dec.c b/libavcodec/v4l2_m2m_dec.c -index 3e17e0fcac..dd383f31e5 100644 +index 3e17e0fcac..18f3bc7ff2 100644 --- a/libavcodec/v4l2_m2m_dec.c +++ b/libavcodec/v4l2_m2m_dec.c @@ -23,6 +23,10 @@ @@ -51952,7 +52979,7 @@ index 3e17e0fcac..dd383f31e5 100644 #include "libavutil/pixfmt.h" #include "libavutil/pixdesc.h" #include "libavutil/opt.h" -@@ -30,75 +34,107 @@ +@@ -30,75 +34,264 @@ #include "libavcodec/decode.h" #include "libavcodec/internal.h" @@ -51969,18 +52996,22 @@ index 3e17e0fcac..dd383f31e5 100644 +#define STATS_LAST_COUNT_MAX 64 +#define STATS_INTERVAL_MAX (1 << 30) + -+static int64_t pts_stats_guess(const pts_stats_t * const stats) ++#ifndef FF_API_BUFFER_SIZE_T ++#define FF_API_BUFFER_SIZE_T 1 ++#endif ++ ++#define DUMP_FAILED_EXTRADATA 0 ++ ++#if DUMP_FAILED_EXTRADATA ++static inline char hex1(unsigned int x) { - V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; - V4L2Context *const capture = &s->capture; - V4L2Context *const output = &s->output; - struct v4l2_selection selection = { 0 }; - int ret; -+ if (stats->last_pts == AV_NOPTS_VALUE || -+ stats->last_interval == 0 || -+ stats->last_count >= STATS_LAST_COUNT_MAX) -+ return AV_NOPTS_VALUE; -+ return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; ++ x &= 0xf; ++ return x <= 9 ? '0' + x : 'a' + x - 10; +} - /* 1. start the output process */ @@ -51989,17 +53020,104 @@ index 3e17e0fcac..dd383f31e5 100644 - if (ret < 0) { - av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n"); - return ret; +- } ++static inline char * hex2(char * s, unsigned int x) ++{ ++ *s++ = hex1(x >> 4); ++ *s++ = hex1(x); ++ return s; ++} ++ ++static inline char * hex4(char * s, unsigned int x) ++{ ++ s = hex2(s, x >> 8); ++ s = hex2(s, x); ++ return s; ++} ++ ++static inline char * dash2(char * s) ++{ ++ *s++ = '-'; ++ *s++ = '-'; ++ return s; ++} ++ ++static void ++data16(char * s, const unsigned int offset, const uint8_t * m, const size_t len) ++{ ++ size_t i; ++ s = hex4(s, offset); ++ m += offset; ++ for (i = 0; i != 8; ++i) { ++ *s++ = ' '; ++ s = len > i + offset ? hex2(s, *m++) : dash2(s); ++ } ++ *s++ = ' '; ++ *s++ = ':'; ++ for (; i != 16; ++i) { ++ *s++ = ' '; ++ s = len > i + offset ? hex2(s, *m++) : dash2(s); + } ++ *s++ = 0; ++} + +- if (capture->streamon) +- return 0; ++static void ++log_dump(void * logctx, int lvl, const void * const data, const size_t len) ++{ ++ size_t i; ++ for (i = 0; i < len; i += 16) { ++ char buf[80]; ++ data16(buf, i, data, len); ++ av_log(logctx, lvl, "%s\n", buf); ++ } ++} ++#endif + +- /* 2. get the capture format */ +- capture->format.type = capture->type; +- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); +- if (ret) { +- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); +- return ret; ++static int64_t pts_stats_guess(const pts_stats_t * const stats) ++{ ++ if (stats->last_pts == AV_NOPTS_VALUE || ++ stats->last_interval == 0 || ++ stats->last_count >= STATS_LAST_COUNT_MAX) ++ return AV_NOPTS_VALUE; ++ return stats->last_pts + (int64_t)(stats->last_count - 1) * (int64_t)stats->last_interval; ++} ++ +static void pts_stats_add(pts_stats_t * const stats, int64_t pts) +{ + if (pts == AV_NOPTS_VALUE || pts == stats->last_pts) { + if (stats->last_count < STATS_LAST_COUNT_MAX) + ++stats->last_count; + return; -+ } -+ + } + +- /* 2.1 update the AVCodecContext */ +- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); +- capture->av_pix_fmt = avctx->pix_fmt; + if (stats->last_pts != AV_NOPTS_VALUE) { + const int64_t interval = pts - stats->last_pts; -+ + +- /* 3. set the crop parameters */ +- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; +- selection.r.height = avctx->coded_height; +- selection.r.width = avctx->coded_width; +- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); +- if (!ret) { +- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); +- if (ret) { +- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); +- } else { +- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); +- /* update the size of the resulting frame */ +- capture->height = selection.r.height; +- capture->width = selection.r.width; + if (interval < 0 || interval >= STATS_INTERVAL_MAX || + stats->last_count >= STATS_LAST_COUNT_MAX) { + if (stats->last_interval != 0) @@ -52017,7 +53135,12 @@ index 3e17e0fcac..dd383f31e5 100644 } } -- if (capture->streamon) +- /* 4. init the capture context now that we have the capture format */ +- if (!capture->buffers) { +- ret = ff_v4l2_context_init(capture); +- if (ret) { +- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); +- return AVERROR(ENOMEM); + stats->last_pts = pts; + stats->last_count = 1; +} @@ -52033,6 +53156,102 @@ index 3e17e0fcac..dd383f31e5 100644 + }; +} + ++// If abdata == NULL then this just counts space required ++// Unpacks avcC if detected ++static int ++h264_xd_copy(const uint8_t * const extradata, const int extrasize, uint8_t * abdata) ++{ ++ const uint8_t * const xdend = extradata + extrasize; ++ const uint8_t * p = extradata; ++ uint8_t * d = abdata; ++ unsigned int n; ++ unsigned int len; ++ const unsigned int hdrlen = 4; ++ unsigned int need_pps = 1; ++ ++ if (extrasize < 8) ++ return AVERROR(EINVAL); ++ ++ if (p[0] == 0 && p[1] == 0) { ++ // Assume a couple of leading zeros are good enough to indicate NAL ++ if (abdata) ++ memcpy(d, p, extrasize); ++ return extrasize; ++ } ++ ++ // avcC starts with a 1 ++ if (p[0] != 1) ++ return AVERROR(EINVAL); ++ ++ p += 5; ++ n = *p++ & 0x1f; ++ ++doxps: ++ while (n--) { ++ if (xdend - p < 2) ++ return AVERROR(EINVAL); ++ len = (p[0] << 8) | p[1]; ++ p += 2; ++ if (xdend - p < (ptrdiff_t)len) ++ return AVERROR(EINVAL); ++ if (abdata) { ++ d[0] = 0; ++ d[1] = 0; ++ d[2] = 0; ++ d[3] = 1; ++ memcpy(d + 4, p, len); + } ++ d += len + hdrlen; ++ p += len; ++ } ++ if (need_pps) { ++ need_pps = 0; ++ if (p >= xdend) ++ return AVERROR(EINVAL); ++ n = *p++; ++ goto doxps; + } + +- /* 5. start the capture process */ +- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); +- if (ret) { +- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); ++ return d - abdata; ++} ++ ++static int ++copy_extradata(AVCodecContext * const avctx, ++ const void * const src_data, const int src_len, ++ void ** const pdst_data, size_t * const pdst_len) ++{ ++ int len; ++ ++ *pdst_len = 0; ++ av_freep(pdst_data); ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264) ++ len = h264_xd_copy(src_data, src_len, NULL); ++ else ++ len = src_len < 0 ? AVERROR(EINVAL) : src_len; ++ ++ // Zero length is OK but we swant to stop - -ve is error val ++ if (len <= 0) ++ return len; ++ ++ if ((*pdst_data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) ++ return AVERROR(ENOMEM); ++ ++ if (avctx->codec_id == AV_CODEC_ID_H264) ++ h264_xd_copy(src_data, src_len, *pdst_data); ++ else ++ memcpy(*pdst_data, src_data, len); ++ *pdst_len = len; ++ ++ return 0; ++} ++ ++ ++ +static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s) +{ + int ret; @@ -52042,163 +53261,47 @@ index 3e17e0fcac..dd383f31e5 100644 + }; + + if (s->output.streamon) - return 0; - -- /* 2. get the capture format */ -- capture->format.type = capture->type; -- ret = ioctl(s->fd, VIDIOC_G_FMT, &capture->format); -- if (ret) { -- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_FMT ioctl\n"); ++ return 0; ++ + ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON); + if (ret != 0) { + av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context: %s\n", av_err2str(ret)); return ret; } -- /* 2.1 update the AVCodecContext */ -- avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO); -- capture->av_pix_fmt = avctx->pix_fmt; -- -- /* 3. set the crop parameters */ -- selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -- selection.r.height = avctx->coded_height; -- selection.r.width = avctx->coded_width; -- ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection); -- if (!ret) { -- ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection); -- if (ret) { -- av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n"); -- } else { -- av_log(avctx, AV_LOG_DEBUG, "crop output %dx%d\n", selection.r.width, selection.r.height); -- /* update the size of the resulting frame */ -- capture->height = selection.r.height; -- capture->width = selection.r.width; -- } + // STREAMON should do implicit START so this just for those that don't. + // It is optional so don't worry if it fails + if (ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd) < 0) { + ret = AVERROR(errno); + av_log(avctx, AV_LOG_WARNING, "VIDIOC_DECODER_CMD start error: %s\n", av_err2str(ret)); - } -- -- /* 4. init the capture context now that we have the capture format */ -- if (!capture->buffers) { -- ret = ff_v4l2_context_init(capture); -- if (ret) { -- av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n"); -- return AVERROR(ENOMEM); -- } ++ } + else { + av_log(avctx, AV_LOG_TRACE, "VIDIOC_DECODER_CMD start OK\n"); - } ++ } + return 0; +} - -- /* 5. start the capture process */ -- ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON); -- if (ret) { -- av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON, on capture context\n"); -- return ret; -- } ++ +static int v4l2_try_start(AVCodecContext *avctx) +{ + V4L2m2mContext * const s = ((V4L2m2mPriv*)avctx->priv_data)->context; + int ret; - ++ + /* 1. start the output process */ + if ((ret = check_output_streamon(avctx, s)) != 0) + return ret; return 0; } -@@ -133,52 +169,606 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) +@@ -133,52 +326,518 @@ static int v4l2_prepare_decoder(V4L2m2mContext *s) return 0; } -static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame) -+static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n) -+{ -+ return (int64_t)n; -+} -+ -+static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts) -+{ -+ return (unsigned int)pts; -+} -+ -+// FFmpeg requires us to propagate a number of vars from the coded pkt into -+// the decoded frame. The only thing that tracks like that in V4L2 stateful -+// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no -+// guarantees about PTS being unique or specified for every frame so replace -+// the supplied PTS with a simple incrementing number and keep a circular -+// buffer of all the things we want preserved (including the original PTS) -+// indexed by the tracking no. +static void -+xlat_pts_in(AVCodecContext *const avctx, xlat_track_t *const x, AVPacket *const avpkt) -+{ -+ int64_t track_pts; -+ -+ // Avoid 0 -+ if (++x->track_no == 0) -+ x->track_no = 1; -+ -+ track_pts = track_to_pts(avctx, x->track_no); -+ -+ av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, x->track_no); -+ x->last_pkt_dts = avpkt->dts; -+ x->track_els[x->track_no % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){ -+ .discard = 0, -+ .pending = 1, -+ .pkt_size = avpkt->size, -+ .pts = avpkt->pts, -+ .dts = avpkt->dts, -+ .reordered_opaque = avctx->reordered_opaque, -+ .pkt_pos = avpkt->pos, -+ .pkt_duration = avpkt->duration, -+ .track_pts = track_pts -+ }; -+ avpkt->pts = track_pts; -+} -+ -+// Returns -1 if we should discard the frame -+static int -+xlat_pts_out(AVCodecContext *const avctx, -+ xlat_track_t * const x, ++set_best_effort_pts(AVCodecContext *const avctx, + pts_stats_t * const ps, + AVFrame *const frame) +{ -+ unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE; -+ V4L2m2mTrackEl *const t = x->track_els + n; -+ if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts) -+ { -+ av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ frame->pts = AV_NOPTS_VALUE; -+ frame->pkt_dts = x->last_pkt_dts; -+ frame->reordered_opaque = x->last_opaque; -+ frame->pkt_pos = -1; -+ frame->pkt_duration = 0; -+ frame->pkt_size = -1; -+ } -+ else if (!t->discard) -+ { -+ frame->pts = t->pending ? t->pts : AV_NOPTS_VALUE; -+ frame->pkt_dts = x->last_pkt_dts; -+ frame->reordered_opaque = t->reordered_opaque; -+ frame->pkt_pos = t->pkt_pos; -+ frame->pkt_duration = t->pkt_duration; -+ frame->pkt_size = t->pkt_size; -+ -+ x->last_opaque = x->track_els[n].reordered_opaque; -+ if (frame->pts != AV_NOPTS_VALUE) -+ x->last_pts = frame->pts; -+ t->pending = 0; -+ } -+ else -+ { -+ av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts); -+ return -1; -+ } -+ + pts_stats_add(ps, frame->pts); + +#if FF_API_PKT_PTS @@ -52207,10 +53310,15 @@ index 3e17e0fcac..dd383f31e5 100644 +FF_ENABLE_DEPRECATION_WARNINGS +#endif + frame->best_effort_timestamp = pts_stats_guess(ps); -+ frame->pkt_dts = frame->pts; // We can't emulate what s/w does in a useful manner? -+ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 ", track=%"PRId64", n=%d\n", -+ frame->pts, frame->best_effort_timestamp, frame->pkt_dts, t->track_pts, n); -+ return 0; ++ // If we can't guess from just PTS - try DTS ++ if (frame->best_effort_timestamp == AV_NOPTS_VALUE) ++ frame->best_effort_timestamp = frame->pkt_dts; ++ ++ // We can't emulate what s/w does in a useful manner and using the ++ // "correct" answer seems to just confuse things. ++ frame->pkt_dts = frame->pts; ++ av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 "/%"PRId64", DTS=%" PRId64 "\n", ++ frame->pts, frame->best_effort_timestamp, frame->pkt_dts); +} + +static void @@ -52224,13 +53332,6 @@ index 3e17e0fcac..dd383f31e5 100644 + x->last_pts = AV_NOPTS_VALUE; +} + -+static void -+xlat_init(xlat_track_t * const x) -+{ -+ memset(x, 0, sizeof(*x)); -+ x->last_pts = AV_NOPTS_VALUE; -+} -+ +static int +xlat_pending(const xlat_track_t * const x) +{ @@ -52309,8 +53410,11 @@ index 3e17e0fcac..dd383f31e5 100644 + + for (i = 0; i < 256; ++i) { + uint8_t * side_data; ++#if FF_API_BUFFER_SIZE_T ++ int side_size; ++#else + size_t side_size; -+ ++#endif + ret = ff_decode_get_packet(avctx, &s->buf_pkt); + if (ret != 0) + break; @@ -52319,13 +53423,8 @@ index 3e17e0fcac..dd383f31e5 100644 + side_data = av_packet_get_side_data(&s->buf_pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); + if (side_data) { + av_log(avctx, AV_LOG_DEBUG, "New extradata\n"); -+ av_freep(&s->extdata_data); -+ if ((s->extdata_data = av_malloc(side_size ? side_size : 1)) == NULL) { -+ av_log(avctx, AV_LOG_ERROR, "Failed to alloc %zd bytes of extra data\n", side_size); -+ return AVERROR(ENOMEM); -+ } -+ memcpy(s->extdata_data, side_data, side_size); -+ s->extdata_size = side_size; ++ if ((ret = copy_extradata(avctx, side_data, (int)side_size, &s->extdata_data, &s->extdata_size)) < 0) ++ av_log(avctx, AV_LOG_WARNING, "Failed to copy new extra data: %s\n", av_err2str(ret)); + s->extdata_sent = 0; + } + @@ -52381,20 +53480,18 @@ index 3e17e0fcac..dd383f31e5 100644 + av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret); return ret; + } ++ } + -+ xlat_pts_in(avctx, &s->xlat, &s->buf_pkt); - } - -- if (s->draining) -- goto dequeue; + if (s->draining) { + if (s->buf_pkt.size) { + av_log(avctx, AV_LOG_WARNING, "Unexpected input whilst draining\n"); + av_packet_unref(&s->buf_pkt); + } + return NQ_DRAINING; -+ } -+ + } + +- if (s->draining) +- goto dequeue; + if (!s->buf_pkt.size) + return NQ_NONE; + @@ -52409,8 +53506,6 @@ index 3e17e0fcac..dd383f31e5 100644 + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0); + else if (s->extdata_data) + ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, s->extdata_data, s->extdata_size); -+ else -+ ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, avctx->extradata, avctx->extradata_size); - s->buf_pkt = avpkt; - /* no input buffers available, continue dequeing */ @@ -52519,53 +53614,51 @@ index 3e17e0fcac..dd383f31e5 100644 + if (dst_rv != 0 && TRY_DQ(src_rv)) { + // Pick a timeout depending on state + const int t = ++ src_rv == NQ_Q_FULL ? -1 : + src_rv == NQ_DRAINING ? 300 : -+ prefer_dq ? 5 : -+ src_rv == NQ_Q_FULL ? -1 : 0; ++ prefer_dq ? 5 : 0; + -+ do { -+ // Dequeue frame will unref any previous contents of frame -+ // if it returns success so we don't need an explicit unref -+ // when discarding -+ // This returns AVERROR(EAGAIN) on timeout or if -+ // there is room in the input Q and timeout == -1 -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); ++ // Dequeue frame will unref any previous contents of frame ++ // if it returns success so we don't need an explicit unref ++ // when discarding ++ // This returns AVERROR(EAGAIN) on timeout or if ++ // there is room in the input Q and timeout == -1 ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + -+ // Failure due to no buffer in Q? -+ if (dst_rv == AVERROR(ENOSPC)) { -+ // Wait & retry -+ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { -+ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); -+ } ++ // Failure due to no buffer in Q? ++ if (dst_rv == AVERROR(ENOSPC)) { ++ // Wait & retry ++ if ((dst_rv = qbuf_wait(avctx, &s->capture)) == 0) { ++ dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, t); + } ++ } + -+ // Adjust dynamic pending threshold -+ if (dst_rv == 0) { -+ if (--s->pending_hw < PENDING_HW_MIN) -+ s->pending_hw = PENDING_HW_MIN; ++ // Adjust dynamic pending threshold ++ if (dst_rv == 0) { ++ if (--s->pending_hw < PENDING_HW_MIN) ++ s->pending_hw = PENDING_HW_MIN; ++ s->pending_n = 0; ++ ++ set_best_effort_pts(avctx, &s->pts_stat, frame); ++ } ++ else if (dst_rv == AVERROR(EAGAIN)) { ++ if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { ++ s->pending_hw = pending * 16 + PENDING_HW_OFFSET; + s->pending_n = 0; + } -+ else if (dst_rv == AVERROR(EAGAIN)) { -+ if (prefer_dq && ++s->pending_n > PENDING_N_THRESHOLD) { -+ s->pending_hw = pending * 16 + PENDING_HW_OFFSET; -+ s->pending_n = 0; -+ } -+ } ++ } + -+ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { -+ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); -+ dst_rv = AVERROR_EOF; -+ s->capture.done = 1; -+ } -+ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) -+ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", -+ s->draining, s->capture.done); -+ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) -+ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", -+ s->draining, s->capture.done, dst_rv); -+ -+ // Go again if we got a frame that we need to discard -+ } while (dst_rv == 0 && xlat_pts_out(avctx, &s->xlat, &s->pts_stat, frame)); ++ if (dst_rv == AVERROR(EAGAIN) && src_rv == NQ_DRAINING) { ++ av_log(avctx, AV_LOG_WARNING, "Timeout in drain - assume EOF"); ++ dst_rv = AVERROR_EOF; ++ s->capture.done = 1; ++ } ++ else if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done)) ++ av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n", ++ s->draining, s->capture.done); ++ else if (dst_rv && dst_rv != AVERROR(EAGAIN)) ++ av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n", ++ s->draining, s->capture.done, dst_rv); + } + + ++i; @@ -52748,7 +53841,7 @@ index 3e17e0fcac..dd383f31e5 100644 } static av_cold int v4l2_decode_init(AVCodecContext *avctx) -@@ -186,12 +776,30 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) +@@ -186,12 +845,29 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) V4L2Context *capture, *output; V4L2m2mContext *s; V4L2m2mPriv *priv = avctx->priv_data; @@ -52772,14 +53865,13 @@ index 3e17e0fcac..dd383f31e5 100644 if (ret < 0) return ret; -+ xlat_init(&s->xlat); + pts_stats_init(&s->pts_stat, avctx, "decoder"); + s->pending_hw = PENDING_HW_MIN; + capture = &s->capture; output = &s->output; -@@ -199,34 +807,129 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) +@@ -199,34 +875,136 @@ static av_cold int v4l2_decode_init(AVCodecContext *avctx) * by the v4l2 driver; this event will trigger a full pipeline reconfig and * the proper values will be retrieved from the kernel driver. */ @@ -52793,12 +53885,10 @@ index 3e17e0fcac..dd383f31e5 100644 output->av_codec_id = avctx->codec_id; output->av_pix_fmt = AV_PIX_FMT_NONE; + output->min_buf_size = max_coded_size(avctx); -+ output->no_pts_rescale = 1; capture->av_codec_id = AV_CODEC_ID_RAWVIDEO; capture->av_pix_fmt = avctx->pix_fmt; + capture->min_buf_size = 0; -+ capture->no_pts_rescale = 1; + + /* the client requests the codec to generate DRM frames: + * - data[0] will therefore point to the returned AVDRMFrameDescriptor @@ -52839,7 +53929,15 @@ index 3e17e0fcac..dd383f31e5 100644 av_log(avctx, AV_LOG_ERROR, "can't configure decoder\n"); - s->self_ref = NULL; - av_buffer_unref(&priv->context_ref); -- ++ return ret; ++ } + ++ if (avctx->extradata && ++ (ret = copy_extradata(avctx, avctx->extradata, avctx->extradata_size, &s->extdata_data, &s->extdata_size)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to copy extradata from context: %s\n", av_err2str(ret)); ++#if DUMP_FAILED_EXTRADATA ++ log_dump(avctx, AV_LOG_INFO, avctx->extradata, avctx->extradata_size); ++#endif return ret; } @@ -52918,7 +54016,7 @@ index 3e17e0fcac..dd383f31e5 100644 } #define OFFSET(x) offsetof(V4L2m2mPriv, x) -@@ -235,10 +938,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx) +@@ -235,10 +1013,16 @@ static av_cold int v4l2_decode_close(AVCodecContext *avctx) static const AVOption options[] = { V4L_M2M_DEFAULT_OPTS, { "num_capture_buffers", "Number of buffers in the capture context", @@ -52936,7 +54034,7 @@ index 3e17e0fcac..dd383f31e5 100644 #define M2MDEC_CLASS(NAME) \ static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \ .class_name = #NAME "_v4l2m2m_decoder", \ -@@ -259,9 +968,15 @@ static const AVOption options[] = { +@@ -259,9 +1043,15 @@ static const AVOption options[] = { .init = v4l2_decode_init, \ .receive_frame = v4l2_receive_frame, \ .close = v4l2_decode_close, \ @@ -52953,6 +54051,408 @@ index 3e17e0fcac..dd383f31e5 100644 .wrapper_name = "v4l2m2m", \ } +diff --git a/libavcodec/v4l2_m2m_enc.c b/libavcodec/v4l2_m2m_enc.c +index 32321f392f..08d0c092bd 100644 +--- a/libavcodec/v4l2_m2m_enc.c ++++ b/libavcodec/v4l2_m2m_enc.c +@@ -24,6 +24,8 @@ + #include + #include + #include ++#include ++ + #include "libavcodec/avcodec.h" + #include "libavcodec/internal.h" + #include "libavutil/pixdesc.h" +@@ -37,6 +39,34 @@ + #define MPEG_CID(x) V4L2_CID_MPEG_VIDEO_##x + #define MPEG_VIDEO(x) V4L2_MPEG_VIDEO_##x + ++// P030 should be defined in drm_fourcc.h and hopefully will be sometime ++// in the future but until then... ++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') ++#endif ++ ++#ifndef DRM_FORMAT_NV15 ++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') ++#endif ++ ++#ifndef DRM_FORMAT_NV20 ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') ++#endif ++ ++#ifndef V4L2_CID_CODEC_BASE ++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in videodev2.h hopefully will be sometime in the future but until then... ++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ + static inline void v4l2_set_timeperframe(V4L2m2mContext *s, unsigned int num, unsigned int den) + { + struct v4l2_streamparm parm = { 0 }; +@@ -147,15 +177,14 @@ static inline int v4l2_mpeg4_profile_from_ff(int p) + static int v4l2_check_b_frame_support(V4L2m2mContext *s) + { + if (s->avctx->max_b_frames) +- av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support b-frames yet\n"); ++ av_log(s->avctx, AV_LOG_WARNING, "Encoder does not support %d b-frames yet\n", s->avctx->max_b_frames); + +- v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), 0, "number of B-frames", 0); ++ v4l2_set_ext_ctrl(s, MPEG_CID(B_FRAMES), s->avctx->max_b_frames, "number of B-frames", 1); + v4l2_get_ext_ctrl(s, MPEG_CID(B_FRAMES), &s->avctx->max_b_frames, "number of B-frames", 0); + if (s->avctx->max_b_frames == 0) + return 0; + + avpriv_report_missing_feature(s->avctx, "DTS/PTS calculation for V4L2 encoding"); +- + return AVERROR_PATCHWELCOME; + } + +@@ -270,13 +299,186 @@ static int v4l2_prepare_encoder(V4L2m2mContext *s) + return 0; + } + ++static int avdrm_to_v4l2(struct v4l2_format * const format, const AVFrame * const frame) ++{ ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ const uint32_t drm_fmt = src->layers[0].format; ++ // Treat INVALID as LINEAR ++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? ++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; ++ uint32_t pix_fmt = 0; ++ uint32_t w = 0; ++ uint32_t h = 0; ++ uint32_t bpl = src->layers[0].planes[0].pitch; ++ ++ // We really don't expect multiple layers ++ // All formats that we currently cope with are single object ++ ++ if (src->nb_layers != 1 || src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ switch (drm_fmt) { ++ case DRM_FORMAT_YUV420: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 3) ++ break; ++ pix_fmt = V4L2_PIX_FMT_YUV420; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ break; ++ ++ case DRM_FORMAT_NV12: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; ++ w = bpl; ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ case DRM_FORMAT_P030: ++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; ++ w = bpl / 2; // Matching lie to how we construct this ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!pix_fmt) ++ return AVERROR(EINVAL); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->plane_fmt[0].bytesperline = bpl; ++ pix->num_planes = 1; ++ } ++ else { ++ struct v4l2_pix_format *const pix = &format->fmt.pix; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->bytesperline = bpl; ++ } ++ ++ return 0; ++} ++ ++// Do we have similar enough formats to be usable? ++static int fmt_eq(const struct v4l2_format * const a, const struct v4l2_format * const b) ++{ ++ if (a->type != b->type) ++ return 0; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(a->type)) { ++ const struct v4l2_pix_format_mplane *const pa = &a->fmt.pix_mp; ++ const struct v4l2_pix_format_mplane *const pb = &b->fmt.pix_mp; ++ unsigned int i; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->num_planes != pb->num_planes) ++ return 0; ++ for (i = 0; i != pa->num_planes; ++i) { ++ if (pa->plane_fmt[i].bytesperline != pb->plane_fmt[i].bytesperline) ++ return 0; ++ } ++ } ++ else { ++ const struct v4l2_pix_format *const pa = &a->fmt.pix; ++ const struct v4l2_pix_format *const pb = &b->fmt.pix; ++ if (pa->pixelformat != pb->pixelformat || ++ pa->bytesperline != pb->bytesperline) ++ return 0; ++ } ++ return 1; ++} ++ ++ + static int v4l2_send_frame(AVCodecContext *avctx, const AVFrame *frame) + { + V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context; + V4L2Context *const output = &s->output; + ++ ff_v4l2_dq_all(output); ++ ++ // Signal EOF if needed ++ if (!frame) { ++ return ff_v4l2_context_enqueue_frame(output, frame); ++ } ++ ++ if (s->input_drm && !output->streamon) { ++ int rv; ++ struct v4l2_format req_format = {.type = output->format.type}; ++ ++ // Set format when we first get a buffer ++ if ((rv = avdrm_to_v4l2(&req_format, frame)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to get V4L2 format from DRM_PRIME frame\n"); ++ return rv; ++ } ++ ++ ff_v4l2_context_release(output); ++ ++ output->format = req_format; ++ ++ if ((rv = ff_v4l2_context_set_format(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to set V4L2 format\n"); ++ return rv; ++ } ++ ++ if (!fmt_eq(&req_format, &output->format)) { ++ av_log(avctx, AV_LOG_ERROR, "Format mismatch after setup\n"); ++ return AVERROR(EINVAL); ++ } ++ ++ output->selection.top = frame->crop_top; ++ output->selection.left = frame->crop_left; ++ output->selection.width = av_frame_cropped_width(frame); ++ output->selection.height = av_frame_cropped_height(frame); ++ ++ if ((rv = ff_v4l2_context_init(output)) != 0) { ++ av_log(avctx, AV_LOG_ERROR, "Failed to (re)init context\n"); ++ return rv; ++ } ++ ++ { ++ struct v4l2_selection selection = { ++ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT, ++ .target = V4L2_SEL_TGT_CROP, ++ .r = output->selection ++ }; ++ if (ioctl(s->fd, VIDIOC_S_SELECTION, &selection) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "S_SELECTION (CROP) %dx%d @ %d,%d failed: %s\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top, ++ av_err2str(AVERROR(errno))); ++ } ++ av_log(avctx, AV_LOG_TRACE, "S_SELECTION (CROP) %dx%d @ %d,%d OK\n", ++ selection.r.width, selection.r.height, selection.r.left, selection.r.top); ++ } ++ } ++ + #ifdef V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME +- if (frame && frame->pict_type == AV_PICTURE_TYPE_I) ++ if (frame->pict_type == AV_PICTURE_TYPE_I) + v4l2_set_ext_ctrl(s, MPEG_CID(FORCE_KEY_FRAME), 0, "force key frame", 1); + #endif + +@@ -290,6 +492,8 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + V4L2Context *const output = &s->output; + int ret; + ++ ff_v4l2_dq_all(output); ++ + if (s->draining) + goto dequeue; + +@@ -310,7 +514,87 @@ static int v4l2_receive_packet(AVCodecContext *avctx, AVPacket *avpkt) + } + + dequeue: +- return ff_v4l2_context_dequeue_packet(capture, avpkt); ++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt); ++ ff_v4l2_dq_all(output); ++ if (ret) ++ return ret; ++ ++ if (capture->first_buf == 1) { ++ uint8_t * data; ++ const int len = avpkt->size; ++ ++ // 1st buffer after streamon should be SPS/PPS ++ capture->first_buf = 2; ++ ++ // Clear both possible stores so there is no chance of confusion ++ av_freep(&s->extdata_data); ++ s->extdata_size = 0; ++ av_freep(&avctx->extradata); ++ avctx->extradata_size = 0; ++ ++ if ((data = av_malloc(len + AV_INPUT_BUFFER_PADDING_SIZE)) == NULL) ++ goto fail_no_mem; ++ ++ memcpy(data, avpkt->data, len); ++ av_packet_unref(avpkt); ++ ++ // We need to copy the header, but keep local if not global ++ if ((avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) != 0) { ++ avctx->extradata = data; ++ avctx->extradata_size = len; ++ } ++ else { ++ s->extdata_data = data; ++ s->extdata_size = len; ++ } ++ ++ ret = ff_v4l2_context_dequeue_packet(capture, avpkt); ++ ff_v4l2_dq_all(output); ++ if (ret) ++ return ret; ++ } ++ ++ // First frame must be key so mark as such even if encoder forgot ++ if (capture->first_buf == 2) { ++ avpkt->flags |= AV_PKT_FLAG_KEY; ++ ++ // Add any extradata to the 1st packet we emit as we cannot create it at init ++ if (avctx->extradata_size > 0 && avctx->extradata) { ++ void * const side = av_packet_new_side_data(avpkt, ++ AV_PKT_DATA_NEW_EXTRADATA, ++ avctx->extradata_size); ++ if (!side) ++ goto fail_no_mem; ++ ++ memcpy(side, avctx->extradata, avctx->extradata_size); ++ } ++ } ++ ++ // Add SPS/PPS to the start of every key frame if non-global headers ++ if ((avpkt->flags & AV_PKT_FLAG_KEY) != 0 && s->extdata_size != 0) { ++ const size_t newlen = s->extdata_size + avpkt->size; ++ AVBufferRef * const buf = av_buffer_alloc(newlen + AV_INPUT_BUFFER_PADDING_SIZE); ++ ++ if (buf == NULL) ++ goto fail_no_mem; ++ ++ memcpy(buf->data, s->extdata_data, s->extdata_size); ++ memcpy(buf->data + s->extdata_size, avpkt->data, avpkt->size); ++ ++ av_buffer_unref(&avpkt->buf); ++ avpkt->buf = buf; ++ avpkt->data = buf->data; ++ avpkt->size = newlen; ++ } ++ ++// av_log(avctx, AV_LOG_INFO, "%s: PTS out=%"PRId64", size=%d, ret=%d\n", __func__, avpkt->pts, avpkt->size, ret); ++ capture->first_buf = 0; ++ return 0; ++ ++fail_no_mem: ++ ret = AVERROR(ENOMEM); ++ av_packet_unref(avpkt); ++ return ret; + } + + static av_cold int v4l2_encode_init(AVCodecContext *avctx) +@@ -322,6 +606,8 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + uint32_t v4l2_fmt_output; + int ret; + ++ av_log(avctx, AV_LOG_INFO, " <<< %s: fmt=%d/%d\n", __func__, avctx->pix_fmt, avctx->sw_pix_fmt); ++ + ret = ff_v4l2_m2m_create_context(priv, &s); + if (ret < 0) + return ret; +@@ -329,13 +615,17 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + capture = &s->capture; + output = &s->output; + ++ s->input_drm = (avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME); ++ + /* common settings output/capture */ + output->height = capture->height = avctx->height; + output->width = capture->width = avctx->width; + + /* output context */ + output->av_codec_id = AV_CODEC_ID_RAWVIDEO; +- output->av_pix_fmt = avctx->pix_fmt; ++ output->av_pix_fmt = !s->input_drm ? avctx->pix_fmt : ++ avctx->sw_pix_fmt != AV_PIX_FMT_NONE ? avctx->sw_pix_fmt : ++ AV_PIX_FMT_YUV420P; + + /* capture context */ + capture->av_codec_id = avctx->codec_id; +@@ -354,7 +644,7 @@ static av_cold int v4l2_encode_init(AVCodecContext *avctx) + v4l2_fmt_output = output->format.fmt.pix.pixelformat; + + pix_fmt_output = ff_v4l2_format_v4l2_to_avfmt(v4l2_fmt_output, AV_CODEC_ID_RAWVIDEO); +- if (pix_fmt_output != avctx->pix_fmt) { ++ if (!s->input_drm && pix_fmt_output != avctx->pix_fmt) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt_output); + av_log(avctx, AV_LOG_ERROR, "Encoder requires %s pixel format.\n", desc->name); + return AVERROR(EINVAL); +@@ -372,9 +662,10 @@ static av_cold int v4l2_encode_close(AVCodecContext *avctx) + #define FLAGS AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM + + #define V4L_M2M_CAPTURE_OPTS \ +- V4L_M2M_DEFAULT_OPTS,\ ++ { "num_output_buffers", "Number of buffers in the output context",\ ++ OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS },\ + { "num_capture_buffers", "Number of buffers in the capture context", \ +- OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 4 }, 4, INT_MAX, FLAGS } ++ OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 8 }, 8, INT_MAX, FLAGS } + + static const AVOption mpeg4_options[] = { + V4L_M2M_CAPTURE_OPTS, diff --git a/libavcodec/v4l2_req_decode_q.c b/libavcodec/v4l2_req_decode_q.c new file mode 100644 index 0000000000..5b3fb958fa @@ -53531,13 +55031,15 @@ index 0000000000..cfa94d55c4 + diff --git a/libavcodec/v4l2_req_devscan.h b/libavcodec/v4l2_req_devscan.h new file mode 100644 -index 0000000000..0baef36535 +index 0000000000..956d9234f1 --- /dev/null +++ b/libavcodec/v4l2_req_devscan.h -@@ -0,0 +1,21 @@ +@@ -0,0 +1,23 @@ +#ifndef _DEVSCAN_H_ +#define _DEVSCAN_H_ + ++#include ++ +struct devscan; +struct decdev; +enum v4l2_buf_type; @@ -53830,13 +55332,15 @@ index 0000000000..ae6c648369 + diff --git a/libavcodec/v4l2_req_dmabufs.h b/libavcodec/v4l2_req_dmabufs.h new file mode 100644 -index 0000000000..8d909c4297 +index 0000000000..cfb17e801d --- /dev/null +++ b/libavcodec/v4l2_req_dmabufs.h -@@ -0,0 +1,38 @@ +@@ -0,0 +1,40 @@ +#ifndef DMABUFS_H +#define DMABUFS_H + ++#include ++ +struct dmabufs_ctl; +struct dmabuf_h; + @@ -53899,20 +55403,27 @@ index 0000000000..dcc8d95632 +#define HEVC_CTRLS_VERSION 3 +#include "v4l2_req_hevc_vx.c" + +diff --git a/libavcodec/v4l2_req_hevc_v4.c b/libavcodec/v4l2_req_hevc_v4.c +new file mode 100644 +index 0000000000..c35579d8e0 +--- /dev/null ++++ b/libavcodec/v4l2_req_hevc_v4.c +@@ -0,0 +1,3 @@ ++#define HEVC_CTRLS_VERSION 4 ++#include "v4l2_req_hevc_vx.c" ++ diff --git a/libavcodec/v4l2_req_hevc_vx.c b/libavcodec/v4l2_req_hevc_vx.c new file mode 100644 -index 0000000000..55c41ae679 +index 0000000000..9ff5592e61 --- /dev/null +++ b/libavcodec/v4l2_req_hevc_vx.c -@@ -0,0 +1,1228 @@ +@@ -0,0 +1,1365 @@ +// File included by v4l2_req_hevc_v* - not compiled on its own + +#include "decode.h" +#include "hevcdec.h" +#include "hwconfig.h" + -+#include "v4l2_request_hevc.h" -+ +#if HEVC_CTRLS_VERSION == 1 +#include "hevc-ctrls-v1.h" + @@ -53923,10 +55434,37 @@ index 0000000000..55c41ae679 +#include "hevc-ctrls-v2.h" +#elif HEVC_CTRLS_VERSION == 3 +#include "hevc-ctrls-v3.h" ++#elif HEVC_CTRLS_VERSION == 4 ++#include ++#if !defined(V4L2_CID_STATELESS_HEVC_SPS) ++#include "hevc-ctrls-v4.h" ++#endif +#else +#error Unknown HEVC_CTRLS_VERSION +#endif + ++#ifndef V4L2_CID_STATELESS_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_SPS V4L2_CID_MPEG_VIDEO_HEVC_SPS ++#define V4L2_CID_STATELESS_HEVC_PPS V4L2_CID_MPEG_VIDEO_HEVC_PPS ++#define V4L2_CID_STATELESS_HEVC_SLICE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_SCALING_MATRIX V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX ++#define V4L2_CID_STATELESS_HEVC_DECODE_PARAMS V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS ++#define V4L2_CID_STATELESS_HEVC_DECODE_MODE V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE ++#define V4L2_CID_STATELESS_HEVC_START_CODE V4L2_CID_MPEG_VIDEO_HEVC_START_CODE ++ ++#define V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED ++#define V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED ++#define V4L2_STATELESS_HEVC_START_CODE_NONE V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE ++#define V4L2_STATELESS_HEVC_START_CODE_ANNEX_B V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B ++#endif ++ ++// Should be in videodev2 but we might not have a good enough one ++#ifndef V4L2_PIX_FMT_HEVC_SLICE ++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */ ++#endif ++ ++#include "v4l2_request_hevc.h" ++ +#include "libavutil/hwcontext_drm.h" + +#include @@ -53962,11 +55500,16 @@ index 0000000000..55c41ae679 + struct v4l2_ctrl_hevc_slice_params * slice_params; + struct slice_info * slices; + ++ size_t num_offsets; ++ size_t alloced_offsets; ++ uint32_t *offsets; ++ +} V4L2MediaReqDescriptor; + +struct slice_info { + const uint8_t * ptr; + size_t len; // bytes ++ size_t n_offsets; +}; + +// Handy container for accumulating controls before setting @@ -54125,7 +55668,7 @@ index 0000000000..55c41ae679 + if (rd->num_slices >= rd->alloced_slices) { + struct v4l2_ctrl_hevc_slice_params * p2; + struct slice_info * s2; -+ size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2; ++ size_t n2 = rd->alloced_slices == 0 ? 8 : rd->alloced_slices * 2; + + p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2)); + if (p2 == NULL) @@ -54143,6 +55686,23 @@ index 0000000000..55c41ae679 + return 0; +} + ++static int offsets_add(V4L2MediaReqDescriptor *const rd, const size_t n, const unsigned * const offsets) ++{ ++ if (rd->num_offsets + n > rd->alloced_offsets) { ++ size_t n2 = rd->alloced_slices == 0 ? 128 : rd->alloced_slices * 2; ++ void * p2; ++ while (rd->num_offsets + n > n2) ++ n2 *= 2; ++ if ((p2 = av_realloc_array(rd->offsets, n2, sizeof(*rd->offsets))) == NULL) ++ return AVERROR(ENOMEM); ++ rd->offsets = p2; ++ rd->alloced_offsets = n2; ++ } ++ for (size_t i = 0; i != n; ++i) ++ rd->offsets[rd->num_offsets++] = offsets[i] - 1; ++ return 0; ++} ++ +static unsigned int +fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries) +{ @@ -54164,9 +55724,13 @@ index 0000000000..55c41ae679 +#endif + entry->field_pic = frame->frame->interlaced_frame; + ++#if HEVC_CTRLS_VERSION <= 3 + /* TODO: Interleaved: Get the POC for each field. */ + entry->pic_order_cnt[0] = frame->poc; + entry->pic_order_cnt[1] = frame->poc; ++#else ++ entry->pic_order_cnt_val = frame->poc; ++#endif + } + } + return n; @@ -54192,8 +55756,11 @@ index 0000000000..55c41ae679 + + *slice_params = (struct v4l2_ctrl_hevc_slice_params) { + .bit_size = bit_size, ++#if HEVC_CTRLS_VERSION <= 3 + .data_bit_offset = bit_offset, -+ ++#else ++ .data_byte_offset = bit_offset / 8 + 1, ++#endif + /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ + .slice_segment_addr = sh->slice_segment_addr, + @@ -54276,6 +55843,7 @@ index 0000000000..55c41ae679 + fill_pred_table(h, &slice_params->pred_weight_table); + + slice_params->num_entry_point_offsets = sh->num_entry_point_offsets; ++#if HEVC_CTRLS_VERSION <= 3 + if (slice_params->num_entry_point_offsets > 256) { + slice_params->num_entry_point_offsets = 256; + av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets); @@ -54283,6 +55851,7 @@ index 0000000000..55c41ae679 + + for (i = 0; i < slice_params->num_entry_point_offsets; i++) + slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1; ++#endif +} + +#if HEVC_CTRLS_VERSION >= 2 @@ -54658,51 +56227,66 @@ index 0000000000..55c41ae679 +#if HEVC_CTRLS_VERSION >= 2 + struct v4l2_ctrl_hevc_decode_params * const dec, +#endif -+ struct v4l2_ctrl_hevc_slice_params * const slices, -+ const unsigned int slice_no, -+ const unsigned int slice_count) ++ struct v4l2_ctrl_hevc_slice_params * const slices, const unsigned int slice_count, ++ void * const offsets, const size_t offset_count) +{ + int rv; ++#if HEVC_CTRLS_VERSION >= 2 ++ unsigned int n = 3; ++#else ++ unsigned int n = 2; ++#endif + -+ struct v4l2_ext_control control[] = { ++ struct v4l2_ext_control control[6] = { + { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS, ++ .id = V4L2_CID_STATELESS_HEVC_SPS, + .ptr = &controls->sps, + .size = sizeof(controls->sps), + }, + { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS, ++ .id = V4L2_CID_STATELESS_HEVC_PPS, + .ptr = &controls->pps, + .size = sizeof(controls->pps), + }, +#if HEVC_CTRLS_VERSION >= 2 + { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS, ++ .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS, + .ptr = dec, + .size = sizeof(*dec), + }, +#endif -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, -+ .ptr = slices + slice_no, -+ .size = sizeof(*slices) * slice_count, -+ }, -+ // Optional -+ { -+ .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX, -+ .ptr = &controls->scaling_matrix, -+ .size = sizeof(controls->scaling_matrix), -+ }, + }; + -+ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, -+ controls->has_scaling ? -+ FF_ARRAY_ELEMS(control) : -+ FF_ARRAY_ELEMS(control) - 1); ++ if (slices) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, ++ .ptr = slices, ++ .size = sizeof(*slices) * slice_count, ++ }; ++ ++ if (controls->has_scaling) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX, ++ .ptr = &controls->scaling_matrix, ++ .size = sizeof(controls->scaling_matrix), ++ }; ++ ++#if HEVC_CTRLS_VERSION >= 4 ++ if (offsets) ++ control[n++] = (struct v4l2_ext_control) { ++ .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, ++ .ptr = offsets, ++ .size = sizeof(((struct V4L2MediaReqDescriptor *)0)->offsets[0]) * offset_count, ++ }; ++#endif ++ ++ rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control, n); + + return rv; +} + ++// This only works because we started out from a single coded frame buffer ++// that will remain intact until after end_frame +static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size) +{ + const HEVCContext * const h = avctx->priv_data; @@ -54711,18 +56295,45 @@ index 0000000000..55c41ae679 + int bcount = get_bits_count(&h->HEVClc->gb); + uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount; + ++ const unsigned int n = rd->num_slices; ++ const unsigned int block_start = (n / ctx->max_slices) * ctx->max_slices; ++ + int rv; + struct slice_info * si; + ++ // This looks dodgy but we know that FFmpeg has parsed this from a buffer ++ // that contains the entire frame including the start code ++ if (ctx->start_code == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) { ++ buffer -= 3; ++ size += 3; ++ boff += 24; ++ if (buffer[0] != 0 || buffer[1] != 0 || buffer[2] != 1) { ++ av_log(avctx, AV_LOG_ERROR, "Start code requested but missing %02x:%02x:%02x\n", ++ buffer[0], buffer[1], buffer[2]); ++ } ++ } ++ ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) { ++ if (rd->slices == NULL) { ++ if ((rd->slices = av_mallocz(sizeof(*rd->slices))) == NULL) ++ return AVERROR(ENOMEM); ++ rd->slices->ptr = buffer; ++ rd->num_slices = 1; ++ } ++ rd->slices->len = buffer - rd->slices->ptr + size; ++ return 0; ++ } ++ + if ((rv = slice_add(rd)) != 0) + return rv; + -+ si = rd->slices + rd->num_slices - 1; ++ si = rd->slices + n; + si->ptr = buffer; + si->len = size; ++ si->n_offsets = rd->num_offsets; + -+ if (ctx->multi_slice && rd->num_slices > 1) { -+ struct slice_info *const si0 = rd->slices; ++ if (n != block_start) { ++ struct slice_info *const si0 = rd->slices + block_start; + const size_t offset = (buffer - si0->ptr); + boff += offset * 8; + size += offset; @@ -54730,12 +56341,15 @@ index 0000000000..55c41ae679 + } + +#if HEVC_CTRLS_VERSION >= 2 -+ if (rd->num_slices == 1) ++ if (n == 0) + fill_decode_params(h, &rd->dec); -+ fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff); ++ fill_slice_params(h, &rd->dec, rd->slice_params + n, size * 8, boff); +#else -+ fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff); ++ fill_slice_params(h, rd->slice_params + n, size * 8, boff); +#endif ++ if (ctx->max_offsets != 0 && ++ (rv = offsets_add(rd, h->sh.num_entry_point_offsets, h->sh.entry_point_offset)) != 0) ++ return rv; + + return 0; +} @@ -54761,10 +56375,13 @@ index 0000000000..55c41ae679 +{ + V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data; + ++ const int is_last = (j == rd->num_slices); + struct slice_info *const si = rd->slices + i; + struct media_request * req = NULL; + struct qent_src * src = NULL; + MediaBufsStatus stat; ++ void * offsets = rd->offsets + rd->slices[i].n_offsets; ++ size_t n_offsets = (is_last ? rd->num_offsets : rd->slices[j].n_offsets) - rd->slices[i].n_offsets; + + if ((req = media_request_get(ctx->mpool)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__); @@ -54776,8 +56393,8 @@ index 0000000000..55c41ae679 +#if HEVC_CTRLS_VERSION >= 2 + &rd->dec, +#endif -+ rd->slice_params, -+ i, j - i)) { ++ rd->slice_params + i, j - i, ++ offsets, n_offsets)) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__); + goto fail1; + } @@ -54797,13 +56414,9 @@ index 0000000000..55c41ae679 + goto fail2; + } + -+#warning ANNEX_B start code -+// if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { -+// } -+ + stat = mediabufs_start_request(ctx->mbufs, &req, &src, + i == 0 ? rd->qe_dst : NULL, -+ j == rd->num_slices); ++ is_last); + + if (stat != MEDIABUFS_STATUS_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__); @@ -54868,18 +56481,11 @@ index 0000000000..55c41ae679 + } + + // Send as slices -+ if (ctx->multi_slice) -+ { -+ if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0) ++ for (i = 0; i < rd->num_slices; i += ctx->max_slices) { ++ const unsigned int e = FFMIN(rd->num_slices, i + ctx->max_slices); ++ if ((rv = send_slice(avctx, rd, &rc, i, e)) != 0) + goto fail; + } -+ else -+ { -+ for (i = 0; i != rd->num_slices; ++i) { -+ if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0) -+ goto fail; -+ } -+ } + + // Set the drm_prime desriptor + drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs)); @@ -54894,6 +56500,12 @@ index 0000000000..55c41ae679 + return rv; +} + ++static inline int ++ctrl_valid(const struct v4l2_query_ext_ctrl * const c, const int64_t v) ++{ ++ return v >= c->minimum && v <= c->maximum; ++} ++ +// Initial check & init +static int +probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx) @@ -54905,17 +56517,19 @@ index 0000000000..55c41ae679 + + // Check for var slice array + struct v4l2_query_ext_ctrl qc[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX }, ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_PPS }, ++ { .id = V4L2_CID_STATELESS_HEVC_SCALING_MATRIX }, +#if HEVC_CTRLS_VERSION >= 2 -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_PARAMS }, +#endif + }; + // Order & size must match! + static const size_t ctrl_sizes[] = { + sizeof(struct v4l2_ctrl_hevc_slice_params), ++ sizeof(int32_t), + sizeof(struct v4l2_ctrl_hevc_sps), + sizeof(struct v4l2_ctrl_hevc_pps), + sizeof(struct v4l2_ctrl_hevc_scaling_matrix), @@ -54933,11 +56547,22 @@ index 0000000000..55c41ae679 + return AVERROR(EINVAL); +#endif + -+ if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) { -+ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION); ++ mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls); ++ i = 0; ++#if HEVC_CTRLS_VERSION >= 4 ++ // Skip slice check if no slice mode ++ if (qc[1].type != 0 && !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ i = 1; ++#else ++ // Fail frame mode silently for anything prior to V4 ++ if (qc[1].type == 0 || !ctrl_valid(qc + 1, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) + return AVERROR(EINVAL); -+ } -+ for (i = 0; i != noof_ctrls; ++i) { ++#endif ++ for (; i != noof_ctrls; ++i) { ++ if (qc[i].type == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %#x missing\n", HEVC_CTRLS_VERSION, qc[i].id); ++ return AVERROR(EINVAL); ++ } + if (ctrl_sizes[i] != (size_t)qc[i].elem_size) { + av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %d size mismatch %zu != %zu\n", + HEVC_CTRLS_VERSION, i, ctrl_sizes[i], (size_t)qc[i].elem_size); @@ -54947,12 +56572,11 @@ index 0000000000..55c41ae679 + + fill_sps(&ctrl_sps, sps); + -+ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { ++ if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_STATELESS_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) { + av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n"); + return AVERROR(EINVAL); + } + -+ ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0; + return 0; +} + @@ -54963,38 +56587,63 @@ index 0000000000..55c41ae679 + int ret; + + struct v4l2_query_ext_ctrl querys[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_SLICE_PARAMS, }, ++#if HEVC_CTRLS_VERSION >= 4 ++ { .id = V4L2_CID_STATELESS_HEVC_ENTRY_POINT_OFFSETS, }, ++#endif + }; + + struct v4l2_ext_control ctrls[] = { -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, }, -+ { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_DECODE_MODE, }, ++ { .id = V4L2_CID_STATELESS_HEVC_START_CODE, }, + }; + + mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys)); + -+ ctx->decode_mode = querys[0].default_value; ++ ctx->max_slices = (!(querys[2].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) || ++ querys[2].nr_of_dims != 1 || querys[2].dims[0] == 0) ? ++ 1 : querys[2].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Max slices %d\n", __func__, ctx->max_slices); + -+ if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED && -+ ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode); ++#if HEVC_CTRLS_VERSION >= 4 ++ ctx->max_offsets = (querys[3].type == 0 || querys[3].nr_of_dims != 1) ? ++ 0 : querys[3].dims[0]; ++ av_log(avctx, AV_LOG_DEBUG, "%s: Entry point offsets %d\n", __func__, ctx->max_offsets); ++#else ++ ctx->max_offsets = 0; ++#endif ++ ++ if (querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED || ++ querys[0].default_value == V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED) ++ ctx->decode_mode = querys[0].default_value; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_FRAME_BASED; ++ else if (ctrl_valid(querys + 0, V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED)) ++ ctx->decode_mode = V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode\n", __func__); + return AVERROR(EINVAL); + } + -+ ctx->start_code = querys[1].default_value; -+ if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE && -+ ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code); ++ if (querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_NONE || ++ querys[1].default_value == V4L2_STATELESS_HEVC_START_CODE_ANNEX_B) ++ ctx->start_code = querys[1].default_value; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_ANNEX_B)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_ANNEX_B; ++ else if (ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; ++ else { ++ av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code\n", __func__); + return AVERROR(EINVAL); + } + -+ ctx->max_slices = querys[2].elems; -+ if (ctx->max_slices > MAX_SLICES) { -+ av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices); -+ return AVERROR(EINVAL); -+ } ++ // If we are in slice mode & START_CODE_NONE supported then pick that ++ // as it doesn't require the slightly dodgy look backwards in our raw buffer ++ if (ctx->decode_mode == V4L2_STATELESS_HEVC_DECODE_MODE_SLICE_BASED && ++ ctrl_valid(querys + 1, V4L2_STATELESS_HEVC_START_CODE_NONE)) ++ ctx->start_code = V4L2_STATELESS_HEVC_START_CODE_NONE; + + ctrls[0].value = ctx->decode_mode; + ctrls[1].value = ctx->start_code; @@ -55018,6 +56667,7 @@ index 0000000000..55c41ae679 + + av_freep(&rd->slices); + av_freep(&rd->slice_params); ++ av_freep(&rd->offsets); + + av_free(rd); +} @@ -57293,10 +58943,14 @@ index 0000000000..e1182cb2fc +#endif /* POLLQUEUE_H_ */ diff --git a/libavcodec/v4l2_req_utils.h b/libavcodec/v4l2_req_utils.h new file mode 100644 -index 0000000000..cb4bd164b4 +index 0000000000..a31cc1f4ec --- /dev/null +++ b/libavcodec/v4l2_req_utils.h -@@ -0,0 +1,22 @@ +@@ -0,0 +1,27 @@ ++#ifndef AVCODEC_V4L2_REQ_UTILS_H ++#define AVCODEC_V4L2_REQ_UTILS_H ++ ++#include +#include "libavutil/log.h" + +#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) @@ -57319,12 +58973,13 @@ index 0000000000..cb4bd164b4 + return tbuf; +} + ++#endif diff --git a/libavcodec/v4l2_request_hevc.c b/libavcodec/v4l2_request_hevc.c new file mode 100644 -index 0000000000..0ae14db90b +index 0000000000..27b1b8dd6d --- /dev/null +++ b/libavcodec/v4l2_request_hevc.c -@@ -0,0 +1,311 @@ +@@ -0,0 +1,315 @@ +/* + * This file is part of FFmpeg. + * @@ -57536,7 +59191,11 @@ index 0000000000..0ae14db90b + goto fail4; + } + -+ if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { ++ if (V2(ff_v4l2_req_hevc, 4).probe(avctx, ctx) == 0) { ++ av_log(avctx, AV_LOG_DEBUG, "HEVC API version 4 probed successfully\n"); ++ ctx->fns = &V2(ff_v4l2_req_hevc, 4); ++ } ++ else if (V2(ff_v4l2_req_hevc, 3).probe(avctx, ctx) == 0) { + av_log(avctx, AV_LOG_DEBUG, "HEVC API version 3 probed successfully\n"); + ctx->fns = &V2(ff_v4l2_req_hevc, 3); + } @@ -57638,13 +59297,14 @@ index 0000000000..0ae14db90b +}; diff --git a/libavcodec/v4l2_request_hevc.h b/libavcodec/v4l2_request_hevc.h new file mode 100644 -index 0000000000..b2cb8c8584 +index 0000000000..99c90064ea --- /dev/null +++ b/libavcodec/v4l2_request_hevc.h @@ -0,0 +1,102 @@ +#ifndef AVCODEC_V4L2_REQUEST_HEVC_H +#define AVCODEC_V4L2_REQUEST_HEVC_H + ++#include +#include +#include "v4l2_req_decode_q.h" + @@ -57689,8 +59349,6 @@ index 0000000000..b2cb8c8584 +#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY 0x0800 +#endif + -+#define MAX_SLICES 128 -+ +#define VCAT(name, version) name##_v##version +#define V2(n,v) VCAT(n, v) +#define V(n) V2(n, HEVC_CTRLS_VERSION) @@ -57707,10 +59365,10 @@ index 0000000000..b2cb8c8584 + + unsigned int timestamp; // ?? maybe uint64_t + -+ int multi_slice; + int decode_mode; + int start_code; -+ int max_slices; ++ unsigned int max_slices; // 0 => not wanted (frame mode) ++ unsigned int max_offsets; // 0 => not wanted + + req_decode_q decode_q; + @@ -57742,6 +59400,7 @@ index 0000000000..b2cb8c8584 +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1); +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2); +extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 3); ++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 4); + +#endif diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c @@ -60049,7 +61708,7 @@ index 5123540653..eb1e755982 100644 OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER) += vf_unsharp_opencl.o opencl.o \ opencl/unsharp.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c -index 1183e40267..174e3f7ef7 100644 +index 1183e40267..4be094ecc9 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -204,6 +204,7 @@ extern AVFilter ff_vf_dedot; @@ -60060,7 +61719,15 @@ index 1183e40267..174e3f7ef7 100644 extern AVFilter ff_vf_deinterlace_vaapi; extern AVFilter ff_vf_dejudder; extern AVFilter ff_vf_delogo; -@@ -414,6 +415,7 @@ extern AVFilter ff_vf_transpose_opencl; +@@ -356,6 +357,7 @@ extern AVFilter ff_vf_scale; + extern AVFilter ff_vf_scale_cuda; + extern AVFilter ff_vf_scale_npp; + extern AVFilter ff_vf_scale_qsv; ++extern AVFilter ff_vf_scale_v4l2m2m; + extern AVFilter ff_vf_scale_vaapi; + extern AVFilter ff_vf_scale_vulkan; + extern AVFilter ff_vf_scale2ref; +@@ -414,6 +416,7 @@ extern AVFilter ff_vf_transpose_opencl; extern AVFilter ff_vf_transpose_vaapi; extern AVFilter ff_vf_trim; extern AVFilter ff_vf_unpremultiply; @@ -60221,10 +61888,10 @@ index bf30f54177..eb5dfa22f8 100644 case AVMEDIA_TYPE_AUDIO: diff --git a/libavfilter/vf_deinterlace_v4l2m2m.c b/libavfilter/vf_deinterlace_v4l2m2m.c new file mode 100644 -index 0000000000..d1c714b805 +index 0000000000..4ed510cd19 --- /dev/null +++ b/libavfilter/vf_deinterlace_v4l2m2m.c -@@ -0,0 +1,1282 @@ +@@ -0,0 +1,1994 @@ +/* + * This file is part of FFmpeg. + * @@ -60262,6 +61929,8 @@ index 0000000000..d1c714b805 +#include +#include + ++#include "config.h" ++ +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/common.h" @@ -60279,33 +61948,49 @@ index 0000000000..d1c714b805 +#include "avfilter.h" +#include "formats.h" +#include "internal.h" ++#include "scale_eval.h" +#include "video.h" + ++#ifndef DRM_FORMAT_P030 ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ ++#endif ++ ++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined ++// in drm_fourcc.h hopefully will be sometime in the future but until then... ++#ifndef V4L2_PIX_FMT_NV12_10_COL128 ++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0') ++#endif ++ ++#ifndef V4L2_PIX_FMT_NV12_COL128 ++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12 Y/CbCr 4:2:0 128 pixel wide column */ ++#endif ++ +typedef struct V4L2Queue V4L2Queue; +typedef struct DeintV4L2M2MContextShared DeintV4L2M2MContextShared; + -+typedef struct V4L2PlaneInfo { -+ int bytesperline; -+ size_t length; -+} V4L2PlaneInfo; ++typedef enum filter_type_v4l2_e ++{ ++ FILTER_V4L2_DEINTERLACE = 1, ++ FILTER_V4L2_SCALE, ++} filter_type_v4l2_t; + +typedef struct V4L2Buffer { + int enqueued; + int reenqueue; -+ int fd; + struct v4l2_buffer buffer; + AVFrame frame; + struct v4l2_plane planes[VIDEO_MAX_PLANES]; + int num_planes; -+ V4L2PlaneInfo plane_info[VIDEO_MAX_PLANES]; + AVDRMFrameDescriptor drm_frame; + V4L2Queue *q; +} V4L2Buffer; + +typedef struct V4L2Queue { + struct v4l2_format format; ++ struct v4l2_selection sel; + int num_buffers; + V4L2Buffer *buffers; ++ const char * name; + DeintV4L2M2MContextShared *ctx; +} V4L2Queue; + @@ -60338,11 +62023,18 @@ index 0000000000..d1c714b805 + +typedef struct DeintV4L2M2MContextShared { + void * logctx; // For logging - will be NULL when done ++ filter_type_v4l2_t filter_type; + + int fd; + int done; + int width; + int height; ++ ++ // from options ++ int output_width; ++ int output_height; ++ enum AVPixelFormat output_format; ++ + int orig_width; + int orig_height; + atomic_uint refcount; @@ -60361,8 +62053,64 @@ index 0000000000..d1c714b805 + const AVClass *class; + + DeintV4L2M2MContextShared *shared; ++ ++ char * w_expr; ++ char * h_expr; ++ char * output_format_string;; ++ ++ int force_original_aspect_ratio; ++ int force_divisible_by; ++ ++ char *colour_primaries_string; ++ char *colour_transfer_string; ++ char *colour_matrix_string; ++ int colour_range; ++ char *chroma_location_string; ++ ++ enum AVColorPrimaries colour_primaries; ++ enum AVColorTransferCharacteristic colour_transfer; ++ enum AVColorSpace colour_matrix; ++ enum AVChromaLocation chroma_location; +} DeintV4L2M2MContext; + ++// These just list the ones we know we can cope with ++static uint32_t ++fmt_av_to_v4l2(const enum AVPixelFormat avfmt) ++{ ++ switch (avfmt) { ++ case AV_PIX_FMT_YUV420P: ++ return V4L2_PIX_FMT_YUV420; ++ case AV_PIX_FMT_NV12: ++ return V4L2_PIX_FMT_NV12; ++#if CONFIG_SAND ++ case AV_PIX_FMT_RPI4_8: ++ case AV_PIX_FMT_SAND128: ++ return V4L2_PIX_FMT_NV12_COL128; ++#endif ++ default: ++ break; ++ } ++ return 0; ++} ++ ++static enum AVPixelFormat ++fmt_v4l2_to_av(const uint32_t pixfmt) ++{ ++ switch (pixfmt) { ++ case V4L2_PIX_FMT_YUV420: ++ return AV_PIX_FMT_YUV420P; ++ case V4L2_PIX_FMT_NV12: ++ return AV_PIX_FMT_NV12; ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ return AV_PIX_FMT_RPI4_8; ++#endif ++ default: ++ break; ++ } ++ return AV_PIX_FMT_NONE; ++} ++ +static unsigned int pts_stats_interval(const pts_stats_t * const stats) +{ + return stats->last_interval; @@ -60528,6 +62276,39 @@ index 0000000000..d1c714b805 + return 0; +} + ++static inline uint32_t ++fmt_bpl(const struct v4l2_format * const fmt, const unsigned int plane_n) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.plane_fmt[plane_n].bytesperline : fmt->fmt.pix.bytesperline; ++} ++ ++static inline uint32_t ++fmt_height(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height; ++} ++ ++static inline uint32_t ++fmt_width(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width; ++} ++ ++static inline uint32_t ++fmt_pixelformat(const struct v4l2_format * const fmt) ++{ ++ return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat; ++} ++ ++static void ++init_format(V4L2Queue * const q, const uint32_t format_type) ++{ ++ memset(&q->format, 0, sizeof(q->format)); ++ memset(&q->sel, 0, sizeof(q->sel)); ++ q->format.type = format_type; ++ q->sel.type = format_type; ++} ++ +static int deint_v4l2m2m_prepare_context(DeintV4L2M2MContextShared *ctx) +{ + struct v4l2_capability cap; @@ -60538,78 +62319,99 @@ index 0000000000..d1c714b805 + if (ret < 0) + return ret; + -+ if (!(cap.capabilities & V4L2_CAP_STREAMING)) ++ if (ctx->filter_type == FILTER_V4L2_SCALE && ++ strcmp("bcm2835-codec-isp", cap.card) != 0) ++ { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Not ISP\n"); + return AVERROR(EINVAL); ++ } + -+ if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { -+ ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; -+ ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT; -+ -+ return 0; ++ if (!(cap.capabilities & V4L2_CAP_STREAMING)) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "No streaming\n"); ++ return AVERROR(EINVAL); + } + + if (cap.capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) { -+ ctx->capture.format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE; -+ ctx->output.format.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE; -+ -+ return 0; ++ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); ++ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); ++ } ++ else if (cap.capabilities & V4L2_CAP_VIDEO_M2M) { ++ init_format(&ctx->capture, V4L2_BUF_TYPE_VIDEO_CAPTURE); ++ init_format(&ctx->output, V4L2_BUF_TYPE_VIDEO_OUTPUT); ++ } ++ else { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Not M2M\n"); ++ return AVERROR(EINVAL); + } + -+ return AVERROR(EINVAL); ++ return 0; +} + -+static int deint_v4l2m2m_try_format(V4L2Queue *queue) ++// Just use for probe - doesn't modify q format ++static int deint_v4l2m2m_try_format(V4L2Queue *queue, const uint32_t width, const uint32_t height, const enum AVPixelFormat avfmt) +{ -+ struct v4l2_format *fmt = &queue->format; ++ struct v4l2_format fmt = {.type = queue->format.type}; + DeintV4L2M2MContextShared *ctx = queue->ctx; + int ret, field; ++ // Pick YUV to test with if not otherwise specified ++ uint32_t pixelformat = avfmt == AV_PIX_FMT_NONE ? V4L2_PIX_FMT_YUV420 : fmt_av_to_v4l2(avfmt); ++ enum AVPixelFormat r_avfmt; + -+ ret = ioctl(ctx->fd, VIDIOC_G_FMT, fmt); ++ ++ ret = ioctl(ctx->fd, VIDIOC_G_FMT, &fmt); + if (ret) + av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_FMT failed: %d\n", ret); + -+ if (V4L2_TYPE_IS_OUTPUT(fmt->type)) ++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && V4L2_TYPE_IS_OUTPUT(fmt.type)) + field = V4L2_FIELD_INTERLACED_TB; + else + field = V4L2_FIELD_NONE; + -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ fmt->fmt.pix_mp.pixelformat = V4L2_PIX_FMT_YUV420; -+ fmt->fmt.pix_mp.field = field; -+ fmt->fmt.pix_mp.width = ctx->width; -+ fmt->fmt.pix_mp.height = ctx->height; ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { ++ fmt.fmt.pix_mp.pixelformat = pixelformat; ++ fmt.fmt.pix_mp.field = field; ++ fmt.fmt.pix_mp.width = width; ++ fmt.fmt.pix_mp.height = height; + } else { -+ fmt->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420; -+ fmt->fmt.pix.field = field; -+ fmt->fmt.pix.width = ctx->width; -+ fmt->fmt.pix.height = ctx->height; ++ fmt.fmt.pix.pixelformat = pixelformat; ++ fmt.fmt.pix.field = field; ++ fmt.fmt.pix.width = width; ++ fmt.fmt.pix.height = height; + } + -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, -+ fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height, -+ fmt->fmt.pix_mp.pixelformat, -+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline); ++ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u pre\n", __func__, ++ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, ++ fmt.fmt.pix_mp.pixelformat, ++ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); + -+ ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, fmt); ++ ret = ioctl(ctx->fd, VIDIOC_TRY_FMT, &fmt); + if (ret) + return AVERROR(EINVAL); + -+ av_log(ctx->logctx, AV_LOG_DEBUG, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, -+ fmt->type, fmt->fmt.pix_mp.width, fmt->fmt.pix_mp.height, -+ fmt->fmt.pix_mp.pixelformat, -+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage, fmt->fmt.pix_mp.plane_fmt[0].bytesperline); ++ av_log(ctx->logctx, AV_LOG_TRACE, "%s: Trying format for type %d, wxh: %dx%d, fmt: %08x, size %u bpl %u post\n", __func__, ++ fmt.type, fmt.fmt.pix_mp.width, fmt.fmt.pix_mp.height, ++ fmt.fmt.pix_mp.pixelformat, ++ fmt.fmt.pix_mp.plane_fmt[0].sizeimage, fmt.fmt.pix_mp.plane_fmt[0].bytesperline); + -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ if (fmt->fmt.pix_mp.pixelformat != V4L2_PIX_FMT_YUV420 || -+ fmt->fmt.pix_mp.field != field) { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); ++ r_avfmt = fmt_v4l2_to_av(fmt_pixelformat(&fmt)); ++ if (r_avfmt != avfmt && avfmt != AV_PIX_FMT_NONE) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Unable to set format %s on %s port\n", av_get_pix_fmt_name(avfmt), V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); ++ return AVERROR(EINVAL); ++ } ++ if (r_avfmt == AV_PIX_FMT_NONE) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "No supported format on %s port\n", V4L2_TYPE_IS_CAPTURE(fmt.type) ? "dest" : "src"); ++ return AVERROR(EINVAL); ++ } ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt.type)) { ++ if (fmt.fmt.pix_mp.field != field) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); + + return AVERROR(EINVAL); + } + } else { -+ if (fmt->fmt.pix.pixelformat != V4L2_PIX_FMT_YUV420 || -+ fmt->fmt.pix.field != field) { -+ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt->type); ++ if (fmt.fmt.pix.field != field) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "format not supported for type %d\n", fmt.type); + + return AVERROR(EINVAL); + } @@ -60618,51 +62420,412 @@ index 0000000000..d1c714b805 + return 0; +} + -+static int deint_v4l2m2m_set_format(V4L2Queue *queue, uint32_t field, int width, int height, int pitch, int ysize) ++static int ++do_s_fmt(V4L2Queue * const q) +{ -+ struct v4l2_format *fmt = &queue->format; -+ DeintV4L2M2MContextShared *ctx = queue->ctx; ++ DeintV4L2M2MContextShared * const ctx = q->ctx; ++ const uint32_t pixelformat = fmt_pixelformat(&q->format); + int ret; + -+ struct v4l2_selection sel = { -+ .type = fmt->type, -+ .target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP_BOUNDS : V4L2_SEL_TGT_COMPOSE_BOUNDS, -+ }; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { -+ fmt->fmt.pix_mp.field = field; -+ fmt->fmt.pix_mp.width = width; -+ fmt->fmt.pix_mp.height = ysize / pitch; -+ fmt->fmt.pix_mp.plane_fmt[0].bytesperline = pitch; -+ fmt->fmt.pix_mp.plane_fmt[0].sizeimage = ysize + (ysize >> 1); -+ } else { -+ fmt->fmt.pix.field = field; -+ fmt->fmt.pix.width = width; -+ fmt->fmt.pix.height = height; -+ fmt->fmt.pix.sizeimage = 0; -+ fmt->fmt.pix.bytesperline = 0; ++ ret = ioctl(ctx->fd, VIDIOC_S_FMT, &q->format); ++ if (ret) { ++ ret = AVERROR(errno); ++ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %s\n", av_err2str(ret)); ++ return ret; + } + -+ ret = ioctl(ctx->fd, VIDIOC_S_FMT, fmt); -+ if (ret) -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_FMT failed: %d\n", ret); ++ if (pixelformat != fmt_pixelformat(&q->format)) { ++ av_log(ctx->logctx, AV_LOG_ERROR, "Format not supported: %s; S_FMT returned %s\n", av_fourcc2str(pixelformat), av_fourcc2str(fmt_pixelformat(&q->format))); ++ return AVERROR(EINVAL); ++ } + -+ ret = ioctl(ctx->fd, VIDIOC_G_SELECTION, &sel); -+ if (ret) -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_G_SELECTION failed: %d\n", ret); ++ q->sel.target = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, ++ q->sel.flags = V4L2_TYPE_IS_OUTPUT(q->sel.type) ? V4L2_SEL_FLAG_LE : V4L2_SEL_FLAG_GE; + -+ sel.r.width = width; -+ sel.r.height = height; -+ sel.r.left = 0; -+ sel.r.top = 0; -+ sel.target = V4L2_TYPE_IS_OUTPUT(fmt->type) ? V4L2_SEL_TGT_CROP : V4L2_SEL_TGT_COMPOSE, -+ sel.flags = V4L2_SEL_FLAG_LE; ++ ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &q->sel); ++ if (ret) { ++ ret = AVERROR(errno); ++ av_log(ctx->logctx, AV_LOG_WARNING, "VIDIOC_S_SELECTION failed: %s\n", av_err2str(ret)); ++ } + -+ ret = ioctl(ctx->fd, VIDIOC_S_SELECTION, &sel); -+ if (ret) -+ av_log(ctx->logctx, AV_LOG_ERROR, "VIDIOC_S_SELECTION failed: %d\n", ret); ++ return 0; ++} + -+ return ret; ++static void ++set_fmt_color(struct v4l2_format *const fmt, ++ const enum AVColorPrimaries avcp, ++ const enum AVColorSpace avcs, ++ const enum AVColorTransferCharacteristic avxc) ++{ ++ enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT; ++ enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT; ++ enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT; ++ ++ switch (avcp) { ++ case AVCOL_PRI_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ ycbcr = V4L2_YCBCR_ENC_709; ++ break; ++ case AVCOL_PRI_BT470M: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ ycbcr = V4L2_YCBCR_ENC_601; ++ break; ++ case AVCOL_PRI_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_PRI_SMPTE170M: ++ cs = V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_PRI_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_PRI_BT2020: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ case AVCOL_PRI_SMPTE428: ++ case AVCOL_PRI_SMPTE431: ++ case AVCOL_PRI_SMPTE432: ++ case AVCOL_PRI_EBU3213: ++ case AVCOL_PRI_RESERVED: ++ case AVCOL_PRI_FILM: ++ case AVCOL_PRI_UNSPECIFIED: ++ default: ++ break; ++ } ++ ++ switch (avcs) { ++ case AVCOL_SPC_RGB: ++ cs = V4L2_COLORSPACE_SRGB; ++ break; ++ case AVCOL_SPC_BT709: ++ cs = V4L2_COLORSPACE_REC709; ++ break; ++ case AVCOL_SPC_FCC: ++ cs = V4L2_COLORSPACE_470_SYSTEM_M; ++ break; ++ case AVCOL_SPC_BT470BG: ++ cs = V4L2_COLORSPACE_470_SYSTEM_BG; ++ break; ++ case AVCOL_SPC_SMPTE170M: ++ cs = V4L2_COLORSPACE_SMPTE170M; ++ break; ++ case AVCOL_SPC_SMPTE240M: ++ cs = V4L2_COLORSPACE_SMPTE240M; ++ break; ++ case AVCOL_SPC_BT2020_CL: ++ cs = V4L2_COLORSPACE_BT2020; ++ ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM; ++ break; ++ case AVCOL_SPC_BT2020_NCL: ++ cs = V4L2_COLORSPACE_BT2020; ++ break; ++ default: ++ break; ++ } ++ ++ switch (xfer) { ++ case AVCOL_TRC_BT709: ++ xfer = V4L2_XFER_FUNC_709; ++ break; ++ case AVCOL_TRC_IEC61966_2_1: ++ xfer = V4L2_XFER_FUNC_SRGB; ++ break; ++ case AVCOL_TRC_SMPTE240M: ++ xfer = V4L2_XFER_FUNC_SMPTE240M; ++ break; ++ case AVCOL_TRC_SMPTE2084: ++ xfer = V4L2_XFER_FUNC_SMPTE2084; ++ break; ++ default: ++ break; ++ } ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ fmt->fmt.pix_mp.colorspace = cs; ++ fmt->fmt.pix_mp.ycbcr_enc = ycbcr; ++ fmt->fmt.pix_mp.xfer_func = xfer; ++ } else { ++ fmt->fmt.pix.colorspace = cs; ++ fmt->fmt.pix.ycbcr_enc = ycbcr; ++ fmt->fmt.pix.xfer_func = xfer; ++ } ++} ++ ++static void ++set_fmt_color_range(struct v4l2_format *const fmt, const enum AVColorRange avcr) ++{ ++ const enum v4l2_quantization q = ++ avcr == AVCOL_RANGE_MPEG ? V4L2_QUANTIZATION_LIM_RANGE : ++ avcr == AVCOL_RANGE_JPEG ? V4L2_QUANTIZATION_FULL_RANGE : ++ V4L2_QUANTIZATION_DEFAULT; ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ fmt->fmt.pix_mp.quantization = q; ++ } else { ++ fmt->fmt.pix.quantization = q; ++ } ++} ++ ++static enum AVColorPrimaries get_color_primaries(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_ycbcr_encoding ycbcr; ++ enum v4l2_colorspace cs; ++ ++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.colorspace : ++ fmt->fmt.pix.colorspace; ++ ++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.ycbcr_enc: ++ fmt->fmt.pix.ycbcr_enc; ++ ++ switch(ycbcr) { ++ case V4L2_YCBCR_ENC_XV709: ++ case V4L2_YCBCR_ENC_709: return AVCOL_PRI_BT709; ++ case V4L2_YCBCR_ENC_XV601: ++ case V4L2_YCBCR_ENC_601:return AVCOL_PRI_BT470M; ++ default: ++ break; ++ } ++ ++ switch(cs) { ++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_PRI_BT470BG; ++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_PRI_SMPTE170M; ++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_PRI_SMPTE240M; ++ case V4L2_COLORSPACE_BT2020: return AVCOL_PRI_BT2020; ++ default: ++ break; ++ } ++ ++ return AVCOL_PRI_UNSPECIFIED; ++} ++ ++static enum AVColorSpace get_color_space(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_ycbcr_encoding ycbcr; ++ enum v4l2_colorspace cs; ++ ++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.colorspace : ++ fmt->fmt.pix.colorspace; ++ ++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.ycbcr_enc: ++ fmt->fmt.pix.ycbcr_enc; ++ ++ switch(cs) { ++ case V4L2_COLORSPACE_SRGB: return AVCOL_SPC_RGB; ++ case V4L2_COLORSPACE_REC709: return AVCOL_SPC_BT709; ++ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_SPC_FCC; ++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_SPC_BT470BG; ++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_SPC_SMPTE170M; ++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_SPC_SMPTE240M; ++ case V4L2_COLORSPACE_BT2020: ++ if (ycbcr == V4L2_YCBCR_ENC_BT2020_CONST_LUM) ++ return AVCOL_SPC_BT2020_CL; ++ else ++ return AVCOL_SPC_BT2020_NCL; ++ default: ++ break; ++ } ++ ++ return AVCOL_SPC_UNSPECIFIED; ++} ++ ++static enum AVColorTransferCharacteristic get_color_trc(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_ycbcr_encoding ycbcr; ++ enum v4l2_xfer_func xfer; ++ enum v4l2_colorspace cs; ++ ++ cs = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.colorspace : ++ fmt->fmt.pix.colorspace; ++ ++ ycbcr = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.ycbcr_enc: ++ fmt->fmt.pix.ycbcr_enc; ++ ++ xfer = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.xfer_func: ++ fmt->fmt.pix.xfer_func; ++ ++ switch (xfer) { ++ case V4L2_XFER_FUNC_709: return AVCOL_TRC_BT709; ++ case V4L2_XFER_FUNC_SRGB: return AVCOL_TRC_IEC61966_2_1; ++ default: ++ break; ++ } ++ ++ switch (cs) { ++ case V4L2_COLORSPACE_470_SYSTEM_M: return AVCOL_TRC_GAMMA22; ++ case V4L2_COLORSPACE_470_SYSTEM_BG: return AVCOL_TRC_GAMMA28; ++ case V4L2_COLORSPACE_SMPTE170M: return AVCOL_TRC_SMPTE170M; ++ case V4L2_COLORSPACE_SMPTE240M: return AVCOL_TRC_SMPTE240M; ++ default: ++ break; ++ } ++ ++ switch (ycbcr) { ++ case V4L2_YCBCR_ENC_XV709: ++ case V4L2_YCBCR_ENC_XV601: return AVCOL_TRC_BT1361_ECG; ++ default: ++ break; ++ } ++ ++ return AVCOL_TRC_UNSPECIFIED; ++} ++ ++static enum AVColorRange get_color_range(const struct v4l2_format *const fmt) ++{ ++ enum v4l2_quantization qt; ++ ++ qt = V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? ++ fmt->fmt.pix_mp.quantization : ++ fmt->fmt.pix.quantization; ++ ++ switch (qt) { ++ case V4L2_QUANTIZATION_LIM_RANGE: return AVCOL_RANGE_MPEG; ++ case V4L2_QUANTIZATION_FULL_RANGE: return AVCOL_RANGE_JPEG; ++ default: ++ break; ++ } ++ ++ return AVCOL_RANGE_UNSPECIFIED; ++} ++ ++static int set_src_fmt(V4L2Queue * const q, const AVFrame * const frame) ++{ ++ struct v4l2_format *const format = &q->format; ++ const AVDRMFrameDescriptor *const src = (const AVDRMFrameDescriptor *)frame->data[0]; ++ ++ const uint32_t drm_fmt = src->layers[0].format; ++ // Treat INVALID as LINEAR ++ const uint64_t mod = src->objects[0].format_modifier == DRM_FORMAT_MOD_INVALID ? ++ DRM_FORMAT_MOD_LINEAR : src->objects[0].format_modifier; ++ uint32_t pix_fmt = 0; ++ uint32_t w = 0; ++ uint32_t h = 0; ++ uint32_t bpl = src->layers[0].planes[0].pitch; ++ ++ // We really don't expect multiple layers ++ // All formats that we currently cope with are single object ++ ++ if (src->nb_layers != 1 || src->nb_objects != 1) ++ return AVERROR(EINVAL); ++ ++ switch (drm_fmt) { ++ case DRM_FORMAT_YUV420: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 3) ++ break; ++ pix_fmt = V4L2_PIX_FMT_YUV420; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++ break; ++ ++ case DRM_FORMAT_NV12: ++ if (mod == DRM_FORMAT_MOD_LINEAR) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12; ++ h = src->layers[0].planes[1].offset / bpl; ++ w = bpl; ++ } ++#if CONFIG_SAND ++ else if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_COL128; ++ w = bpl; ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++#endif ++ break; ++ ++ case DRM_FORMAT_P030: ++#if CONFIG_SAND ++ if (fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128) { ++ if (src->layers[0].nb_planes != 2) ++ break; ++ pix_fmt = V4L2_PIX_FMT_NV12_10_COL128; ++ w = bpl / 2; // Matching lie to how we construct this ++ h = src->layers[0].planes[1].offset / 128; ++ bpl = fourcc_mod_broadcom_param(mod); ++ } ++#endif ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!pix_fmt) ++ return AVERROR(EINVAL); ++ ++ if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) { ++ struct v4l2_pix_format_mplane *const pix = &format->fmt.pix_mp; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->plane_fmt[0].bytesperline = bpl; ++ pix->num_planes = 1; ++ } ++ else { ++ struct v4l2_pix_format *const pix = &format->fmt.pix; ++ ++ pix->width = w; ++ pix->height = h; ++ pix->pixelformat = pix_fmt; ++ pix->bytesperline = bpl; ++ } ++ ++ set_fmt_color(format, frame->color_primaries, frame->colorspace, frame->color_trc); ++ set_fmt_color_range(format, frame->color_range); ++ ++ q->sel.r.width = frame->width - (frame->crop_left + frame->crop_right); ++ q->sel.r.height = frame->height - (frame->crop_top + frame->crop_bottom); ++ q->sel.r.left = frame->crop_left; ++ q->sel.r.top = frame->crop_top; ++ ++ return 0; ++} ++ ++ ++static int set_dst_format(DeintV4L2M2MContext * const priv, V4L2Queue *queue, uint32_t pixelformat, uint32_t field, int width, int height) ++{ ++ struct v4l2_format * const fmt = &queue->format; ++ struct v4l2_selection *const sel = &queue->sel; ++ ++ memset(&fmt->fmt, 0, sizeof(fmt->fmt)); ++ ++ // Align w/h to 16 here in case there are alignment requirements at the next ++ // stage of the filter chain (also RPi deinterlace setup is bust and this ++ // fixes it) ++ if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) { ++ fmt->fmt.pix_mp.pixelformat = pixelformat; ++ fmt->fmt.pix_mp.field = field; ++ fmt->fmt.pix_mp.width = FFALIGN(width, 16); ++ fmt->fmt.pix_mp.height = FFALIGN(height, 16); ++ } else { ++ fmt->fmt.pix.pixelformat = pixelformat; ++ fmt->fmt.pix.field = field; ++ fmt->fmt.pix.width = FFALIGN(width, 16); ++ fmt->fmt.pix.height = FFALIGN(height, 16); ++ } ++ ++ set_fmt_color(fmt, priv->colour_primaries, priv->colour_matrix, priv->colour_transfer); ++ set_fmt_color_range(fmt, priv->colour_range); ++ ++ sel->r.width = width; ++ sel->r.height = height; ++ sel->r.left = 0; ++ sel->r.top = 0; ++ ++ return do_s_fmt(queue); +} + +static int deint_v4l2m2m_probe_device(DeintV4L2M2MContextShared *ctx, char *node) @@ -60674,16 +62837,22 @@ index 0000000000..d1c714b805 + return AVERROR(errno); + + ret = deint_v4l2m2m_prepare_context(ctx); -+ if (ret) ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to prepare context\n"); + goto fail; ++ } + -+ ret = deint_v4l2m2m_try_format(&ctx->capture); -+ if (ret) ++ ret = deint_v4l2m2m_try_format(&ctx->capture, ctx->output_width, ctx->output_height, ctx->output_format); ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try dst format\n"); + goto fail; ++ } + -+ ret = deint_v4l2m2m_try_format(&ctx->output); -+ if (ret) ++ ret = deint_v4l2m2m_try_format(&ctx->output, ctx->width, ctx->height, AV_PIX_FMT_NONE); ++ if (ret) { ++ av_log(ctx->logctx, AV_LOG_DEBUG, "Failed to try src format\n"); + goto fail; ++ } + + return 0; + @@ -60744,11 +62913,118 @@ index 0000000000..d1c714b805 + return 0; +} + -+static int v4l2_buffer_export_drm(V4L2Buffer* avbuf) ++static void ++drm_frame_init(AVDRMFrameDescriptor * const d) ++{ ++ unsigned int i; ++ for (i = 0; i != AV_DRM_MAX_PLANES; ++i) { ++ d->objects[i].fd = -1; ++ } ++} ++ ++static void ++drm_frame_uninit(AVDRMFrameDescriptor * const d) ++{ ++ unsigned int i; ++ for (i = 0; i != d->nb_objects; ++i) { ++ if (d->objects[i].fd != -1) { ++ close(d->objects[i].fd); ++ d->objects[i].fd = -1; ++ } ++ } ++} ++ ++static void ++avbufs_delete(V4L2Buffer** ppavbufs, const unsigned int n) ++{ ++ unsigned int i; ++ V4L2Buffer* const avbufs = *ppavbufs; ++ ++ if (avbufs == NULL) ++ return; ++ *ppavbufs = NULL; ++ ++ for (i = 0; i != n; ++i) { ++ V4L2Buffer* const avbuf = avbufs + i; ++ drm_frame_uninit(&avbuf->drm_frame); ++ } ++ ++ av_free(avbufs); ++} ++ ++static int v4l2_buffer_export_drm(V4L2Queue * const q, V4L2Buffer * const avbuf) +{ + struct v4l2_exportbuffer expbuf; + int i, ret; ++ uint64_t mod = DRM_FORMAT_MOD_LINEAR; + ++ AVDRMFrameDescriptor * const drm_desc = &avbuf->drm_frame; ++ AVDRMLayerDescriptor * const layer = &drm_desc->layers[0]; ++ const struct v4l2_format *const fmt = &q->format; ++ const uint32_t height = fmt_height(fmt); ++ ptrdiff_t bpl0; ++ ++ /* fill the DRM frame descriptor */ ++ drm_desc->nb_layers = 1; ++ layer->nb_planes = avbuf->num_planes; ++ ++ for (int i = 0; i < avbuf->num_planes; i++) { ++ layer->planes[i].object_index = i; ++ layer->planes[i].offset = 0; ++ layer->planes[i].pitch = fmt_bpl(fmt, i); ++ } ++ bpl0 = layer->planes[0].pitch; ++ ++ switch (fmt_pixelformat(fmt)) { ++#if CONFIG_SAND ++ case V4L2_PIX_FMT_NV12_COL128: ++ mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl0); ++ layer->format = V4L2_PIX_FMT_NV12; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 2; ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = height * 128; ++ layer->planes[0].pitch = fmt_width(fmt); ++ layer->planes[1].pitch = layer->planes[0].pitch; ++ break; ++#endif ++ ++ case DRM_FORMAT_NV12: ++ layer->format = V4L2_PIX_FMT_NV12; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 2; ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = bpl0 * height; ++ layer->planes[1].pitch = bpl0; ++ break; ++ ++ case V4L2_PIX_FMT_YUV420: ++ layer->format = DRM_FORMAT_YUV420; ++ ++ if (avbuf->num_planes > 1) ++ break; ++ ++ layer->nb_planes = 3; ++ layer->planes[1].object_index = 0; ++ layer->planes[1].offset = bpl0 * height; ++ layer->planes[1].pitch = bpl0 / 2; ++ layer->planes[2].object_index = 0; ++ layer->planes[2].offset = layer->planes[1].offset + ((bpl0 * height) / 4); ++ layer->planes[2].pitch = bpl0 / 2; ++ break; ++ ++ default: ++ drm_desc->nb_layers = 0; ++ return AVERROR(EINVAL); ++ } ++ ++ drm_desc->nb_objects = 0; + for (i = 0; i < avbuf->num_planes; i++) { + memset(&expbuf, 0, sizeof(expbuf)); + @@ -60760,19 +63036,11 @@ index 0000000000..d1c714b805 + if (ret < 0) + return AVERROR(errno); + -+ avbuf->fd = expbuf.fd; -+ -+ if (V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type)) { -+ /* drm frame */ -+ avbuf->drm_frame.objects[i].size = avbuf->buffer.m.planes[i].length; -+ avbuf->drm_frame.objects[i].fd = expbuf.fd; -+ avbuf->drm_frame.objects[i].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ } else { -+ /* drm frame */ -+ avbuf->drm_frame.objects[0].size = avbuf->buffer.length; -+ avbuf->drm_frame.objects[0].fd = expbuf.fd; -+ avbuf->drm_frame.objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR; -+ } ++ drm_desc->objects[i].size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buffer.type) ? ++ avbuf->buffer.m.planes[i].length : avbuf->buffer.length; ++ drm_desc->objects[i].fd = expbuf.fd; ++ drm_desc->objects[i].format_modifier = mod; ++ drm_desc->nb_objects = i + 1; + } + + return 0; @@ -60783,7 +63051,7 @@ index 0000000000..d1c714b805 + struct v4l2_format *fmt = &queue->format; + DeintV4L2M2MContextShared *ctx = queue->ctx; + struct v4l2_requestbuffers req; -+ int ret, i, j, multiplanar; ++ int ret, i, multiplanar; + uint32_t memory; + + memory = V4L2_TYPE_IS_OUTPUT(fmt->type) ? @@ -60812,10 +63080,9 @@ index 0000000000..d1c714b805 + } + + for (i = 0; i < queue->num_buffers; i++) { -+ V4L2Buffer *buf = &queue->buffers[i]; ++ V4L2Buffer * const buf = &queue->buffers[i]; + + buf->enqueued = 0; -+ buf->fd = -1; + buf->q = queue; + + buf->buffer.type = fmt->type; @@ -60827,6 +63094,12 @@ index 0000000000..d1c714b805 + buf->buffer.m.planes = buf->planes; + } + ++ drm_frame_init(&buf->drm_frame); ++ } ++ ++ for (i = 0; i < queue->num_buffers; i++) { ++ V4L2Buffer * const buf = &queue->buffers[i]; ++ + ret = ioctl(ctx->fd, VIDIOC_QUERYBUF, &buf->buffer); + if (ret < 0) { + ret = AVERROR(errno); @@ -60834,29 +63107,14 @@ index 0000000000..d1c714b805 + goto fail; + } + -+ if (multiplanar) -+ buf->num_planes = buf->buffer.length; -+ else -+ buf->num_planes = 1; -+ -+ for (j = 0; j < buf->num_planes; j++) { -+ V4L2PlaneInfo *info = &buf->plane_info[j]; -+ -+ if (multiplanar) { -+ info->bytesperline = fmt->fmt.pix_mp.plane_fmt[j].bytesperline; -+ info->length = buf->buffer.m.planes[j].length; -+ } else { -+ info->bytesperline = fmt->fmt.pix.bytesperline; -+ info->length = buf->buffer.length; -+ } -+ } ++ buf->num_planes = multiplanar ? buf->buffer.length : 1; + + if (!V4L2_TYPE_IS_OUTPUT(fmt->type)) { + ret = deint_v4l2m2m_enqueue_buffer(buf); + if (ret) + goto fail; + -+ ret = v4l2_buffer_export_drm(buf); ++ ret = v4l2_buffer_export_drm(queue, buf); + if (ret) + goto fail; + } @@ -60865,12 +63123,8 @@ index 0000000000..d1c714b805 + return 0; + +fail: -+ for (i = 0; i < queue->num_buffers; i++) -+ if (queue->buffers[i].fd >= 0) -+ close(queue->buffers[i].fd); -+ av_free(queue->buffers); -+ queue->buffers = NULL; -+ ++ avbufs_delete(&queue->buffers, queue->num_buffers); ++ queue->num_buffers = 0; + return ret; +} + @@ -61057,7 +63311,6 @@ index 0000000000..d1c714b805 + if (atomic_fetch_sub(&ctx->refcount, 1) == 1) { + V4L2Queue *capture = &ctx->capture; + V4L2Queue *output = &ctx->output; -+ int i; + + av_log(NULL, AV_LOG_DEBUG, "%s - destroying context\n", __func__); + @@ -61066,12 +63319,7 @@ index 0000000000..d1c714b805 + deint_v4l2m2m_streamoff(output); + } + -+ if (capture->buffers) -+ for (i = 0; i < capture->num_buffers; i++) { -+ capture->buffers[i].q = NULL; -+ if (capture->buffers[i].fd >= 0) -+ close(capture->buffers[i].fd); -+ } ++ avbufs_delete(&capture->buffers, capture->num_buffers); + + deint_v4l2m2m_unref_queued(output); + @@ -61103,84 +63351,15 @@ index 0000000000..d1c714b805 + deint_v4l2m2m_destroy_context(ctx); +} + -+static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf, int height) -+{ -+ int av_pix_fmt = AV_PIX_FMT_YUV420P; -+ AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame; -+ AVDRMLayerDescriptor *layer; -+ -+ /* fill the DRM frame descriptor */ -+ drm_desc->nb_objects = avbuf->num_planes; -+ drm_desc->nb_layers = 1; -+ -+ layer = &drm_desc->layers[0]; -+ layer->nb_planes = avbuf->num_planes; -+ -+ for (int i = 0; i < avbuf->num_planes; i++) { -+ layer->planes[i].object_index = i; -+ layer->planes[i].offset = 0; -+ layer->planes[i].pitch = avbuf->plane_info[i].bytesperline; -+ } -+ -+ switch (av_pix_fmt) { -+ case AV_PIX_FMT_YUYV422: -+ -+ layer->format = DRM_FORMAT_YUYV; -+ layer->nb_planes = 1; -+ -+ break; -+ -+ case AV_PIX_FMT_NV12: -+ case AV_PIX_FMT_NV21: -+ -+ layer->format = av_pix_fmt == AV_PIX_FMT_NV12 ? -+ DRM_FORMAT_NV12 : DRM_FORMAT_NV21; -+ -+ if (avbuf->num_planes > 1) -+ break; -+ -+ layer->nb_planes = 2; -+ -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -+ height; -+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline; -+ break; -+ -+ case AV_PIX_FMT_YUV420P: -+ -+ layer->format = DRM_FORMAT_YUV420; -+ -+ if (avbuf->num_planes > 1) -+ break; -+ -+ layer->nb_planes = 3; -+ -+ layer->planes[1].object_index = 0; -+ layer->planes[1].offset = avbuf->plane_info[0].bytesperline * -+ height; -+ layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1; -+ -+ layer->planes[2].object_index = 0; -+ layer->planes[2].offset = layer->planes[1].offset + -+ ((avbuf->plane_info[0].bytesperline * -+ height) >> 2); -+ layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1; -+ break; -+ -+ default: -+ drm_desc->nb_layers = 0; -+ break; -+ } -+ -+ return (uint8_t *) drm_desc; -+} -+ +// timeout in ms +static int deint_v4l2m2m_dequeue_frame(V4L2Queue *queue, AVFrame* frame, int timeout) +{ + DeintV4L2M2MContextShared *ctx = queue->ctx; + V4L2Buffer* avbuf; ++ enum AVColorPrimaries color_primaries; ++ enum AVColorSpace colorspace; ++ enum AVColorTransferCharacteristic color_trc; ++ enum AVColorRange color_range; + + av_log(ctx->logctx, AV_LOG_TRACE, "<<< %s\n", __func__); + @@ -61191,8 +63370,6 @@ index 0000000000..d1c714b805 + } + + // Fill in PTS and anciliary info from src frame -+ // we will want to overwrite some fields as only the pts/dts -+ // fields are updated with new timing in this fn + pts_track_get_frame(&ctx->track, avbuf->buffer.timestamp, frame); + + frame->buf[0] = av_buffer_create((uint8_t *) &avbuf->drm_frame, @@ -61205,18 +63382,36 @@ index 0000000000..d1c714b805 + + atomic_fetch_add(&ctx->refcount, 1); + -+ frame->data[0] = (uint8_t *)v4l2_get_drm_frame(avbuf, ctx->orig_height); ++ frame->data[0] = (uint8_t *)&avbuf->drm_frame; + frame->format = AV_PIX_FMT_DRM_PRIME; + if (ctx->hw_frames_ctx) + frame->hw_frames_ctx = av_buffer_ref(ctx->hw_frames_ctx); -+ frame->height = ctx->height; -+ frame->width = ctx->width; ++ frame->height = ctx->output_height; ++ frame->width = ctx->output_width; + -+ // Not interlaced now -+ frame->interlaced_frame = 0; -+ frame->top_field_first = 0; -+ // Pkt duration halved -+ frame->pkt_duration /= 2; ++ color_primaries = get_color_primaries(&ctx->capture.format); ++ colorspace = get_color_space(&ctx->capture.format); ++ color_trc = get_color_trc(&ctx->capture.format); ++ color_range = get_color_range(&ctx->capture.format); ++ ++ // If the color parameters are unspecified by V4L2 then leave alone as they ++ // will have been copied from src ++ if (color_primaries != AVCOL_PRI_UNSPECIFIED) ++ frame->color_primaries = color_primaries; ++ if (colorspace != AVCOL_SPC_UNSPECIFIED) ++ frame->colorspace = colorspace; ++ if (color_trc != AVCOL_TRC_UNSPECIFIED) ++ frame->color_trc = color_trc; ++ if (color_range != AVCOL_RANGE_UNSPECIFIED) ++ frame->color_range = color_range; ++ ++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE) { ++ // Not interlaced now ++ frame->interlaced_frame = 0; // *** Fill in from dst buffer? ++ frame->top_field_first = 0; ++ // Pkt duration halved ++ frame->pkt_duration /= 2; ++ } + + if (avbuf->buffer.flags & V4L2_BUF_FLAG_ERROR) { + av_log(ctx->logctx, AV_LOG_ERROR, "driver decode error\n"); @@ -61238,14 +63433,36 @@ index 0000000000..d1c714b805 + ctx->height = avctx->inputs[0]->h; + ctx->width = avctx->inputs[0]->w; + -+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d\n", __func__, ctx->width, ctx->height); ++ if (ctx->filter_type == FILTER_V4L2_SCALE) { ++ if ((ret = ff_scale_eval_dimensions(priv, ++ priv->w_expr, priv->h_expr, ++ inlink, outlink, ++ &ctx->output_width, &ctx->output_height)) < 0) ++ return ret; ++ ++ ff_scale_adjust_dimensions(inlink, &ctx->output_width, &ctx->output_height, ++ priv->force_original_aspect_ratio, priv->force_divisible_by); ++ } ++ else { ++ ctx->output_width = ctx->width; ++ ctx->output_height = ctx->height; ++ } ++ ++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d->%dx%d FR: %d/%d->%d/%d\n", __func__, ++ ctx->width, ctx->height, ctx->output_width, ctx->output_height, ++ inlink->frame_rate.num, inlink->frame_rate.den, outlink->frame_rate.num, outlink->frame_rate.den); + + outlink->time_base = inlink->time_base; -+ outlink->w = inlink->w; -+ outlink->h = inlink->h; -+ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; ++ outlink->w = ctx->output_width; ++ outlink->h = ctx->output_height; + outlink->format = inlink->format; -+ outlink->frame_rate = (AVRational) {1, 0}; // Deny knowledge of frame rate ++ if (ctx->filter_type == FILTER_V4L2_DEINTERLACE && inlink->frame_rate.den != 0) ++ outlink->frame_rate = (AVRational){inlink->frame_rate.num * 2, inlink->frame_rate.den}; ++ ++ if (inlink->sample_aspect_ratio.num) ++ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio); ++ else ++ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio; + + ret = deint_v4l2m2m_find_device(ctx); + if (ret) @@ -61263,13 +63480,37 @@ index 0000000000..d1c714b805 +{ + static const enum AVPixelFormat pixel_formats[] = { + AV_PIX_FMT_DRM_PRIME, -+ AV_PIX_FMT_YUV420P, ++// AV_PIX_FMT_YUV420P, + AV_PIX_FMT_NONE, + }; + + return ff_set_common_formats(avctx, ff_make_format_list(pixel_formats)); +} + ++static uint32_t desc_pixelformat(const AVDRMFrameDescriptor * const drm_desc) ++{ ++ const uint64_t mod = drm_desc->objects[0].format_modifier; ++ const int is_linear = (mod == DRM_FORMAT_MOD_LINEAR || mod == DRM_FORMAT_MOD_INVALID); ++ ++ // Only currently support single object things ++ if (drm_desc->nb_objects != 1) ++ return 0; ++ ++ switch (drm_desc->layers[0].format) { ++ case DRM_FORMAT_YUV420: ++ return is_linear ? V4L2_PIX_FMT_YUV420 : 0; ++ case DRM_FORMAT_NV12: ++ return is_linear ? V4L2_PIX_FMT_NV12 : ++#if CONFIG_SAND ++ fourcc_mod_broadcom_mod(mod) == DRM_FORMAT_MOD_BROADCOM_SAND128 ? V4L2_PIX_FMT_NV12_COL128 : ++#endif ++ 0; ++ default: ++ break; ++ } ++ return 0; ++} ++ +static int deint_v4l2m2m_filter_frame(AVFilterLink *link, AVFrame *in) +{ + AVFilterContext *avctx = link->dst; @@ -61285,41 +63526,71 @@ index 0000000000..d1c714b805 + avctx->inputs[0]->status_in, avctx->inputs[0]->status_out, avctx->outputs[0]->status_in, avctx->outputs[0]->status_out); + + if (ctx->field_order == V4L2_FIELD_ANY) { -+ AVDRMFrameDescriptor *drm_desc = (AVDRMFrameDescriptor *)in->data[0]; ++ const AVDRMFrameDescriptor * const drm_desc = (AVDRMFrameDescriptor *)in->data[0]; ++ uint32_t pixelformat = desc_pixelformat(drm_desc); ++ ++ if (pixelformat == 0) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported DRM format %s in %d objects, modifier %#" PRIx64 "\n", ++ av_fourcc2str(drm_desc->layers[0].format), ++ drm_desc->nb_objects, drm_desc->objects[0].format_modifier); ++ return AVERROR(EINVAL); ++ } ++ + ctx->orig_width = drm_desc->layers[0].planes[0].pitch; + ctx->orig_height = drm_desc->layers[0].planes[1].offset / ctx->orig_width; + -+ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%d,%d)\n", __func__, ctx->width, ctx->height, ++ av_log(priv, AV_LOG_DEBUG, "%s: %dx%d (%td,%td)\n", __func__, ctx->width, ctx->height, + drm_desc->layers[0].planes[0].pitch, drm_desc->layers[0].planes[1].offset); + ++ if ((ret = set_src_fmt(output, in)) != 0) { ++ av_log(avctx, AV_LOG_WARNING, "Unknown input DRM format: %s mod: %#" PRIx64 "\n", ++ av_fourcc2str(drm_desc->layers[0].format), drm_desc->objects[0].format_modifier); ++ return ret; ++ } ++ ++ ret = do_s_fmt(output); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to set source format\n"); ++ return ret; ++ } ++ ++ if (ctx->output_format != AV_PIX_FMT_NONE) ++ pixelformat = fmt_av_to_v4l2(ctx->output_format); ++ ret = set_dst_format(priv, capture, pixelformat, V4L2_FIELD_NONE, ctx->output_width, ctx->output_height); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to set destination format\n"); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_allocate_buffers(capture); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate destination buffers\n"); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_streamon(capture); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed set destination streamon: %s\n", av_err2str(ret)); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_allocate_buffers(output); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed to allocate src buffers\n"); ++ return ret; ++ } ++ ++ ret = deint_v4l2m2m_streamon(output); ++ if (ret) { ++ av_log(avctx, AV_LOG_WARNING, "Failed set src streamon: %s\n", av_err2str(ret)); ++ return ret; ++ } ++ + if (in->top_field_first) + ctx->field_order = V4L2_FIELD_INTERLACED_TB; + else + ctx->field_order = V4L2_FIELD_INTERLACED_BT; + -+ ret = deint_v4l2m2m_set_format(output, ctx->field_order, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_set_format(capture, V4L2_FIELD_NONE, ctx->width, ctx->height, ctx->orig_width, drm_desc->layers[0].planes[1].offset); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_allocate_buffers(capture); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_streamon(capture); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_allocate_buffers(output); -+ if (ret) -+ return ret; -+ -+ ret = deint_v4l2m2m_streamon(output); -+ if (ret) -+ return ret; + } + + ret = deint_v4l2m2m_enqueue_frame(output, in); @@ -61395,28 +63666,31 @@ index 0000000000..d1c714b805 + return 0; + } + -+ { ++ recycle_q(&s->output); ++ n = count_enqueued(&s->output); ++ ++ while (n < 6) { + AVFrame * frame; + int rv; + -+ recycle_q(&s->output); -+ n = count_enqueued(&s->output); -+ -+ while (n < 6) { -+ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { -+ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); -+ return rv; -+ } -+ -+ if (frame == NULL) { -+ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); -+ break; -+ } -+ -+ deint_v4l2m2m_filter_frame(inlink, frame); -+ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); -+ ++n; ++ if ((rv = ff_inlink_consume_frame(inlink, &frame)) < 0) { ++ av_log(priv, AV_LOG_ERROR, "%s: consume in failed: %s\n", __func__, av_err2str(rv)); ++ return rv; + } ++ ++ if (frame == NULL) { ++ av_log(priv, AV_LOG_TRACE, "%s: No frame\n", __func__); ++ break; ++ } ++ ++ rv = deint_v4l2m2m_filter_frame(inlink, frame); ++ av_frame_free(&frame); ++ ++ if (rv != 0) ++ return rv; ++ ++ av_log(priv, AV_LOG_TRACE, "%s: Q frame\n", __func__); ++ ++n; + } + + if (n < 6) { @@ -61435,7 +63709,7 @@ index 0000000000..d1c714b805 + return did_something ? 0 : FFERROR_NOT_READY; +} + -+static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) ++static av_cold int common_v4l2m2m_init(AVFilterContext * const avctx, const filter_type_v4l2_t filter_type) +{ + DeintV4L2M2MContext * const priv = avctx->priv; + DeintV4L2M2MContextShared * const ctx = av_mallocz(sizeof(DeintV4L2M2MContextShared)); @@ -61446,11 +63720,14 @@ index 0000000000..d1c714b805 + } + priv->shared = ctx; + ctx->logctx = priv; ++ ctx->filter_type = filter_type; + ctx->fd = -1; + ctx->output.ctx = ctx; + ctx->output.num_buffers = 8; ++ ctx->output.name = "OUTPUT"; + ctx->capture.ctx = ctx; + ctx->capture.num_buffers = 12; ++ ctx->capture.name = "CAPTURE"; + ctx->done = 0; + ctx->field_order = V4L2_FIELD_ANY; + @@ -61458,9 +63735,52 @@ index 0000000000..d1c714b805 + + atomic_init(&ctx->refcount, 1); + ++ if (priv->output_format_string) { ++ ctx->output_format = av_get_pix_fmt(priv->output_format_string); ++ if (ctx->output_format == AV_PIX_FMT_NONE) { ++ av_log(avctx, AV_LOG_ERROR, "Invalid ffmpeg output format '%s'.\n", priv->output_format_string); ++ return AVERROR(EINVAL); ++ } ++ if (fmt_av_to_v4l2(ctx->output_format) == 0) { ++ av_log(avctx, AV_LOG_ERROR, "Unsupported output format for V4L2: %s.\n", av_get_pix_fmt_name(ctx->output_format)); ++ return AVERROR(EINVAL); ++ } ++ } else { ++ // Use the input format once that is configured. ++ ctx->output_format = AV_PIX_FMT_NONE; ++ } ++ ++#define STRING_OPTION(var_name, func_name, default_value) do { \ ++ if (priv->var_name ## _string) { \ ++ int var = av_ ## func_name ## _from_name(priv->var_name ## _string); \ ++ if (var < 0) { \ ++ av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \ ++ return AVERROR(EINVAL); \ ++ } \ ++ priv->var_name = var; \ ++ } else { \ ++ priv->var_name = default_value; \ ++ } \ ++ } while (0) ++ ++ STRING_OPTION(colour_primaries, color_primaries, AVCOL_PRI_UNSPECIFIED); ++ STRING_OPTION(colour_transfer, color_transfer, AVCOL_TRC_UNSPECIFIED); ++ STRING_OPTION(colour_matrix, color_space, AVCOL_SPC_UNSPECIFIED); ++ STRING_OPTION(chroma_location, chroma_location, AVCHROMA_LOC_UNSPECIFIED); ++ + return 0; +} + ++static av_cold int deint_v4l2m2m_init(AVFilterContext *avctx) ++{ ++ return common_v4l2m2m_init(avctx, FILTER_V4L2_DEINTERLACE); ++} ++ ++static av_cold int scale_v4l2m2m_init(AVFilterContext *avctx) ++{ ++ return common_v4l2m2m_init(avctx, FILTER_V4L2_SCALE); ++} ++ +static void deint_v4l2m2m_uninit(AVFilterContext *avctx) +{ + DeintV4L2M2MContext *priv = avctx->priv; @@ -61478,6 +63798,51 @@ index 0000000000..d1c714b805 + +AVFILTER_DEFINE_CLASS(deinterlace_v4l2m2m); + ++#define OFFSET(x) offsetof(DeintV4L2M2MContext, x) ++#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM) ++ ++static const AVOption scale_v4l2m2m_options[] = { ++ { "w", "Output video width", ++ OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS }, ++ { "h", "Output video height", ++ OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS }, ++ { "format", "Output video format (software format of hardware frames)", ++ OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS }, ++ // These colour properties match the ones of the same name in vf_scale. ++ { "out_color_matrix", "Output colour matrix coefficient set", ++ OFFSET(colour_matrix_string), AV_OPT_TYPE_STRING, { .str = NULL }, .flags = FLAGS }, ++ { "out_range", "Output colour range", ++ OFFSET(colour_range), AV_OPT_TYPE_INT, { .i64 = AVCOL_RANGE_UNSPECIFIED }, ++ AVCOL_RANGE_UNSPECIFIED, AVCOL_RANGE_JPEG, FLAGS, "range" }, ++ { "full", "Full range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "limited", "Limited range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "jpeg", "Full range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ { "mpeg", "Limited range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "tv", "Limited range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_MPEG }, 0, 0, FLAGS, "range" }, ++ { "pc", "Full range", ++ 0, AV_OPT_TYPE_CONST, { .i64 = AVCOL_RANGE_JPEG }, 0, 0, FLAGS, "range" }, ++ // These colour properties match the ones in the VAAPI scaler ++ { "out_color_primaries", "Output colour primaries", ++ OFFSET(colour_primaries_string), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, ++ { "out_color_transfer", "Output colour transfer characteristics", ++ OFFSET(colour_transfer_string), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, ++ { "out_chroma_location", "Output chroma sample location", ++ OFFSET(chroma_location_string), AV_OPT_TYPE_STRING, ++ { .str = NULL }, .flags = FLAGS }, ++ { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0}, 0, 2, FLAGS, "force_oar" }, ++ { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1}, 1, 256, FLAGS }, ++ { NULL }, ++}; ++ ++AVFILTER_DEFINE_CLASS(scale_v4l2m2m); ++ +static const AVFilterPad deint_v4l2m2m_inputs[] = { + { + .name = "default", @@ -61507,6 +63872,20 @@ index 0000000000..d1c714b805 + .priv_class = &deinterlace_v4l2m2m_class, + .activate = deint_v4l2m2m_activate, +}; ++ ++AVFilter ff_vf_scale_v4l2m2m = { ++ .name = "scale_v4l2m2m", ++ .description = NULL_IF_CONFIG_SMALL("V4L2 M2M scaler"), ++ .priv_size = sizeof(DeintV4L2M2MContext), ++ .init = &scale_v4l2m2m_init, ++ .uninit = &deint_v4l2m2m_uninit, ++ .query_formats = &deint_v4l2m2m_query_formats, ++ .inputs = deint_v4l2m2m_inputs, ++ .outputs = deint_v4l2m2m_outputs, ++ .priv_class = &scale_v4l2m2m_class, ++ .activate = deint_v4l2m2m_activate, ++}; ++ diff --git a/libavfilter/vf_unsand.c b/libavfilter/vf_unsand.c new file mode 100644 index 0000000000..fbea56dd09 @@ -61747,6 +64126,97 @@ index 0000000000..fbea56dd09 + .outputs = avfilter_vf_unsand_outputs, +}; + +diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c +index eaed02bc92..181fedcd20 100644 +--- a/libavformat/matroskaenc.c ++++ b/libavformat/matroskaenc.c +@@ -58,6 +58,9 @@ + * Info, Tracks, Chapters, Attachments, Tags (potentially twice) and Cues */ + #define MAX_SEEKHEAD_ENTRIES 7 + ++/* Reserved size for H264 headers if not extant at init time */ ++#define MAX_H264_HEADER_SIZE 1024 ++ + #define IS_SEEKABLE(pb, mkv) (((pb)->seekable & AVIO_SEEKABLE_NORMAL) && \ + !(mkv)->is_live) + +@@ -720,8 +723,12 @@ static int mkv_write_native_codecprivate(AVFormatContext *s, AVIOContext *pb, + case AV_CODEC_ID_WAVPACK: + return put_wv_codecpriv(dyn_cp, par); + case AV_CODEC_ID_H264: +- return ff_isom_write_avcc(dyn_cp, par->extradata, +- par->extradata_size); ++ if (par->extradata_size) ++ return ff_isom_write_avcc(dyn_cp, par->extradata, ++ par->extradata_size); ++ else ++ put_ebml_void(pb, MAX_H264_HEADER_SIZE); ++ break; + case AV_CODEC_ID_HEVC: + return ff_isom_write_hvcc(dyn_cp, par->extradata, + par->extradata_size, 0); +@@ -2233,7 +2240,9 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) + break; + // FIXME: Remove the following once libaom starts propagating extradata during init() + // See https://bugs.chromium.org/p/aomedia/issues/detail?id=2012 ++ // H264 V4L2 has a similar issue + case AV_CODEC_ID_AV1: ++ case AV_CODEC_ID_H264: + if (side_data_size && mkv->track.bc && !par->extradata_size) { + AVIOContext *dyn_cp; + uint8_t *codecpriv; +@@ -2241,7 +2250,10 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) + ret = avio_open_dyn_buf(&dyn_cp); + if (ret < 0) + return ret; +- ff_isom_write_av1c(dyn_cp, side_data, side_data_size); ++ if (par->codec_id == AV_CODEC_ID_H264) ++ ff_isom_write_avcc(dyn_cp, side_data, side_data_size); ++ else ++ ff_isom_write_av1c(dyn_cp, side_data, side_data_size); + codecpriv_size = avio_get_dyn_buf(dyn_cp, &codecpriv); + if ((ret = dyn_cp->error) < 0 || + !codecpriv_size && (ret = AVERROR_INVALIDDATA)) { +@@ -2249,8 +2261,25 @@ static int mkv_check_new_extra_data(AVFormatContext *s, const AVPacket *pkt) + return ret; + } + avio_seek(mkv->track.bc, track->codecpriv_offset, SEEK_SET); +- // Do not write the OBUs as we don't have space saved for them +- put_ebml_binary(mkv->track.bc, MATROSKA_ID_CODECPRIVATE, codecpriv, 4); ++ if (par->codec_id == AV_CODEC_ID_H264) { ++ int filler; ++ // Up to 6 bytes for header and the filler must be at least 2 ++ if (codecpriv_size > MAX_H264_HEADER_SIZE - 8) { ++ av_log(s, AV_LOG_ERROR, "H264 header size %d > %d bytes\n", codecpriv_size, MAX_H264_HEADER_SIZE - 8); ++ return AVERROR_INVALIDDATA; ++ } ++ put_ebml_binary(mkv->track.bc, MATROSKA_ID_CODECPRIVATE, codecpriv, codecpriv_size); ++ filler = MAX_H264_HEADER_SIZE - (avio_tell(mkv->track.bc) - track->codecpriv_offset); ++ if (filler < 2) { ++ av_log(s, AV_LOG_ERROR, "Unexpected SPS/PPS filler length: %d\n", filler); ++ return AVERROR_BUG; ++ } ++ put_ebml_void(mkv->track.bc, filler); ++ } ++ else { ++ // Do not write the OBUs as we don't have space saved for them ++ put_ebml_binary(mkv->track.bc, MATROSKA_ID_CODECPRIVATE, codecpriv, 4); ++ } + ffio_free_dyn_buf(&dyn_cp); + ret = ff_alloc_extradata(par, side_data_size); + if (ret < 0) +diff --git a/libavformat/movenc.c b/libavformat/movenc.c +index 5d8dc4fd5d..97fabf260c 100644 +--- a/libavformat/movenc.c ++++ b/libavformat/movenc.c +@@ -5767,6 +5767,7 @@ static int mov_write_single_packet(AVFormatContext *s, AVPacket *pkt) + if (trk->par->codec_id == AV_CODEC_ID_MP4ALS || + trk->par->codec_id == AV_CODEC_ID_AAC || + trk->par->codec_id == AV_CODEC_ID_AV1 || ++ trk->par->codec_id == AV_CODEC_ID_H264 || + trk->par->codec_id == AV_CODEC_ID_FLAC) { + int side_size = 0; + uint8_t *side = av_packet_get_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA, &side_size); diff --git a/libavformat/utils.c b/libavformat/utils.c index 7185fbfd71..c7b0553903 100644 --- a/libavformat/utils.c @@ -61886,10 +64356,10 @@ index 5613813ba8..ab8bcfcf34 100644 + diff --git a/libavutil/aarch64/rpi_sand_neon.S b/libavutil/aarch64/rpi_sand_neon.S new file mode 100644 -index 0000000000..cdcf71ee67 +index 0000000000..2f07d9674c --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.S -@@ -0,0 +1,676 @@ +@@ -0,0 +1,781 @@ +/* +Copyright (c) 2021 Michael Eiler + @@ -62140,228 +64610,6 @@ index 0000000000..cdcf71ee67 + ret +endfunc + -+//void ff_rpi_sand30_lines_to_planar_y16( -+// uint8_t * dest, // [x0] -+// unsigned int dst_stride, // [w1] -> assumed to be equal to _w -+// const uint8_t * src, // [x2] -+// unsigned int src_stride1, // [w3] -> 128 -+// unsigned int src_stride2, // [w4] -+// unsigned int _x, // [w5] -+// unsigned int y, // [w6] -+// unsigned int _w, // [w7] -+// unsigned int h); // [sp, #0] -+ -+function ff_rpi_sand30_lines_to_planar_y16, export=1 -+ stp x19, x20, [sp, #-48]! -+ stp x21, x22, [sp, #16] -+ stp x23, x24, [sp, #32] -+ -+ // w6 = argument h -+ ldr w6, [sp, #48] -+ -+ // slice_inc = ((stride2 - 1) * stride1) -+ mov w5, w4 -+ sub w5, w5, #1 -+ lsl w5, w5, #7 -+ -+ // total number of bytes per row = (width / 3) * 4 -+ mov w8, w7 -+ mov w9, #3 -+ udiv w8, w8, w9 -+ lsl w8, w8, #2 -+ -+ // number of full 128 byte blocks to be processed -+ mov w9, #96 -+ udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96 -+ -+ // w10 = number of full integers to process (4 bytes) -+ // w11 = remaning zero to two 10bit values still to copy over -+ mov w12, #96 -+ mul w12, w9, w12 -+ sub w12, w7, w12 // width - blocks*96 = remaining points per row -+ mov w11, #3 -+ udiv w10, w12, w11 // full integers to process = w12 / 3 -+ mul w11, w10, w11 // #integers *3 -+ sub w11, w12, w11 // remaining 0-2 points = remaining points - integers*3 -+ -+ // increase w9 by one if w10+w11 is not zero, and decrease the row count by one -+ // this is to efficiently copy incomplete blocks at the end of the rows -+ // the last row is handled explicitly to avoid writing out of bounds -+ add w22, w10, w11 -+ cmp w22, #0 -+ cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise -+ add w9, w9, w22 -+ sub w6, w6, #1 -+ -+ // store the number of bytes in w20 which we copy too much for every row -+ // when the width of the frame is not a multiple of 96 (128bytes storing 96 10bit values) -+ mov w20, #96*2 -+ mul w20, w20, w9 -+ sub w20, w1, w20 -+ -+ mov w23, #0 // flag to check whether the last line had already been processed -+ -+ // bitmask to clear the uppper 6bits of the result values -+ mov x19, #0x03ff03ff03ff03ff -+ dup v22.2d, x19 -+ -+ // row counter = 0 -+ eor w12, w12, w12 -+row_loop_y16: -+ cmp w12, w6 // jump to row_loop_y16_fin if we processed all rows -+ bge row_loop_y16_fin -+ -+ mov x13, x2 // row src -+ eor w14, w14, w14 // full block counter -+block_loop_y16: -+ cmp w14, w9 -+ bge block_loop_y16_fin -+ -+ // load 64 bytes -+ ld1 { v0.4s, v1.4s, v2.4s, v3.4s }, [x13], #64 -+ -+ // process v0 and v1 -+ xtn v16.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v17.4h, v0.4s -+ ushr v0.4s, v0.4s, #10 -+ xtn v18.4h, v0.4s -+ -+ xtn2 v16.8h, v1.4s -+ and v16.16b, v16.16b, v22.16b -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v17.8h, v1.4s -+ and v17.16b, v17.16b, v22.16b -+ ushr v1.4s, v1.4s, #10 -+ xtn2 v18.8h, v1.4s -+ and v18.16b, v18.16b, v22.16b -+ -+ st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -+ -+ // process v2 and v3 -+ xtn v23.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v24.4h, v2.4s -+ ushr v2.4s, v2.4s, #10 -+ xtn v25.4h, v2.4s -+ -+ xtn2 v23.8h, v3.4s -+ and v23.16b, v23.16b, v22.16b -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v24.8h, v3.4s -+ and v24.16b, v24.16b, v22.16b -+ ushr v3.4s, v3.4s, #10 -+ xtn2 v25.8h, v3.4s -+ and v25.16b, v25.16b, v22.16b -+ -+ st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -+ -+ // load the second half of the block -> 64 bytes into registers v4-v7 -+ ld1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x13], #64 -+ -+ // process v4 and v5 -+ xtn v16.4h, v4.4s -+ ushr v4.4s, v4.4s, #10 -+ xtn v17.4h, v4.4s -+ ushr v4.4s, v4.4s, #10 -+ xtn v18.4h, v4.4s -+ -+ xtn2 v16.8h, v5.4s -+ and v16.16b, v16.16b, v22.16b -+ ushr v5.4s, v5.4s, #10 -+ xtn2 v17.8h, v5.4s -+ and v17.16b, v17.16b, v22.16b -+ ushr v5.4s, v5.4s, #10 -+ xtn2 v18.8h, v5.4s -+ and v18.16b, v18.16b, v22.16b -+ -+ st3 { v16.8h, v17.8h, v18.8h }, [x0], #48 -+ -+ // v6 and v7 -+ xtn v23.4h, v6.4s -+ ushr v6.4s, v6.4s, #10 -+ xtn v24.4h, v6.4s -+ ushr v6.4s, v6.4s, #10 -+ xtn v25.4h, v6.4s -+ -+ xtn2 v23.8h, v7.4s -+ and v23.16b, v23.16b, v22.16b -+ ushr v7.4s, v7.4s, #10 -+ xtn2 v24.8h, v7.4s -+ and v24.16b, v24.16b, v22.16b -+ ushr v7.4s, v7.4s, #10 -+ xtn2 v25.8h, v7.4s -+ and v25.16b, v25.16b, v22.16b -+ -+ st3 { v23.8h, v24.8h, v25.8h }, [x0], #48 -+ -+ add x13, x13, x5 // row src += slice_inc -+ add w14, w14, #1 -+ b block_loop_y16 -+block_loop_y16_fin: -+ -+ -+ -+ -+ add x2, x2, #128 // src += stride1 (start of the next row) -+ add x0, x0, w20, sxtw // subtract the bytes we copied too much from dst -+ add w12, w12, #1 -+ b row_loop_y16 -+row_loop_y16_fin: -+ -+ // check whether we have incomplete blocks at the end of every row -+ // in that case decrease row block count by one -+ // change height back to it's original value (meaning increase it by 1) -+ // and jump back to another iteration of row_loop_y16 -+ -+ cmp w23, #1 -+ beq row_loop_y16_fin2 // don't continue here if we already processed the last row -+ add w6, w6, #1 // increase height to the original value -+ sub w9, w9, w22 // block count - 1 or 0, depending on the remaining bytes count -+ mov w23, #1 -+ b row_loop_y16 -+row_loop_y16_fin2: -+ -+ sub x0, x0, w20, sxtw // with the last row we didn't actually move the dst ptr to far ahead, therefore readd the diference -+ -+ // now we've got to handle the last block in the last row -+ eor w12, w12, w12 // w12 = 0 = counter -+integer_loop_y16: -+ cmp w12, w10 -+ bge integer_loop_y16_fin -+ ldr w14, [x13], #4 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ add w12, w12, #1 -+ b integer_loop_y16 -+integer_loop_y16_fin: -+ -+final_values_y16: -+ // remaining point count = w11 -+ ldr w14, [x13], #4 -+ cmp w11, #0 -+ beq final_values_y16_fin -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+ cmp w11, #1 -+ beq final_values_y16_fin -+ lsr w14, w14, #10 -+ and w15, w14, #0x3ff -+ strh w15, [x0], #2 -+final_values_y16_fin: -+ -+ ldp x23, x24, [sp, #32] -+ ldp x21, x22, [sp, #16] -+ ldp x19, x20, [sp], #48 -+ ret -+endfunc -+ +//void ff_rpi_sand30_lines_to_planar_c16( +// uint8_t * dst_u, // [x0] +// unsigned int dst_stride_u, // [w1] == _w*2 @@ -62566,12 +64814,339 @@ index 0000000000..cdcf71ee67 +// unsigned int _w, +// unsigned int h); + ++// void ff_rpi_sand30_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y16, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ sub w1, w1, w7, lsl #1 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ // v0, v1 ++ ++ shrn v18.4h, v0.4s, #14 ++ xtn v16.4h, v0.4s ++ shrn v17.4h, v0.4s, #10 ++ ++ shrn2 v18.8h, v1.4s, #14 ++ xtn2 v16.8h, v1.4s ++ shrn2 v17.8h, v1.4s, #10 ++ ++ ushr v18.8h, v18.8h, #6 ++ bic v16.8h, #0xfc, lsl #8 ++ bic v17.8h, #0xfc, lsl #8 ++ ++ // v2, v3 ++ ++ shrn v21.4h, v2.4s, #14 ++ xtn v19.4h, v2.4s ++ shrn v20.4h, v2.4s, #10 ++ ++ shrn2 v21.8h, v3.4s, #14 ++ xtn2 v19.8h, v3.4s ++ shrn2 v20.8h, v3.4s, #10 ++ ++ ushr v21.8h, v21.8h, #6 ++ bic v19.8h, #0xfc, lsl #8 ++ bic v20.8h, #0xfc, lsl #8 ++ ++ // v4, v5 ++ ++ shrn v24.4h, v4.4s, #14 ++ xtn v22.4h, v4.4s ++ shrn v23.4h, v4.4s, #10 ++ ++ shrn2 v24.8h, v5.4s, #14 ++ xtn2 v22.8h, v5.4s ++ shrn2 v23.8h, v5.4s, #10 ++ ++ ushr v24.8h, v24.8h, #6 ++ bic v22.8h, #0xfc, lsl #8 ++ bic v23.8h, #0xfc, lsl #8 ++ ++ // v6, v7 ++ ++ shrn v27.4h, v6.4s, #14 ++ xtn v25.4h, v6.4s ++ shrn v26.4h, v6.4s, #10 ++ ++ shrn2 v27.8h, v7.4s, #14 ++ xtn2 v25.8h, v7.4s ++ shrn2 v26.8h, v7.4s, #10 ++ ++ ushr v27.8h, v27.8h, #6 ++ bic v25.8h, #0xfc, lsl #8 ++ bic v26.8h, #0xfc, lsl #8 ++ ++ blt 2f ++ ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ st3 {v22.8h, v23.8h, v24.8h}, [x0], #48 ++ st3 {v25.8h, v26.8h, v27.8h}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ st3 {v19.8h, v20.8h, v21.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++ mov v19.16b, v25.16b ++ mov v20.16b, v26.16b ++ mov v21.16b, v27.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8h, v17.8h, v18.8h}, [x0], #48 ++ beq 11b ++ mov v16.16b, v19.16b ++ mov v17.16b, v20.16b ++ sub w5, w5, #24 ++ mov v18.16b, v21.16b ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.4h, v17.4h, v18.4h}, [x0], #24 ++ beq 11b ++ mov v16.2d[0], v16.2d[1] ++ sub w5, w5, #12 ++ mov v17.2d[0], v17.2d[1] ++ mov v18.2d[0], v18.2d[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ st3 {v16.h, v17.h, v18.h}[1], [x0], #6 ++ beq 11b ++ mov v16.2s[0], v16.2s[1] ++ sub w5, w5, #6 ++ mov v17.2s[0], v17.2s[1] ++ mov v18.2s[0], v18.2s[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.h, v17.h, v18.h}[0], [x0], #6 ++ beq 11b ++ mov v16.4h[0], v16.4h[1] ++ sub w5, w5, #3 ++ mov v17.4h[0], v17.4h[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.h, v17.h}[0], [x0], #4 ++ b 11b ++1: ++ st1 {v16.h}[0], [x0], #2 ++ b 11b ++ ++endfunc ++ ++// void ff_rpi_sand30_lines_to_planar_y8( ++// uint8_t * dest, : x0 ++// unsigned int dst_stride, : w1 ++// const uint8_t * src, : x2 ++// unsigned int src_stride1, : w3, always 128 ++// unsigned int src_stride2, : w4 ++// unsigned int _x, : w5 ++// unsigned int y, : w6 ++// unsigned int _w, : w7 ++// unsigned int h); : [sp, #0] ++// ++// Assumes that we are starting on a stripe boundary and that overreading ++// within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ lsl w4, w4, #7 ++ sub w4, w4, #64 ++ sub w1, w1, w7 ++ uxtw x6, w6 ++ add x8, x2, x6, lsl #7 ++ ldr w6, [sp, #0] ++ ++10: ++ mov x2, x8 ++ mov w5, w7 ++1: ++ ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2], #64 ++ ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [x2], x4 ++ ++ subs w5, w5, #96 ++ ++ // v0, v1 ++ ++ shrn v18.4h, v0.4s, #16 ++ xtn v16.4h, v0.4s ++ shrn v17.4h, v0.4s, #12 ++ ++ shrn2 v18.8h, v1.4s, #16 ++ xtn2 v16.8h, v1.4s ++ shrn2 v17.8h, v1.4s, #12 ++ ++ shrn v18.8b, v18.8h, #6 ++ shrn v16.8b, v16.8h, #2 ++ xtn v17.8b, v17.8h ++ ++ // v2, v3 ++ ++ shrn v21.4h, v2.4s, #16 ++ xtn v19.4h, v2.4s ++ shrn v20.4h, v2.4s, #12 ++ ++ shrn2 v21.8h, v3.4s, #16 ++ xtn2 v19.8h, v3.4s ++ shrn2 v20.8h, v3.4s, #12 ++ ++ shrn2 v18.16b, v21.8h, #6 ++ shrn2 v16.16b, v19.8h, #2 ++ xtn2 v17.16b, v20.8h ++ ++ // v4, v5 ++ ++ shrn v24.4h, v4.4s, #16 ++ xtn v22.4h, v4.4s ++ shrn v23.4h, v4.4s, #12 ++ ++ shrn2 v24.8h, v5.4s, #16 ++ xtn2 v22.8h, v5.4s ++ shrn2 v23.8h, v5.4s, #12 ++ ++ shrn v21.8b, v24.8h, #6 ++ shrn v19.8b, v22.8h, #2 ++ xtn v20.8b, v23.8h ++ ++ // v6, v7 ++ ++ shrn v27.4h, v6.4s, #16 ++ xtn v25.4h, v6.4s ++ shrn v26.4h, v6.4s, #12 ++ ++ shrn2 v27.8h, v7.4s, #16 ++ xtn2 v25.8h, v7.4s ++ shrn2 v26.8h, v7.4s, #12 ++ ++ shrn2 v21.16b, v27.8h, #6 ++ shrn2 v19.16b, v25.8h, #2 ++ xtn2 v20.16b, v26.8h ++ ++ blt 2f ++ ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ st3 {v19.16b, v20.16b, v21.16b}, [x0], #48 ++ ++ bne 1b ++ ++11: ++ subs w6, w6, #1 ++ add x0, x0, w1, uxtw ++ add x8, x8, #128 ++ bne 10b ++ ++ ret ++ ++// Partial final write ++2: ++ cmp w5, #48-96 ++ blt 1f ++ st3 {v16.16b, v17.16b, v18.16b}, [x0], #48 ++ beq 11b ++ mov v16.16b, v22.16b ++ mov v17.16b, v23.16b ++ sub w5, w5, #48 ++ mov v18.16b, v24.16b ++1: ++ cmp w5, #24-96 ++ blt 1f ++ st3 {v16.8b, v17.8b, v18.8b}, [x0], #24 ++ beq 11b ++ mov v16.2d[0], v16.2d[1] ++ sub w5, w5, #24 ++ mov v17.2d[0], v17.2d[1] ++ mov v18.2d[0], v18.2d[1] ++1: ++ cmp w5, #12-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[2], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[3], [x0], #3 ++ beq 11b ++ mov v16.2s[0], v16.2s[1] ++ sub w5, w5, #12 ++ mov v17.2s[0], v17.2s[1] ++ mov v18.2s[0], v18.2s[1] ++1: ++ cmp w5, #6-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ st3 {v16.b, v17.b, v18.b}[1], [x0], #3 ++ beq 11b ++ mov v16.4h[0], v16.4h[1] ++ sub w5, w5, #6 ++ mov v17.4h[0], v17.4h[1] ++ mov v18.4h[0], v18.4h[1] ++1: ++ cmp w5, #3-96 ++ blt 1f ++ st3 {v16.b, v17.b, v18.b}[0], [x0], #3 ++ beq 11b ++ mov v16.8b[0], v16.8b[1] ++ sub w5, w5, #3 ++ mov v17.8b[0], v17.8b[1] ++1: ++ cmp w5, #2-96 ++ blt 1f ++ st2 {v16.b, v17.b}[0], [x0], #2 ++ b 11b ++1: ++ st1 {v16.b}[0], [x0], #1 ++ b 11b ++ ++endfunc ++ diff --git a/libavutil/aarch64/rpi_sand_neon.h b/libavutil/aarch64/rpi_sand_neon.h new file mode 100644 -index 0000000000..b3aa481ea4 +index 0000000000..2a56135bc3 --- /dev/null +++ b/libavutil/aarch64/rpi_sand_neon.h -@@ -0,0 +1,55 @@ +@@ -0,0 +1,59 @@ +/* +Copyright (c) 2021 Michael Eiler + @@ -62623,6 +65198,10 @@ index 0000000000..b3aa481ea4 + uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1, + unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); + ++void ff_rpi_sand30_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride, ++ const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2, ++ unsigned int _x, unsigned int y, unsigned int _w, unsigned int h); ++ +#ifdef __cplusplus +} +#endif @@ -62638,10 +65217,10 @@ index 5da44b0542..b74b7c4e2f 100644 + arm/rpi_sand_neon.o \ diff --git a/libavutil/arm/rpi_sand_neon.S b/libavutil/arm/rpi_sand_neon.S new file mode 100644 -index 0000000000..80890fe985 +index 0000000000..60e697f681 --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.S -@@ -0,0 +1,768 @@ +@@ -0,0 +1,925 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. @@ -63004,7 +65583,6 @@ index 0000000000..80890fe985 + ldr r6, [sp, #36] + ldr r7, [sp, #32] @ y + mov r12, #48 -+ vmov.u16 q15, #0x3ff + sub r3, #1 + lsl r3, #7 + sub r1, r1, r6, lsl #1 @@ -63020,37 +65598,33 @@ index 0000000000..80890fe985 + vldm r2!, {q10-q13} + add lr, #64 + -+ vshr.u32 q14, q10, #20 @ Cannot vshrn.u32 #20! ++ vshrn.u32 d4 , q10, #14 @ Cannot vshrn.u32 #20! + ands lr, #127 + vshrn.u32 d2, q10, #10 + vmovn.u32 d0, q10 -+ vmovn.u32 d4, q14 + -+ vshr.u32 q14, q11, #20 ++ vshrn.u32 d5, q11, #14 + it eq + addeq r2, r3 + vshrn.u32 d3, q11, #10 + vmovn.u32 d1, q11 -+ vmovn.u32 d5, q14 + + subs r5, #48 -+ vand q0, q15 -+ vand q1, q15 -+ vand q2, q15 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 + -+ vshr.u32 q14, q12, #20 ++ vshrn.u32 d20, q12, #14 + vshrn.u32 d18, q12, #10 + vmovn.u32 d16, q12 -+ vmovn.u32 d20, q14 + -+ vshr.u32 q14, q13, #20 ++ vshrn.u32 d21, q13, #14 + vshrn.u32 d19, q13, #10 + vmovn.u32 d17, q13 -+ vmovn.u32 d21, q14 + -+ vand q8, q15 -+ vand q9, q15 -+ vand q10, q15 ++ vshr.u16 q10, #6 ++ vbic.u16 q8, #0xfc00 ++ vbic.u16 q9 , #0xfc00 + blt 2f + + vst3.16 {d0, d2, d4}, [r0], r12 @@ -63143,7 +65717,6 @@ index 0000000000..80890fe985 + ldr r7, [sp, #48] + ldr r9, [sp, #52] + mov r12, #48 -+ vmov.u16 q15, #0x3ff + sub r8, #1 + lsl r8, #7 + add r5, r5, r7, lsl #7 @@ -63159,48 +65732,44 @@ index 0000000000..80890fe985 + add lr, #64 + + @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2 -+ vshr.u32 q14, q0, #20 -+ vshrn.u32 d16, q0, #10 ++ vshrn.u32 d20, q0, #14 + vmovn.u32 d18, q0 ++ vshrn.u32 d0, q0, #10 + ands lr, #127 -+ vmovn.u32 d20, q14 + -+ vshr.u32 q14, q1, #20 -+ vshrn.u32 d17, q1, #10 ++ vshrn.u32 d21, q1, #14 + vmovn.u32 d19, q1 -+ vmovn.u32 d21, q14 ++ vshrn.u32 d1, q1, #10 + -+ vshr.u32 q14, q2, #20 + vshrn.u32 d22, q2, #10 -+ vmovn.u32 d24, q2 -+ vmovn.u32 d26, q14 ++ vmovn.u32 d2, q2 ++ vshrn.u32 d4, q2, #14 + -+ vshr.u32 q14, q3, #20 -+ vshrn.u32 d23, q3, #10 -+ vmovn.u32 d25, q3 + add r10, r0, #24 -+ vmovn.u32 d27, q14 ++ vshrn.u32 d23, q3, #10 ++ vmovn.u32 d3, q3 ++ vshrn.u32 d5, q3, #14 + + it eq + addeq r4, r8 -+ vuzp.16 q8, q11 -+ vuzp.16 q9, q12 -+ vuzp.16 q10, q13 ++ vuzp.16 q0, q11 ++ vuzp.16 q9, q1 ++ vuzp.16 q10, q2 + -+ @ q8 V0, V3,.. -> q0 ++ @ q0 V0, V3,.. + @ q9 U0, U3... + @ q10 U1, U4... + @ q11 U2, U5,.. -+ @ q12 V1, V4,.. -> q1 -+ @ q13 V2, V5,.. -> q2 ++ @ q1 V1, V4, ++ @ q2 V2, V5,.. + + subs r6, #24 -+ vand q11, q15 -+ vand q9, q15 -+ vand q10, q15 -+ vand q0, q8, q15 -+ vand q1, q12, q15 -+ vand q2, q13, q15 ++ vbic.u16 q11, #0xfc00 ++ vbic.u16 q9, #0xfc00 ++ vshr.u16 q10, #6 ++ vshr.u16 q2, #6 ++ vbic.u16 q0, #0xfc00 ++ vbic.u16 q1, #0xfc00 + + blt 2f + @@ -63409,13 +65978,180 @@ index 0000000000..80890fe985 +endfunc + + ++@ void ff_rpi_sand30_lines_to_planar_y8( ++@ uint8_t * dest, // [r0] ++@ unsigned int dst_stride, // [r1] ++@ const uint8_t * src, // [r2] ++@ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++@ unsigned int src_stride2, // [sp, #0] -> r3 ++@ unsigned int _x, // [sp, #4] Ignored - 0 ++@ unsigned int y, // [sp, #8] (r7 in prefix) ++@ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++@ unsigned int h); // [sp, #16] -> r7 ++@ ++@ Assumes that we are starting on a stripe boundary and that overreading ++@ within the stripe is OK. However it does respect the dest size for wri ++ ++function ff_rpi_sand30_lines_to_planar_y8, export=1 ++ push {r4-r8, lr} @ +24 ++ ldr r3, [sp, #24] ++ ldr r6, [sp, #36] ++ ldr r7, [sp, #32] @ y ++ mov r12, #48 ++ lsl r3, #7 ++ sub r1, r1, r6 ++ add r8, r2, r7, lsl #7 ++ ldr r7, [sp, #40] ++ ++10: ++ mov r2, r8 ++ add r4, r0, #24 ++ mov r5, r6 ++1: ++ vldm r2, {q8-q15} ++ ++ subs r5, #96 ++ ++ vmovn.u32 d0, q8 ++ vshrn.u32 d2, q8, #12 ++ vshrn.u32 d4, q8, #16 @ Cannot vshrn.u32 #20! ++ ++ add r2, r3 ++ ++ vmovn.u32 d1, q9 ++ vshrn.u32 d3, q9, #12 ++ vshrn.u32 d5, q9, #16 ++ ++ pld [r2, #0] ++ ++ vshrn.u16 d0, q0, #2 ++ vmovn.u16 d1, q1 ++ vshrn.u16 d2, q2, #6 ++ ++ vmovn.u32 d16, q10 ++ vshrn.u32 d18, q10, #12 ++ vshrn.u32 d20, q10, #16 ++ ++ vmovn.u32 d17, q11 ++ vshrn.u32 d19, q11, #12 ++ vshrn.u32 d21, q11, #16 ++ ++ pld [r2, #64] ++ ++ vshrn.u16 d4, q8, #2 ++ vmovn.u16 d5, q9 ++ vshrn.u16 d6, q10, #6 ++ ++ vmovn.u32 d16, q12 ++ vshrn.u32 d18, q12, #12 ++ vshrn.u32 d20, q12, #16 ++ ++ vmovn.u32 d17, q13 ++ vshrn.u32 d19, q13, #12 ++ vshrn.u32 d21, q13, #16 ++ ++ vshrn.u16 d16, q8, #2 ++ vmovn.u16 d17, q9 ++ vshrn.u16 d18, q10, #6 ++ ++ vmovn.u32 d20, q14 ++ vshrn.u32 d22, q14, #12 ++ vshrn.u32 d24, q14, #16 ++ ++ vmovn.u32 d21, q15 ++ vshrn.u32 d23, q15, #12 ++ vshrn.u32 d25, q15, #16 ++ ++ vshrn.u16 d20, q10, #2 ++ vmovn.u16 d21, q11 ++ vshrn.u16 d22, q12, #6 ++ ++ blt 2f ++ ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ vst3.8 {d16, d17, d18}, [r0], r12 ++ vst3.8 {d20, d21, d22}, [r4], r12 ++ ++ bne 1b ++ ++11: ++ subs r7, #1 ++ add r0, r1 ++ add r8, #128 ++ bne 10b ++ ++ pop {r4-r8, pc} ++ ++@ Partial final write ++2: ++ cmp r5, #48-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0], r12 ++ vst3.8 {d4, d5, d6}, [r4], r12 ++ beq 11b ++ vmov q0, q8 ++ vmov q2, q10 ++ sub r5, #48 ++ vmov d2, d18 ++ vmov d6, d22 ++1: ++ cmp r5, #24-96 ++ blt 1f ++ vst3.8 {d0, d1, d2}, [r0]! ++ beq 11b ++ vmov q0, q2 ++ sub r5, #24 ++ vmov d2, d6 ++1: ++ cmp r5, #12-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! ++ vst3.8 {d0[2], d1[2], d2[2]}, [r0]! ++ vst3.8 {d0[3], d1[3], d2[3]}, [r0]! ++ beq 11b ++ vmov s0, s1 ++ sub r5, #12 ++ vmov s2, s3 ++ vmov s4, s5 ++1: ++ cmp r5, #6-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ vst3.8 {d0[1], d1[1], d2[1]}, [r0]! ++ add r0, #12 ++ beq 11b ++ vshr.u32 d0, #16 ++ sub r5, #6 ++ vshr.u32 d1, #16 ++ vshr.u32 d2, #16 ++1: ++ cmp r5, #3-96 ++ blt 1f ++ vst3.8 {d0[0], d1[0], d2[0]}, [r0]! ++ beq 11b ++ sub r5, #3 ++ vshr.u32 d0, #8 ++ vshr.u32 d1, #8 ++1: ++ cmp r5, #2-96 ++ blt 1f ++ vst2.8 {d0[0], d1[0]}, [r0]! ++ b 11b ++1: ++ vst1.8 {d0[0]}, [r0]! ++ b 11b ++ ++endfunc ++ + diff --git a/libavutil/arm/rpi_sand_neon.h b/libavutil/arm/rpi_sand_neon.h new file mode 100644 -index 0000000000..447f367bea +index 0000000000..d457c10870 --- /dev/null +++ b/libavutil/arm/rpi_sand_neon.h -@@ -0,0 +1,99 @@ +@@ -0,0 +1,110 @@ +/* +Copyright (c) 2020 Raspberry Pi (Trading) Ltd. +All rights reserved. @@ -63513,6 +66249,17 @@ index 0000000000..447f367bea + unsigned int _w, // [sp, #12] -> r6 (cur r5) + unsigned int h); // [sp, #16] -> r7 + ++void ff_rpi_sand30_lines_to_planar_y8( ++ uint8_t * dest, // [r0] ++ unsigned int dst_stride, // [r1] ++ const uint8_t * src, // [r2] ++ unsigned int src_stride1, // [r3] Ignored - assumed 128 ++ unsigned int src_stride2, // [sp, #0] -> r3 ++ unsigned int _x, // [sp, #4] Ignored - 0 ++ unsigned int y, // [sp, #8] (r7 in prefix) ++ unsigned int _w, // [sp, #12] -> r6 (cur r5) ++ unsigned int h); // [sp, #16] -> r7 ++ +#endif // AVUTIL_ARM_SAND_NEON_H + diff --git a/libavutil/frame.c b/libavutil/frame.c @@ -63573,7 +66320,7 @@ index fc67db0f6c..b1a7eb4858 100644 * @} */ diff --git a/libavutil/hwcontext_drm.c b/libavutil/hwcontext_drm.c -index 32cbde82eb..c5d0b960af 100644 +index 32cbde82eb..b0868608c6 100644 --- a/libavutil/hwcontext_drm.c +++ b/libavutil/hwcontext_drm.c @@ -19,8 +19,10 @@ @@ -63717,13 +66464,25 @@ index 32cbde82eb..c5d0b960af 100644 if (map->address[i]) munmap(map->address[i], map->length[i]); } -@@ -178,7 +241,15 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, - if (!pix_fmts) +@@ -172,16 +235,29 @@ static int drm_transfer_get_formats(AVHWFramesContext *ctx, + enum AVHWFrameTransferDirection dir, + enum AVPixelFormat **formats) + { +- enum AVPixelFormat *pix_fmts; ++ enum AVPixelFormat *p; + +- pix_fmts = av_malloc_array(2, sizeof(*pix_fmts)); +- if (!pix_fmts) ++ p = *formats = av_malloc_array(3, sizeof(*p)); ++ if (!p) return AVERROR(ENOMEM); - pix_fmts[0] = ctx->sw_format; +- pix_fmts[1] = AV_PIX_FMT_NONE; +- +- *formats = pix_fmts; + // **** Offer native sand too ???? -+ pix_fmts[0] = ++ *p++ = +#if CONFIG_SAND + ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ? + AV_PIX_FMT_YUV420P : @@ -63731,10 +66490,18 @@ index 32cbde82eb..c5d0b960af 100644 + AV_PIX_FMT_YUV420P10LE : +#endif + ctx->sw_format; - pix_fmts[1] = AV_PIX_FMT_NONE; ++ ++#if CONFIG_SAND ++ if (ctx->sw_format == AV_PIX_FMT_RPI4_10 || ++ ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128) ++ *p++ = AV_PIX_FMT_NV12; ++#endif ++ ++ *p = AV_PIX_FMT_NONE; + return 0; + } - *formats = pix_fmts; -@@ -197,18 +268,80 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, +@@ -197,18 +273,63 @@ static int drm_transfer_data_from(AVHWFramesContext *hwfc, map = av_frame_alloc(); if (!map) return AVERROR(ENOMEM); @@ -63769,29 +66536,12 @@ index 32cbde82eb..c5d0b960af 100644 + const unsigned int w = FFMIN(dst->width, map->width); + const unsigned int h = FFMIN(dst->height, map->height); + -+ if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) { -+ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], -+ map->data[0], -+ 128, stride2, -+ 0, 0, w, h); -+ av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ map->data[1], -+ 128, stride2, -+ 0, 0, w / 2, h / 2); -+ } -+ else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) { -+ av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0], -+ map->data[0], -+ 128, stride2, -+ 0, 0, w, h); -+ av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1], -+ dst->data[2], dst->linesize[2], -+ map->data[1], -+ 128, stride2, -+ 0, 0, w / 2, h / 2); -+ } -+ else ++ map->crop_top = 0; ++ map->crop_bottom = 0; ++ map->crop_left = 0; ++ map->crop_right = 0; ++ ++ if (av_rpi_sand_to_planar_frame(dst, map) != 0) + { + av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__); + err = AVERROR(EINVAL); @@ -63819,7 +66569,7 @@ index 32cbde82eb..c5d0b960af 100644 err = 0; fail: -@@ -223,7 +356,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, +@@ -223,7 +344,10 @@ static int drm_transfer_data_to(AVHWFramesContext *hwfc, int err; if (src->width > hwfc->width || src->height > hwfc->height) @@ -63831,10 +66581,10 @@ index 32cbde82eb..c5d0b960af 100644 map = av_frame_alloc(); if (!map) diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c -index 9d61c52567..701c0e356b 100644 +index 9d61c52567..b40b8dc6c9 100644 --- a/libavutil/pixdesc.c +++ b/libavutil/pixdesc.c -@@ -2371,6 +2371,38 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { +@@ -2371,6 +2371,50 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { .name = "vulkan", .flags = AV_PIX_FMT_FLAG_HWACCEL, }, @@ -63862,12 +66612,24 @@ index 9d61c52567..701c0e356b 100644 + }, + .flags = 0, + }, ++ [AV_PIX_FMT_SAND64_16] = { ++ .name = "sand64_16", ++ .nb_components = 3, ++ .log2_chroma_w = 1, ++ .log2_chroma_h = 1, ++ .comp = { ++ { 0, 2, 0, 0, 16, 0, 15, 1 }, /* Y */ ++ { 1, 4, 0, 0, 16, 3, 15, 1 }, /* U */ ++ { 1, 4, 2, 0, 16, 3, 15, 3 }, /* V */ ++ }, ++ .flags = 0, ++ }, + [AV_PIX_FMT_RPI4_8] = { -+ .name = "rpi", ++ .name = "rpi4_8", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, + [AV_PIX_FMT_RPI4_10] = { -+ .name = "rpi", ++ .name = "rpi4_10", + .flags = AV_PIX_FMT_FLAG_HWACCEL, + }, }; @@ -64125,10 +66887,10 @@ index 0000000000..0d5d203dc3 + diff --git a/libavutil/rpi_sand_fns.c b/libavutil/rpi_sand_fns.c new file mode 100644 -index 0000000000..1f543e9357 +index 0000000000..b6071e2928 --- /dev/null +++ b/libavutil/rpi_sand_fns.c -@@ -0,0 +1,356 @@ +@@ -0,0 +1,445 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. @@ -64360,6 +67122,75 @@ index 0000000000..1f543e9357 + } +} + ++// Fetches a single patch - offscreen fixup not done here ++// w <= stride1 ++// single lose bottom 2 bits truncation ++// _x & _w in pixels, strides in bytes ++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h) ++{ ++ const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word ++ const unsigned int xskip0 = _x - (x0 >> 2) * 3; ++ const unsigned int x1 = ((_x + _w) / 3) * 4; ++ const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; ++ const unsigned int mask = stride1 - 1; ++ const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2; ++ const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2; // RHS of a stripe to LHS of next in words ++ ++#if HAVE_SAND_ASM ++ if (_x == 0) { ++ ff_rpi_sand30_lines_to_planar_y8(dst, dst_stride, src, stride1, stride2, _x, y, _w, h); ++ return; ++ } ++#endif ++ ++ if (x0 == x1) { ++ // ******************* ++ // Partial single word xfer ++ return; ++ } ++ ++ for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1) ++ { ++ unsigned int x = x0; ++ const uint32_t * p = (const uint32_t *)p0; ++ uint8_t * d = dst; ++ ++ if (xskip0 != 0) { ++ const uint32_t p3 = *p++; ++ ++ if (xskip0 == 1) ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ while (x != x1) { ++ const uint32_t p3 = *p++; ++ *d++ = (p3 >> 2) & 0xff; ++ *d++ = (p3 >> 12) & 0xff; ++ *d++ = (p3 >> 22) & 0xff; ++ ++ if (((x += 4) & mask) == 0) ++ p += slice_inc; ++ } ++ ++ if (xrem1 != 0) { ++ const uint32_t p3 = *p; ++ ++ *d++ = (p3 >> 2) & 0xff; ++ if (xrem1 == 2) ++ *d++ = (p3 >> 12) & 0xff; ++ } ++ } ++} ++ ++ + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, @@ -64441,6 +67272,16 @@ index 0000000000..1f543e9357 + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w/2, h/2); + break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w, h/2); ++ break; + default: + return -1; + } @@ -64475,6 +67316,16 @@ index 0000000000..1f543e9357 + av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), + x/2, y/2, w/2, h/2); + break; ++ case AV_PIX_FMT_NV12: ++ av_rpi_sand30_to_planar_y8(dst->data[0], dst->linesize[0], ++ src->data[0], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x, y, w, h); ++ av_rpi_sand30_to_planar_y8(dst->data[1], dst->linesize[1], ++ src->data[1], ++ av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src), ++ x/2, y/2, w, h/2); ++ break; + default: + return -1; + } @@ -64487,10 +67338,10 @@ index 0000000000..1f543e9357 +} diff --git a/libavutil/rpi_sand_fns.h b/libavutil/rpi_sand_fns.h new file mode 100644 -index 0000000000..634b55e800 +index 0000000000..462ccb8abd --- /dev/null +++ b/libavutil/rpi_sand_fns.h -@@ -0,0 +1,183 @@ +@@ -0,0 +1,188 @@ +/* +Copyright (c) 2018 Raspberry Pi (Trading) Ltd. +All rights reserved. @@ -64578,6 +67429,11 @@ index 0000000000..634b55e800 + unsigned int _x, unsigned int y, + unsigned int _w, unsigned int h); + ++void av_rpi_sand30_to_planar_y8(uint8_t * dst, const unsigned int dst_stride, ++ const uint8_t * src, ++ unsigned int stride1, unsigned int stride2, ++ unsigned int _x, unsigned int y, ++ unsigned int _w, unsigned int h); + +// w/h in pixels +void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2, @@ -66308,3 +69164,644 @@ index 0000000000..5935a11ca5 + + do_logparse(args.logfile) + +diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile +index 9e9569777b..7bafda7ee5 100644 +--- a/tests/checkasm/Makefile ++++ b/tests/checkasm/Makefile +@@ -9,8 +9,10 @@ AVCODECOBJS-$(CONFIG_G722DSP) += g722dsp.o + AVCODECOBJS-$(CONFIG_H264DSP) += h264dsp.o + AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o + AVCODECOBJS-$(CONFIG_H264QPEL) += h264qpel.o ++AVCODECOBJS-$(CONFIG_IDCTDSP) += idctdsp.o + AVCODECOBJS-$(CONFIG_LLVIDDSP) += llviddsp.o + AVCODECOBJS-$(CONFIG_LLVIDENCDSP) += llviddspenc.o ++AVCODECOBJS-$(CONFIG_VC1DSP) += vc1dsp.o + AVCODECOBJS-$(CONFIG_VP8DSP) += vp8dsp.o + AVCODECOBJS-$(CONFIG_VIDEODSP) += videodsp.o + +diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c +index 899f68bb32..4d75291057 100644 +--- a/tests/checkasm/checkasm.c ++++ b/tests/checkasm/checkasm.c +@@ -121,6 +121,9 @@ static const struct { + #if CONFIG_HUFFYUV_DECODER + { "huffyuvdsp", checkasm_check_huffyuvdsp }, + #endif ++ #if CONFIG_IDCTDSP ++ { "idctdsp", checkasm_check_idctdsp }, ++ #endif + #if CONFIG_JPEG2000_DECODER + { "jpeg2000dsp", checkasm_check_jpeg2000dsp }, + #endif +@@ -145,6 +148,9 @@ static const struct { + #if CONFIG_V210_ENCODER + { "v210enc", checkasm_check_v210enc }, + #endif ++ #if CONFIG_VC1DSP ++ { "vc1dsp", checkasm_check_vc1dsp }, ++ #endif + #if CONFIG_VP8DSP + { "vp8dsp", checkasm_check_vp8dsp }, + #endif +diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h +index 0190bc912c..5178c49aed 100644 +--- a/tests/checkasm/checkasm.h ++++ b/tests/checkasm/checkasm.h +@@ -60,6 +60,7 @@ void checkasm_check_hevc_add_res(void); + void checkasm_check_hevc_idct(void); + void checkasm_check_hevc_sao(void); + void checkasm_check_huffyuvdsp(void); ++void checkasm_check_idctdsp(void); + void checkasm_check_jpeg2000dsp(void); + void checkasm_check_llviddsp(void); + void checkasm_check_llviddspenc(void); +@@ -73,6 +74,7 @@ void checkasm_check_sw_scale(void); + void checkasm_check_utvideodsp(void); + void checkasm_check_v210dec(void); + void checkasm_check_v210enc(void); ++void checkasm_check_vc1dsp(void); + void checkasm_check_vf_eq(void); + void checkasm_check_vf_gblur(void); + void checkasm_check_vf_hflip(void); +diff --git a/tests/checkasm/idctdsp.c b/tests/checkasm/idctdsp.c +new file mode 100644 +index 0000000000..02724536a7 +--- /dev/null ++++ b/tests/checkasm/idctdsp.c +@@ -0,0 +1,98 @@ ++/* ++ * Copyright (c) 2022 Ben Avison ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with FFmpeg; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#include ++ ++#include "checkasm.h" ++ ++#include "libavcodec/idctdsp.h" ++ ++#include "libavutil/common.h" ++#include "libavutil/internal.h" ++#include "libavutil/intreadwrite.h" ++#include "libavutil/mem_internal.h" ++ ++#define IDCTDSP_TEST(func) { #func, offsetof(IDCTDSPContext, func) }, ++ ++typedef struct { ++ const char *name; ++ size_t offset; ++} test; ++ ++#define RANDOMIZE_BUFFER16(name, size) \ ++ do { \ ++ int i; \ ++ for (i = 0; i < size; ++i) { \ ++ uint16_t r = rnd() % 0x201 - 0x100; \ ++ AV_WN16A(name##0 + i, r); \ ++ AV_WN16A(name##1 + i, r); \ ++ } \ ++ } while (0) ++ ++#define RANDOMIZE_BUFFER8(name, size) \ ++ do { \ ++ int i; \ ++ for (i = 0; i < size; ++i) { \ ++ uint8_t r = rnd(); \ ++ name##0[i] = r; \ ++ name##1[i] = r; \ ++ } \ ++ } while (0) ++ ++static void check_add_put_clamped(void) ++{ ++ /* Source buffers are only as big as needed, since any over-read won't affect results */ ++ LOCAL_ALIGNED_16(int16_t, src0, [64]); ++ LOCAL_ALIGNED_16(int16_t, src1, [64]); ++ /* Destination buffers have borders of one row above/below and 8 columns left/right to catch overflows */ ++ LOCAL_ALIGNED_8(uint8_t, dst0, [10 * 24]); ++ LOCAL_ALIGNED_8(uint8_t, dst1, [10 * 24]); ++ ++ AVCodecContext avctx = { 0 }; ++ IDCTDSPContext h; ++ ++ const test tests[] = { ++ IDCTDSP_TEST(add_pixels_clamped) ++ IDCTDSP_TEST(put_pixels_clamped) ++ IDCTDSP_TEST(put_signed_pixels_clamped) ++ }; ++ ++ ff_idctdsp_init(&h, &avctx); ++ ++ for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { ++ void (*func)(const int16_t *, uint8_t * ptrdiff_t) = *(void **)((intptr_t) &h + tests[t].offset); ++ if (check_func(func, "idctdsp.%s", tests[t].name)) { ++ declare_func_emms(AV_CPU_FLAG_MMX, void, const int16_t *, uint8_t *, ptrdiff_t); ++ RANDOMIZE_BUFFER16(src, 64); ++ RANDOMIZE_BUFFER8(dst, 10 * 24); ++ call_ref(src0, dst0 + 24 + 8, 24); ++ call_new(src1, dst1 + 24 + 8, 24); ++ if (memcmp(dst0, dst1, 10 * 24)) ++ fail(); ++ bench_new(src1, dst1 + 24 + 8, 24); ++ } ++ } ++} ++ ++void checkasm_check_idctdsp(void) ++{ ++ check_add_put_clamped(); ++ report("idctdsp"); ++} +diff --git a/tests/checkasm/vc1dsp.c b/tests/checkasm/vc1dsp.c +new file mode 100644 +index 0000000000..52628d15e4 +--- /dev/null ++++ b/tests/checkasm/vc1dsp.c +@@ -0,0 +1,452 @@ ++/* ++ * Copyright (c) 2022 Ben Avison ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with FFmpeg; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#include ++ ++#include "checkasm.h" ++ ++#include "libavcodec/vc1dsp.h" ++ ++#include "libavutil/common.h" ++#include "libavutil/internal.h" ++#include "libavutil/intreadwrite.h" ++#include "libavutil/mem_internal.h" ++ ++#define VC1DSP_TEST(func) { #func, offsetof(VC1DSPContext, func) }, ++#define VC1DSP_SIZED_TEST(func, width, height) { #func, offsetof(VC1DSPContext, func), width, height }, ++ ++typedef struct { ++ const char *name; ++ size_t offset; ++ int width; ++ int height; ++} test; ++ ++typedef struct matrix { ++ size_t width; ++ size_t height; ++ float d[]; ++} matrix; ++ ++static const matrix T8 = { 8, 8, { ++ 12, 12, 12, 12, 12, 12, 12, 12, ++ 16, 15, 9, 4, -4, -9, -15, -16, ++ 16, 6, -6, -16, -16, -6, 6, 16, ++ 15, -4, -16, -9, 9, 16, 4, -15, ++ 12, -12, -12, 12, 12, -12, -12, 12, ++ 9, -16, 4, 15, -15, -4, 16, -9, ++ 6, -16, 16, -6, -6, 16, -16, 6, ++ 4, -9, 15, -16, 16, -15, 9, -4 ++} }; ++ ++static const matrix T4 = { 4, 4, { ++ 17, 17, 17, 17, ++ 22, 10, -10, -22, ++ 17, -17, -17, 17, ++ 10, -22, 22, -10 ++} }; ++ ++static const matrix T8t = { 8, 8, { ++ 12, 16, 16, 15, 12, 9, 6, 4, ++ 12, 15, 6, -4, -12, -16, -16, -9, ++ 12, 9, -6, -16, -12, 4, 16, 15, ++ 12, 4, -16, -9, 12, 15, -6, -16, ++ 12, -4, -16, 9, 12, -15, -6, 16, ++ 12, -9, -6, 16, -12, -4, 16, -15, ++ 12, -15, 6, 4, -12, 16, -16, 9, ++ 12, -16, 16, -15, 12, -9, 6, -4 ++} }; ++ ++static const matrix T4t = { 4, 4, { ++ 17, 22, 17, 10, ++ 17, 10, -17, -22, ++ 17, -10, -17, 22, ++ 17, -22, 17, -10 ++} }; ++ ++static matrix *new_matrix(size_t width, size_t height) ++{ ++ matrix *out = av_mallocz(sizeof (matrix) + height * width * sizeof (float)); ++ if (out == NULL) { ++ fprintf(stderr, "Memory allocation failure\n"); ++ exit(EXIT_FAILURE); ++ } ++ out->width = width; ++ out->height = height; ++ return out; ++} ++ ++static matrix *multiply(const matrix *a, const matrix *b) ++{ ++ matrix *out; ++ if (a->width != b->height) { ++ fprintf(stderr, "Incompatible multiplication\n"); ++ exit(EXIT_FAILURE); ++ } ++ out = new_matrix(b->width, a->height); ++ for (int j = 0; j < out->height; ++j) ++ for (int i = 0; i < out->width; ++i) { ++ float sum = 0; ++ for (int k = 0; k < a->width; ++k) ++ sum += a->d[j * a->width + k] * b->d[k * b->width + i]; ++ out->d[j * out->width + i] = sum; ++ } ++ return out; ++} ++ ++static void normalise(matrix *a) ++{ ++ for (int j = 0; j < a->height; ++j) ++ for (int i = 0; i < a->width; ++i) { ++ float *p = a->d + j * a->width + i; ++ *p *= 64; ++ if (a->height == 4) ++ *p /= (const unsigned[]) { 289, 292, 289, 292 } [j]; ++ else ++ *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [j]; ++ if (a->width == 4) ++ *p /= (const unsigned[]) { 289, 292, 289, 292 } [i]; ++ else ++ *p /= (const unsigned[]) { 288, 289, 292, 289, 288, 289, 292, 289 } [i]; ++ } ++} ++ ++static void divide_and_round_nearest(matrix *a, float by) ++{ ++ for (int j = 0; j < a->height; ++j) ++ for (int i = 0; i < a->width; ++i) { ++ float *p = a->d + j * a->width + i; ++ *p = rintf(*p / by); ++ } ++} ++ ++static void tweak(matrix *a) ++{ ++ for (int j = 4; j < a->height; ++j) ++ for (int i = 0; i < a->width; ++i) { ++ float *p = a->d + j * a->width + i; ++ *p += 1; ++ } ++} ++ ++/* The VC-1 spec places restrictions on the values permitted at three ++ * different stages: ++ * - D: the input coefficients in frequency domain ++ * - E: the intermediate coefficients, inverse-transformed only horizontally ++ * - R: the fully inverse-transformed coefficients ++ * ++ * To fully cater for the ranges specified requires various intermediate ++ * values to be held to 17-bit precision; yet these conditions do not appear ++ * to be utilised in real-world streams. At least some assembly ++ * implementations have chosen to restrict these values to 16-bit precision, ++ * to accelerate the decoding of real-world streams at the cost of strict ++ * adherence to the spec. To avoid our test marking these as failures, ++ * reduce our random inputs. ++ */ ++#define ATTENUATION 4 ++ ++static matrix *generate_inverse_quantized_transform_coefficients(size_t width, size_t height) ++{ ++ matrix *raw, *tmp, *D, *E, *R; ++ raw = new_matrix(width, height); ++ for (int i = 0; i < width * height; ++i) ++ raw->d[i] = (int) (rnd() % (1024/ATTENUATION)) - 512/ATTENUATION; ++ tmp = multiply(height == 8 ? &T8 : &T4, raw); ++ D = multiply(tmp, width == 8 ? &T8t : &T4t); ++ normalise(D); ++ divide_and_round_nearest(D, 1); ++ for (int i = 0; i < width * height; ++i) { ++ if (D->d[i] < -2048/ATTENUATION || D->d[i] > 2048/ATTENUATION-1) { ++ /* Rare, so simply try again */ ++ av_free(raw); ++ av_free(tmp); ++ av_free(D); ++ return generate_inverse_quantized_transform_coefficients(width, height); ++ } ++ } ++ E = multiply(D, width == 8 ? &T8 : &T4); ++ divide_and_round_nearest(E, 8); ++ for (int i = 0; i < width * height; ++i) ++ if (E->d[i] < -4096/ATTENUATION || E->d[i] > 4096/ATTENUATION-1) { ++ /* Rare, so simply try again */ ++ av_free(raw); ++ av_free(tmp); ++ av_free(D); ++ av_free(E); ++ return generate_inverse_quantized_transform_coefficients(width, height); ++ } ++ R = multiply(height == 8 ? &T8t : &T4t, E); ++ tweak(R); ++ divide_and_round_nearest(R, 128); ++ for (int i = 0; i < width * height; ++i) ++ if (R->d[i] < -512/ATTENUATION || R->d[i] > 512/ATTENUATION-1) { ++ /* Rare, so simply try again */ ++ av_free(raw); ++ av_free(tmp); ++ av_free(D); ++ av_free(E); ++ av_free(R); ++ return generate_inverse_quantized_transform_coefficients(width, height); ++ } ++ av_free(raw); ++ av_free(tmp); ++ av_free(E); ++ av_free(R); ++ return D; ++} ++ ++#define RANDOMIZE_BUFFER16(name, size) \ ++ do { \ ++ int i; \ ++ for (i = 0; i < size; ++i) { \ ++ uint16_t r = rnd(); \ ++ AV_WN16A(name##0 + i, r); \ ++ AV_WN16A(name##1 + i, r); \ ++ } \ ++ } while (0) ++ ++#define RANDOMIZE_BUFFER8(name, size) \ ++ do { \ ++ int i; \ ++ for (i = 0; i < size; ++i) { \ ++ uint8_t r = rnd(); \ ++ name##0[i] = r; \ ++ name##1[i] = r; \ ++ } \ ++ } while (0) ++ ++#define RANDOMIZE_BUFFER8_MID_WEIGHTED(name, size) \ ++ do { \ ++ uint8_t *p##0 = name##0, *p##1 = name##1; \ ++ int i = (size); \ ++ while (i-- > 0) { \ ++ int x = 0x80 | (rnd() & 0x7F); \ ++ x >>= rnd() % 9; \ ++ if (rnd() & 1) \ ++ x = -x; \ ++ *p##1++ = *p##0++ = 0x80 + x; \ ++ } \ ++ } while (0) ++ ++static void check_inv_trans_inplace(void) ++{ ++ /* Inverse transform input coefficients are stored in a 16-bit buffer ++ * with row stride of 8 coefficients irrespective of transform size. ++ * vc1_inv_trans_8x8 differs from the others in two ways: coefficients ++ * are stored in column-major order, and the outputs are written back ++ * to the input buffer, so we oversize it slightly to catch overruns. */ ++ LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [10 * 8]); ++ LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [10 * 8]); ++ ++ VC1DSPContext h; ++ ++ ff_vc1dsp_init(&h); ++ ++ if (check_func(h.vc1_inv_trans_8x8, "vc1dsp.vc1_inv_trans_8x8")) { ++ matrix *coeffs; ++ declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *); ++ RANDOMIZE_BUFFER16(inv_trans_in, 10 * 8); ++ coeffs = generate_inverse_quantized_transform_coefficients(8, 8); ++ for (int j = 0; j < 8; ++j) ++ for (int i = 0; i < 8; ++i) { ++ int idx = 8 + i * 8 + j; ++ inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * 8 + i]; ++ } ++ call_ref(inv_trans_in0 + 8); ++ call_new(inv_trans_in1 + 8); ++ if (memcmp(inv_trans_in0, inv_trans_in1, 10 * 8 * sizeof (int16_t))) ++ fail(); ++ bench_new(inv_trans_in1 + 8); ++ av_free(coeffs); ++ } ++} ++ ++static void check_inv_trans_adding(void) ++{ ++ /* Inverse transform input coefficients are stored in a 16-bit buffer ++ * with row stride of 8 coefficients irrespective of transform size. */ ++ LOCAL_ALIGNED_16(int16_t, inv_trans_in0, [8 * 8]); ++ LOCAL_ALIGNED_16(int16_t, inv_trans_in1, [8 * 8]); ++ ++ /* For all but vc1_inv_trans_8x8, the inverse transform is narrowed and ++ * added with saturation to an array of unsigned 8-bit values. Oversize ++ * this by 8 samples left and right and one row above and below. */ ++ LOCAL_ALIGNED_8(uint8_t, inv_trans_out0, [10 * 24]); ++ LOCAL_ALIGNED_8(uint8_t, inv_trans_out1, [10 * 24]); ++ ++ VC1DSPContext h; ++ ++ const test tests[] = { ++ VC1DSP_SIZED_TEST(vc1_inv_trans_8x4, 8, 4) ++ VC1DSP_SIZED_TEST(vc1_inv_trans_4x8, 4, 8) ++ VC1DSP_SIZED_TEST(vc1_inv_trans_4x4, 4, 4) ++ VC1DSP_SIZED_TEST(vc1_inv_trans_8x8_dc, 8, 8) ++ VC1DSP_SIZED_TEST(vc1_inv_trans_8x4_dc, 8, 4) ++ VC1DSP_SIZED_TEST(vc1_inv_trans_4x8_dc, 4, 8) ++ VC1DSP_SIZED_TEST(vc1_inv_trans_4x4_dc, 4, 4) ++ }; ++ ++ ff_vc1dsp_init(&h); ++ ++ for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { ++ void (*func)(uint8_t *, ptrdiff_t, int16_t *) = *(void **)((intptr_t) &h + tests[t].offset); ++ if (check_func(func, "vc1dsp.%s", tests[t].name)) { ++ matrix *coeffs; ++ declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int16_t *); ++ RANDOMIZE_BUFFER16(inv_trans_in, 8 * 8); ++ RANDOMIZE_BUFFER8(inv_trans_out, 10 * 24); ++ coeffs = generate_inverse_quantized_transform_coefficients(tests[t].width, tests[t].height); ++ for (int j = 0; j < tests[t].height; ++j) ++ for (int i = 0; i < tests[t].width; ++i) { ++ int idx = j * 8 + i; ++ inv_trans_in1[idx] = inv_trans_in0[idx] = coeffs->d[j * tests[t].width + i]; ++ } ++ call_ref(inv_trans_out0 + 24 + 8, 24, inv_trans_in0); ++ call_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1); ++ if (memcmp(inv_trans_out0, inv_trans_out1, 10 * 24)) ++ fail(); ++ bench_new(inv_trans_out1 + 24 + 8, 24, inv_trans_in1 + 8); ++ av_free(coeffs); ++ } ++ } ++} ++ ++static void check_loop_filter(void) ++{ ++ /* Deblocking filter buffers are big enough to hold a 16x16 block, ++ * plus 16 columns left and 4 rows above to hold filter inputs ++ * (depending on whether v or h neighbouring block edge, oversized ++ * horizontally to maintain 16-byte alignment) plus 16 columns and ++ * 4 rows below to catch write overflows */ ++ LOCAL_ALIGNED_16(uint8_t, filter_buf0, [24 * 48]); ++ LOCAL_ALIGNED_16(uint8_t, filter_buf1, [24 * 48]); ++ ++ VC1DSPContext h; ++ ++ const test tests[] = { ++ VC1DSP_TEST(vc1_v_loop_filter4) ++ VC1DSP_TEST(vc1_h_loop_filter4) ++ VC1DSP_TEST(vc1_v_loop_filter8) ++ VC1DSP_TEST(vc1_h_loop_filter8) ++ VC1DSP_TEST(vc1_v_loop_filter16) ++ VC1DSP_TEST(vc1_h_loop_filter16) ++ }; ++ ++ ff_vc1dsp_init(&h); ++ ++ for (size_t t = 0; t < FF_ARRAY_ELEMS(tests); ++t) { ++ void (*func)(uint8_t *, ptrdiff_t, int) = *(void **)((intptr_t) &h + tests[t].offset); ++ declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *, ptrdiff_t, int); ++ if (check_func(func, "vc1dsp.%s", tests[t].name)) { ++ for (int count = 1000; count > 0; --count) { ++ int pq = rnd() % 31 + 1; ++ RANDOMIZE_BUFFER8_MID_WEIGHTED(filter_buf, 24 * 48); ++ call_ref(filter_buf0 + 4 * 48 + 16, 48, pq); ++ call_new(filter_buf1 + 4 * 48 + 16, 48, pq); ++ if (memcmp(filter_buf0, filter_buf1, 24 * 48)) ++ fail(); ++ } ++ } ++ for (int j = 0; j < 24; ++j) ++ for (int i = 0; i < 48; ++i) ++ filter_buf1[j * 48 + i] = 0x60 + 0x40 * (i >= 16 && j >= 4); ++ if (check_func(func, "vc1dsp.%s_bestcase", tests[t].name)) ++ bench_new(filter_buf1 + 4 * 48 + 16, 48, 1); ++ if (check_func(func, "vc1dsp.%s_worstcase", tests[t].name)) ++ bench_new(filter_buf1 + 4 * 48 + 16, 48, 31); ++ } ++} ++ ++#define TEST_UNESCAPE \ ++ do { \ ++ for (int count = 100; count > 0; --count) { \ ++ escaped_offset = rnd() & 7; \ ++ unescaped_offset = rnd() & 7; \ ++ escaped_len = (1u << (rnd() % 8) + 3) - (rnd() & 7); \ ++ RANDOMIZE_BUFFER8(unescaped, UNESCAPE_BUF_SIZE); \ ++ len0 = call_ref(escaped0 + escaped_offset, escaped_len, unescaped0 + unescaped_offset); \ ++ len1 = call_new(escaped1 + escaped_offset, escaped_len, unescaped1 + unescaped_offset); \ ++ if (len0 != len1 || memcmp(unescaped0, unescaped1, UNESCAPE_BUF_SIZE)) \ ++ fail(); \ ++ } \ ++ } while (0) ++ ++static void check_unescape(void) ++{ ++ /* This appears to be a typical length of buffer in use */ ++#define LOG2_UNESCAPE_BUF_SIZE 17 ++#define UNESCAPE_BUF_SIZE (1u<