ffmpeg: hevc: Fix for green artefacts

This commit is contained in:
MilhouseVH 2018-12-10 20:03:53 +00:00
parent 68e31e1161
commit 0c4bbe8556

View File

@ -12349,7 +12349,7 @@ index 0000000000..75a1789c25
+
diff --git a/libavcodec/arm/rpi_hevcpred_intra_filter_neon.S b/libavcodec/arm/rpi_hevcpred_intra_filter_neon.S
new file mode 100644
index 0000000000..6ce3d3ca8d
index 0000000000..21cd28c709
--- /dev/null
+++ b/libavcodec/arm/rpi_hevcpred_intra_filter_neon.S
@@ -0,0 +1,872 @@
@ -12838,7 +12838,7 @@ index 0000000000..6ce3d3ca8d
+ vldm r5, {d20-d23}
+ bgt 1f
+ bge 4f
+ cmp r5, #8
+ cmp r12, #8
+ bge 3f
+ vdup.16 d21, d20[3]
+3: vdup.16 d22, d21[3]
@ -39984,10 +39984,10 @@ index 0000000000..2f710626cf
+
diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c
new file mode 100644
index 0000000000..c16d9931bd
index 0000000000..20f218f22c
--- /dev/null
+++ b/libavcodec/rpi_mailbox.c
@@ -0,0 +1,145 @@
@@ -0,0 +1,107 @@
+/*
+Copyright (c) 2012, Broadcom Europe Ltd.
+All rights reserved.
@ -40053,44 +40053,6 @@ index 0000000000..c16d9931bd
+ return ret_val;
+}
+
+unsigned mbox_mem_lock(int file_desc, unsigned handle)
+{
+ int i=0;
+ unsigned p[32];
+ p[i++] = 0; // size
+ p[i++] = 0x00000000; // process request
+
+ p[i++] = 0x3000d; // (the tag id)
+ p[i++] = 4; // (size of the buffer)
+ p[i++] = 4; // (size of the data)
+ p[i++] = handle;
+
+ p[i++] = 0x00000000; // end tag
+ p[0] = i*sizeof *p; // actual size
+
+ mbox_property(file_desc, p);
+ return p[5];
+}
+
+unsigned mbox_mem_unlock(int file_desc, unsigned handle)
+{
+ int i=0;
+ unsigned p[32];
+ p[i++] = 0; // size
+ p[i++] = 0x00000000; // process request
+
+ p[i++] = 0x3000e; // (the tag id)
+ p[i++] = 4; // (size of the buffer)
+ p[i++] = 4; // (size of the data)
+ p[i++] = handle;
+
+ p[i++] = 0x00000000; // end tag
+ p[0] = i*sizeof *p; // actual size
+
+ mbox_property(file_desc, p);
+ return p[5];
+}
+
+#define GET_VCIMAGE_PARAMS 0x30044
+
+int mbox_get_image_params(int fd, VC_IMAGE_T * img)
@ -40135,10 +40097,10 @@ index 0000000000..c16d9931bd
+
diff --git a/libavcodec/rpi_mailbox.h b/libavcodec/rpi_mailbox.h
new file mode 100644
index 0000000000..b3168788d2
index 0000000000..06709d57fd
--- /dev/null
+++ b/libavcodec/rpi_mailbox.h
@@ -0,0 +1,58 @@
@@ -0,0 +1,55 @@
+#ifndef RPI_MAILBOX_H
+#define RPI_MAILBOX_H
+
@ -40191,18 +40153,15 @@ index 0000000000..b3168788d2
+extern int mbox_open(void);
+extern void mbox_close(int file_desc);
+
+extern unsigned mbox_mem_lock(int file_desc, unsigned handle);
+extern unsigned mbox_mem_unlock(int file_desc, unsigned handle);
+
+int mbox_get_image_params(int fd, VC_IMAGE_T * img);
+
+#endif
diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c
new file mode 100644
index 0000000000..ddbb1eb9a6
index 0000000000..f4498bf7b1
--- /dev/null
+++ b/libavcodec/rpi_qpu.c
@@ -0,0 +1,992 @@
@@ -0,0 +1,957 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
@ -40407,59 +40366,34 @@ index 0000000000..ddbb1eb9a6
+
+// GPU memory alloc fns (internal)
+
+// GPU_MEM_PTR_T alloc fns
+// The magic 0x80 on the cache type means: map all pages to arm memory now
+// rather than demand page later
+static int gpu_malloc_cached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) {
+ p->numbytes = (numbytes + 255) & ~255; // Round up
+ p->vcsm_handle = vcsm_malloc_cache(p->numbytes, VCSM_CACHE_TYPE_HOST | 0x80, (char *)"Video Frame" );
+ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" );
+ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" );
+ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" );
+ av_assert0(p->vcsm_handle);
+ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle);
+ av_assert0(p->vc_handle);
+ p->arm = vcsm_lock(p->vcsm_handle);
+ av_assert0(p->arm);
+ p->vc = mbox_mem_lock(mb, p->vc_handle);
+ av_assert0(p->vc);
+ return 0;
+}
+
+static int gpu_malloc_vccached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) {
+ p->numbytes = numbytes;
+ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC | 0x80, (char *)"VPU code" );
+ av_assert0(p->vcsm_handle);
+ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle);
+ av_assert0(p->vc_handle);
+ p->arm = vcsm_lock(p->vcsm_handle);
+ av_assert0(p->arm);
+ p->vc = mbox_mem_lock(mb, p->vc_handle);
+ av_assert0(p->vc);
+ return 0;
+}
+
+static int gpu_malloc_uncached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) {
+ p->numbytes = numbytes;
+ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE | 0x80, (char *)"Video Frame" );
+ av_assert0(p->vcsm_handle);
+ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle);
+ av_assert0(p->vc_handle);
+ p->arm = vcsm_lock(p->vcsm_handle);
+ av_assert0(p->arm);
+ p->vc = mbox_mem_lock(mb, p->vc_handle);
+ av_assert0(p->vc);
+ return 0;
+}
+
+static void gpu_free_internal(const int mb, GPU_MEM_PTR_T * const p) {
+ mbox_mem_unlock(mb, p->vc_handle);
+static void gpu_free_internal(GPU_MEM_PTR_T * const p)
+{
+ if (p->arm != NULL)
+ vcsm_unlock_ptr(p->arm);
+ if (p->vcsm_handle != 0)
+ vcsm_free(p->vcsm_handle);
+ memset(p, 0, sizeof(*p)); // Ensure we crash hard if we try and use this again
+}
+
+
+static int gpu_malloc_internal(GPU_MEM_PTR_T * const p,
+ const int numbytes, const unsigned int cache_type, const char * const name)
+{
+ memset(p, 0, sizeof(*p));
+ p->numbytes = (numbytes + 255) & ~255; // Round up
+
+ if ((p->vcsm_handle = vcsm_malloc_cache(p->numbytes, cache_type | 0x80, (char *)name)) == 0 ||
+ (p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle)) == 0 ||
+ (p->arm = vcsm_lock(p->vcsm_handle)) == NULL ||
+ (p->vc = vcsm_vc_addr_from_hdl(p->vcsm_handle)) == 0)
+ {
+ gpu_free_internal(p);
+ return AVERROR(ENOMEM);
+ }
+ return 0;
+}
+
+
+// GPU init, free, lock, unlock
+
+static void gpu_term(void)
@ -40471,9 +40405,9 @@ index 0000000000..ddbb1eb9a6
+
+ vc_gpuserv_deinit();
+
+ gpu_free_internal(ge->mb, &ge->code_gm_ptr);
+ gpu_free_internal(ge->mb, &ge->qpu_code_gm_ptr);
+ gpu_free_internal(ge->mb, &ge->dummy_gm_ptr);
+ gpu_free_internal(&ge->code_gm_ptr);
+ gpu_free_internal(&ge->qpu_code_gm_ptr);
+ gpu_free_internal(&ge->dummy_gm_ptr);
+
+ vcsm_exit();
+
@ -40489,6 +40423,7 @@ index 0000000000..ddbb1eb9a6
+static int gpu_init(gpu_env_t ** const gpu) {
+ volatile struct GPU* ptr;
+ gpu_env_t * const ge = calloc(1, sizeof(gpu_env_t));
+ int rv;
+ *gpu = NULL;
+
+ if (ge == NULL)
@ -40502,7 +40437,8 @@ index 0000000000..ddbb1eb9a6
+ vcsm_init();
+
+ // Now copy over the QPU code into GPU memory
+ gpu_malloc_uncached_internal(ge->mb, QPU_CODE_SIZE*4, &ge->qpu_code_gm_ptr);
+ if ((rv = gpu_malloc_internal(&ge->qpu_code_gm_ptr, QPU_CODE_SIZE * 4, VCSM_CACHE_TYPE_NONE, "ffmpeg qpu code")) != 0)
+ return rv;
+
+ {
+ int num_bytes = (char *)mc_end - (char *)ff_hevc_rpi_shader;
@ -40512,7 +40448,8 @@ index 0000000000..ddbb1eb9a6
+ }
+
+ // And the VPU code
+ gpu_malloc_vccached_internal(ge->mb, sizeof(struct GPU), &ge->code_gm_ptr);
+ if ((rv = gpu_malloc_internal(&ge->code_gm_ptr, sizeof(struct GPU), VCSM_CACHE_TYPE_VC, "ffmpeg vpu code")) != 0)
+ return rv;
+ ptr = (volatile struct GPU*)ge->code_gm_ptr.arm;
+
+ // Zero everything so we have zeros between the code bits
@ -40532,7 +40469,8 @@ index 0000000000..ddbb1eb9a6
+
+ // Generate a dummy "frame" & fill with 0x80
+ // * Could reset to 1 <<bit_depth?
+ gpu_malloc_uncached_internal(ge->mb, 0x4000, &ge->dummy_gm_ptr);
+ if ((rv = gpu_malloc_internal(&ge->dummy_gm_ptr, 0x4000, VCSM_CACHE_TYPE_NONE, "ffmpeg dummy frame")) != 0)
+ return rv;
+ memset(ge->dummy_gm_ptr.arm, 0x80, 0x4000);
+
+ *gpu = ge;
@ -40592,13 +40530,7 @@ index 0000000000..ddbb1eb9a6
+// Therefore safe to use without data cache flushing.
+int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p)
+{
+ int r;
+ gpu_env_t * const ge = gpu_lock_ref();
+ if (ge == NULL)
+ return -1;
+ r = gpu_malloc_uncached_internal(ge->mb, numbytes, p);
+ gpu_unlock();
+ return r;
+ return gpu_malloc_internal(p, numbytes, VCSM_CACHE_TYPE_NONE, "ffmpeg uncached");
+}
+
+// This allocates data that will be
@ -40606,19 +40538,11 @@ index 0000000000..ddbb1eb9a6
+// Uncached in VPU L2
+int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p)
+{
+ int r;
+ gpu_env_t * const ge = gpu_lock_ref();
+ if (ge == NULL)
+ return -1;
+ r = gpu_malloc_cached_internal(ge->mb, numbytes, p);
+ gpu_unlock();
+ return r;
+ return gpu_malloc_internal(p, numbytes, VCSM_CACHE_TYPE_HOST, "ffmpeg cached");
+}
+
+void gpu_free(GPU_MEM_PTR_T * const p) {
+ gpu_env_t * const ge = gpu_lock();
+ gpu_free_internal(ge->mb, p);
+ gpu_unlock_unref(ge);
+ gpu_free_internal(p);
+}
+
+unsigned int vpu_get_fn(const unsigned int bit_depth) {