From a8cffb11428d13d57190dc173bca87a206546040 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Wed, 24 Apr 2019 10:42:35 +0200 Subject: [PATCH] kodi (RPi4): use MMAL renderer with gbm Signed-off-by: Matthias Reichl --- .../patches/kodi/kodi-001-hack-in-mmal.patch | 5273 +++++++++++++++++ 1 file changed, 5273 insertions(+) create mode 100644 projects/RPi/devices/RPi4/patches/kodi/kodi-001-hack-in-mmal.patch diff --git a/projects/RPi/devices/RPi4/patches/kodi/kodi-001-hack-in-mmal.patch b/projects/RPi/devices/RPi4/patches/kodi/kodi-001-hack-in-mmal.patch new file mode 100644 index 0000000000..c46c5cdd47 --- /dev/null +++ b/projects/RPi/devices/RPi4/patches/kodi/kodi-001-hack-in-mmal.patch @@ -0,0 +1,5273 @@ +From a67cb967b02cfb59c448d3c6a5a2bc1ecfe933a6 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Wed, 24 Apr 2019 17:01:32 +0100 +Subject: [PATCH 01/14] hack: Nobble gles3 detection + +--- + cmake/modules/FindOpenGLES.cmake | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmake/modules/FindOpenGLES.cmake b/cmake/modules/FindOpenGLES.cmake +index 0191d9e78c..c85a17928a 100644 +--- a/cmake/modules/FindOpenGLES.cmake ++++ b/cmake/modules/FindOpenGLES.cmake +@@ -42,7 +42,7 @@ find_path(OPENGLES3_INCLUDE_DIR GLES3/gl3.h + + if(OPENGLES_FOUND) + set(OPENGLES_LIBRARIES ${OPENGLES_gl_LIBRARY}) +- if(OPENGLES3_INCLUDE_DIR) ++ if(OPENGLES3x_INCLUDE_DIR) + set(OPENGLES_INCLUDE_DIRS ${OPENGLES_INCLUDE_DIR} ${OPENGLES3_INCLUDE_DIR}) + set(OPENGLES_DEFINITIONS -DHAS_GLES=3) + mark_as_advanced(OPENGLES_INCLUDE_DIR OPENGLES3_INCLUDE_DIR OPENGLES_gl_LIBRARY) +-- +2.20.1 + + +From 3cb1b595f36b6f6ba8befad52912ab245b91c0ab Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Tue, 4 Jun 2019 13:38:44 +0100 +Subject: [PATCH 02/14] ffmpeg: hevc: Add gpu based hevc optimisation + +--- + tools/depends/target/ffmpeg/Makefile | 6 +- + .../ffmpeg/pfcd_hevc_optimisations.patch | 4102 +++++++++++++++++ + 2 files changed, 4107 insertions(+), 1 deletion(-) + create mode 
100644 tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch + +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index 47acb73bdd..0bb9eb375e 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -1,6 +1,7 @@ + include ../../Makefile.include + include FFMPEG-VERSION +-DEPS= ../../Makefile.include FFMPEG-VERSION Makefile ++DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ ++ pfcd_hevc_optimisations.patch \ + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -68,6 +69,8 @@ ifeq ($(Configuration), Release) + ffmpg_config += --disable-debug + endif + ++ffmpg_config += --enable-rpi ++ + all: .installed-$(PLATFORM) + + $(TARBALLS_LOCATION)/$(ARCHIVE): +@@ -77,6 +80,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM); mkdir -p $(PLATFORM) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); sed -i".bak" -e "s%pkg_config_default=pkg-config%export PKG_CONFIG_LIBDIR=$(PREFIX)/lib/pkgconfig \&\& pkg_config_default=$(NATIVEPREFIX)/bin/pkg-config%" configure ++ cd $(PLATFORM); patch -p1 < ../pfcd_hevc_optimisations.patch + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ + ./configure $(ffmpg_config) +diff --git a/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch b/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +new file mode 100644 +index 0000000000..fbdc27f516 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +@@ -0,0 +1,4102 @@ ++diff --git a/configure b/configure ++index 172611bb4a..fa204fca71 100755 ++--- a/configure +++++ b/configure ++@@ -1782,6 +1782,8 @@ HWACCEL_LIBRARY_LIST=" ++ mmal ++ omx ++ opencl +++ rpi4_8 +++ rpi4_10 ++ " ++ ++ DOCUMENT_LIST=" ++@@ -1843,6 +1845,7 @@ SUBSYSTEM_LIST=" ++ pixelutils ++ network ++ rdft +++ rpi ++ 
" ++ ++ # COMPONENT_LIST needs to come last to ensure correct dependency checking ++@@ -2312,6 +2315,7 @@ CONFIG_EXTRA=" ++ rangecoder ++ riffdec ++ riffenc +++ rpi ++ rtpdec ++ rtpenc_chain ++ rv34dsp ++diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c ++index c0214c42d8..3f43b58cbb 100644 ++--- a/fftools/ffmpeg.c +++++ b/fftools/ffmpeg.c ++@@ -23,6 +23,11 @@ ++ * multimedia converter based on the FFmpeg libraries ++ */ ++ +++#ifdef RPI +++//#define RPI_DISPLAY +++#define RPI_DISPLAY_ALL 0 +++#endif +++ ++ #include "config.h" ++ #include ++ #include ++@@ -70,6 +75,24 @@ ++ # include "libavfilter/buffersrc.h" ++ # include "libavfilter/buffersink.h" ++ +++#ifdef RPI_DISPLAY +++#pragma GCC diagnostic push +++// Many many redundant decls in the header files +++#pragma GCC diagnostic ignored "-Wredundant-decls" +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#pragma GCC diagnostic pop +++#include "libavcodec/rpi_qpu.h" +++#include "libavcodec/rpi_zc.h" +++#endif +++ ++ #if HAVE_SYS_RESOURCE_H ++ #include ++ #include ++@@ -162,6 +185,247 @@ static int restore_tty; ++ static void free_input_threads(void); ++ #endif ++ +++#ifdef RPI_DISPLAY +++ +++#define NUM_BUFFERS 4 +++ +++ +++typedef struct rpi_display_env_s +++{ +++ MMAL_COMPONENT_T* display; +++ MMAL_COMPONENT_T* isp; +++ MMAL_PORT_T * port_in; // Input port of either isp or display depending on pipe setup +++ MMAL_CONNECTION_T * conn; +++ +++ MMAL_POOL_T *rpi_pool; +++ volatile int rpi_display_count; +++ enum AVPixelFormat avfmt; +++} rpi_display_env_t; +++ +++static rpi_display_env_t * rpi_display_env = NULL; +++ +++ +++static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port) +++{ +++ MMAL_POOL_T* pool; +++ mmal_port_parameter_set_boolean(port, MMAL_PARAMETER_ZERO_COPY, MMAL_TRUE); // Does this mark that the buffer contains a vc_handle? Would have expected a vc_image? 
+++ pool = mmal_port_pool_create(port, NUM_BUFFERS, 0); +++ assert(pool); +++ +++ return pool; +++} +++ +++static void display_cb_input(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) { +++ rpi_display_env_t *const de = (rpi_display_env_t *)port->userdata; +++ av_rpi_zc_unref(buffer->user_data); +++ atomic_fetch_add(&de->rpi_display_count, -1); +++ mmal_buffer_header_release(buffer); +++} +++ +++static void display_cb_control(MMAL_PORT_T *port,MMAL_BUFFER_HEADER_T *buffer) { +++ mmal_buffer_header_release(buffer); +++} +++ +++#define DISPLAY_PORT_DEPTH 4 +++ +++static rpi_display_env_t * +++display_init(const enum AVPixelFormat req_fmt, size_t x, size_t y, size_t w, size_t h) +++{ +++ MMAL_STATUS_T err; +++ MMAL_DISPLAYREGION_T region = +++ { +++ .hdr = {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)}, +++ .set = MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_DEST_RECT, +++ .layer = 2, +++ .fullscreen = 0, +++ .dest_rect = {x, y, w, h} +++ }; +++#if RPI_ZC_SAND_8_IN_10_BUF +++ const enum AVPixelFormat fmt = (req_fmt == AV_PIX_FMT_YUV420P10 || av_rpi_is_sand_format(req_fmt)) ? AV_PIX_FMT_SAND128 : req_fmt; +++#else +++ const enum AVPixelFormat fmt = (req_fmt == AV_PIX_FMT_YUV420P10) ? AV_PIX_FMT_SAND128 : req_fmt; +++#endif +++ const AVRpiZcFrameGeometry geo = av_rpi_zc_frame_geometry(fmt, w, h); +++ rpi_display_env_t * de; +++ int isp_req = (fmt == AV_PIX_FMT_SAND64_10); +++ +++ bcm_host_init(); // Needs to be done by someone... 
+++ +++ if ((de = av_mallocz(sizeof(*de))) == NULL) { +++ return NULL; +++ } +++ +++ mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &de->display); +++ av_assert0(de->display); +++ de->port_in = de->display->input[0]; +++ +++ if (isp_req) +++ { +++ mmal_component_create("vc.ril.isp", &de->isp); +++ de->port_in = de->isp->input[0]; +++ } +++ +++ mmal_port_parameter_set(de->display->input[0], ®ion.hdr); +++ +++ { +++ MMAL_PORT_T * const port = de->port_in; +++ MMAL_ES_FORMAT_T* const format = port->format; +++ port->userdata = (struct MMAL_PORT_USERDATA_T *)de; +++ port->buffer_num = DISPLAY_PORT_DEPTH; +++ format->encoding = +++ fmt == AV_PIX_FMT_SAND128 ? MMAL_ENCODING_YUVUV128 : +++ fmt == AV_PIX_FMT_RPI4_8 ? MMAL_ENCODING_YUVUV128 : +++ fmt == AV_PIX_FMT_RPI4_10 ? MMAL_ENCODING_YUV10_COL : +++ fmt == AV_PIX_FMT_SAND64_10 ? MMAL_ENCODING_YUVUV64_16 : +++ MMAL_ENCODING_I420; +++ format->es->video.width = geo.stride_y; +++ format->es->video.height = (fmt == AV_PIX_FMT_SAND128 || +++ fmt == AV_PIX_FMT_RPI4_8 || +++ fmt == AV_PIX_FMT_RPI4_10 || +++ fmt == AV_PIX_FMT_SAND64_10) ? 
+++ (h + 15) & ~15 : geo.height_y; // Magic +++ format->es->video.crop.x = 0; +++ format->es->video.crop.y = 0; +++ format->es->video.crop.width = w; +++ format->es->video.crop.height = h; +++ mmal_port_format_commit(port); +++ } +++ +++ de->rpi_pool = display_alloc_pool(de->port_in); +++ mmal_port_enable(de->port_in,display_cb_input); +++ +++ if (isp_req) { +++ MMAL_PORT_T * const port_out = de->isp->output[0]; +++ mmal_log_dump_port(de->port_in); +++ mmal_format_copy(port_out->format, de->port_in->format); +++ if (fmt == AV_PIX_FMT_SAND64_10) { +++ if ((err = mmal_port_parameter_set_int32(de->port_in, MMAL_PARAMETER_CCM_SHIFT, 5)) != MMAL_SUCCESS || +++ (err = mmal_port_parameter_set_int32(port_out, MMAL_PARAMETER_OUTPUT_SHIFT, 1)) != MMAL_SUCCESS) +++ { +++ av_log(NULL, AV_LOG_WARNING, "Failed to set ISP output port shift\n"); +++ } +++ else +++ av_log(NULL, AV_LOG_WARNING, "Set ISP output port shift OK\n"); +++ +++ } +++ port_out->format->encoding = MMAL_ENCODING_I420; +++ mmal_log_dump_port(port_out); +++ if ((err = mmal_port_format_commit(port_out)) != MMAL_SUCCESS) +++ { +++ av_log(NULL, AV_LOG_ERROR, "Failed to set ISP output port format\n"); +++ goto fail; +++ } +++ if ((err = mmal_connection_create(&de->conn, port_out, de->display->input[0], MMAL_CONNECTION_FLAG_TUNNELLING)) != MMAL_SUCCESS) { +++ av_log(NULL, AV_LOG_ERROR, "Failed to create connection\n"); +++ goto fail; +++ } +++ if ((err = mmal_connection_enable(de->conn)) != MMAL_SUCCESS) { +++ av_log(NULL, AV_LOG_ERROR, "Failed to enable connection\n"); +++ goto fail; +++ } +++ mmal_port_enable(de->isp->control,display_cb_control); +++ mmal_component_enable(de->isp); +++ } +++ +++ mmal_component_enable(de->display); +++ mmal_port_enable(de->display->control,display_cb_control); +++ de->avfmt = fmt; +++ +++ printf("Allocated display %dx%d in %dx%d, fmt=%d\n", w, h, geo.stride_y, geo.height_y, fmt); +++ +++ return de; +++ +++fail: +++ // **** Free stuff +++ return NULL; +++} +++ +++static void 
display_frame(struct AVCodecContext * const s, rpi_display_env_t * const de, const AVFrame* const fr) +++{ +++ MMAL_BUFFER_HEADER_T* buf; +++ +++ if (de == NULL) +++ return; +++ +++ if (atomic_load(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) { +++ av_log(s, AV_LOG_VERBOSE, "Frame dropped\n"); +++ return; +++ } +++ +++ buf = mmal_queue_get(de->rpi_pool->queue); +++ if (!buf) { +++ // Running too fast so drop the frame +++ printf("Q alloc failure\n"); +++ return; +++ } +++ assert(buf); +++ buf->cmd = 0; +++ buf->offset = 0; // Offset to valid data +++ buf->flags = 0; +++ { +++ const AVRpiZcRefPtr fr_buf = av_rpi_zc_ref(s, fr, de->avfmt, 1); +++ if (fr_buf == NULL) { +++ mmal_buffer_header_release(buf); +++ return; +++ } +++ +++ buf->user_data = fr_buf; +++ buf->data = (uint8_t *)av_rpi_zc_vc_handle(fr_buf); // Cast our handle to a pointer for mmal +++ buf->offset = av_rpi_zc_offset(fr_buf); +++ buf->length = av_rpi_zc_length(fr_buf); +++ buf->alloc_size = av_rpi_zc_numbytes(fr_buf); +++ atomic_fetch_add(&de->rpi_display_count, 1); +++ } +++#if RPI_DISPLAY_ALL +++ while (atomic_load(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) { +++ usleep(5000); +++ } +++#endif +++ +++ if (mmal_port_send_buffer(de->port_in, buf) != MMAL_SUCCESS) +++ { +++ av_log(s, AV_LOG_ERROR, "mmal_port_send_buffer failed: depth=%d\n", de->rpi_display_count); +++ display_cb_input(de->port_in, buf); +++ } +++} +++ +++static void display_exit(rpi_display_env_t ** const pde) +++{ +++ rpi_display_env_t * const de = *pde; +++ *pde = NULL; +++ +++ if (de != NULL) { +++// sleep(120); +++ +++ if (de->port_in != NULL) { +++ mmal_port_disable(de->port_in); +++ } +++ +++ // The above disable should kick out all buffers - check that +++ if (atomic_load(&de->rpi_display_count) != 0) { +++ av_log(NULL, AV_LOG_WARNING, "Exiting with display count non-zero:%d\n", atomic_load(&de->rpi_display_count)); +++ } +++ +++ if (de->conn != NULL) { +++ mmal_connection_destroy(de->conn); +++ } +++ if (de->isp != 
NULL) { +++ mmal_component_destroy(de->isp); +++ } +++ if (de->display != NULL) { +++ mmal_component_destroy(de->display); +++ } +++ if (de->rpi_pool != NULL) { +++ mmal_port_pool_destroy(de->display->input[0], de->rpi_pool); +++ } +++ +++ av_free(de); +++ } +++} +++ +++#endif +++ +++ ++ /* sub2video hack: ++ Convert subtitles to video with alpha to insert them in filter graphs. ++ This is a temporary solution until libavfilter gets real subtitles support. ++@@ -583,6 +847,11 @@ static void ffmpeg_cleanup(int ret) ++ avformat_close_input(&input_files[i]->ctx); ++ av_freep(&input_files[i]); ++ } +++ +++#ifdef RPI_DISPLAY +++ display_exit(&rpi_display_env); +++#endif +++ ++ for (i = 0; i < nb_input_streams; i++) { ++ InputStream *ist = input_streams[i]; ++ ++@@ -594,7 +863,9 @@ static void ffmpeg_cleanup(int ret) ++ av_freep(&ist->filters); ++ av_freep(&ist->hwaccel_device); ++ av_freep(&ist->dts_buffer); ++- +++#ifdef RPI_DISPLAY +++ av_rpi_zc_uninit(ist->dec_ctx); +++#endif ++ avcodec_free_context(&ist->dec_ctx); ++ ++ av_freep(&input_streams[i]); ++@@ -625,6 +896,7 @@ static void ffmpeg_cleanup(int ret) ++ } ++ term_exit(); ++ ffmpeg_exited = 1; +++ ++ } ++ ++ void remove_avoptions(AVDictionary **a, AVDictionary *b) ++@@ -1060,6 +1332,15 @@ static void do_video_out(OutputFile *of, ++ if (ost->source_index >= 0) ++ ist = input_streams[ost->source_index]; ++ +++#ifdef RPI_DISPLAY +++ if (next_picture && ist != NULL) +++ { +++ if (rpi_display_env == NULL) +++ rpi_display_env = display_init(next_picture->format, 0, 0, next_picture->width, next_picture->height); +++ display_frame(ist->dec_ctx, rpi_display_env, next_picture); +++ } +++#endif +++ ++ frame_rate = av_buffersink_get_frame_rate(filter); ++ if (frame_rate.num > 0 && frame_rate.den > 0) ++ duration = 1/(av_q2d(frame_rate) * av_q2d(enc->time_base)); ++@@ -1275,7 +1556,7 @@ static void do_video_out(OutputFile *of, ++ ++ ost->frames_encoded++; ++ ++- ret = avcodec_send_frame(enc, in_picture); +++ ret = 
0;//avcodec_send_frame(enc, in_picture); ++ if (ret < 0) ++ goto error; ++ ++@@ -2891,6 +3172,12 @@ static int init_input_stream(int ist_index, char *error, int error_len) ++ ist->dec_ctx->opaque = ist; ++ ist->dec_ctx->get_format = get_format; ++ ist->dec_ctx->get_buffer2 = get_buffer; +++ +++#ifdef RPI_DISPLAY +++ // Overrides the above get_buffer2 +++ av_rpi_zc_init(ist->dec_ctx); +++#endif +++ ++ ist->dec_ctx->thread_safe_callbacks = 1; ++ ++ av_opt_set_int(ist->dec_ctx, "refcounted_frames", 1, 0); ++diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h ++index d44b7a5c72..0c5fa38f1d 100644 ++--- a/fftools/ffmpeg.h +++++ b/fftools/ffmpeg.h ++@@ -62,6 +62,7 @@ enum HWAccelID { ++ HWACCEL_VIDEOTOOLBOX, ++ HWACCEL_QSV, ++ HWACCEL_CUVID, +++ HWACCEL_RPI, ++ }; ++ ++ typedef struct HWAccel { ++@@ -654,6 +655,7 @@ int ffmpeg_parse_options(int argc, char **argv); ++ int videotoolbox_init(AVCodecContext *s); ++ int qsv_init(AVCodecContext *s); ++ int cuvid_init(AVCodecContext *s); +++int rpi_init(AVCodecContext *s); ++ ++ HWDevice *hw_device_get_by_name(const char *name); ++ int hw_device_init_from_string(const char *arg, HWDevice **dev); ++diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c ++index d7a7eb0662..4ee87e742b 100644 ++--- a/fftools/ffmpeg_opt.c +++++ b/fftools/ffmpeg_opt.c ++@@ -74,6 +74,10 @@ const HWAccel hwaccels[] = { ++ #endif ++ #if CONFIG_CUVID ++ { "cuvid", cuvid_init, HWACCEL_CUVID, AV_PIX_FMT_CUDA }, +++#endif +++#if CONFIG_RPI +++ { "rpi", rpi_init, HWACCEL_RPI, AV_PIX_FMT_RPI4_8 }, +++ { "rpi", rpi_init, HWACCEL_RPI, AV_PIX_FMT_RPI4_10 }, ++ #endif ++ { 0 }, ++ }; ++diff --git a/libavcodec/Makefile b/libavcodec/Makefile ++index 4b8ad121db..40ec4691ef 100644 ++--- a/libavcodec/Makefile +++++ b/libavcodec/Makefile ++@@ -6,6 +6,10 @@ HEADERS = ac3_parser.h \ ++ avcodec.h \ ++ avdct.h \ ++ avfft.h \ +++ rpi_qpu.h \ +++ rpi_mailbox.h \ +++ rpi_zc.h \ +++ rpi_ctrl_ffmpeg.h \ ++ d3d11va.h \ ++ dirac.h \ ++ dv_profile.h \ ++@@ -48,6 +52,10 @@ OBJS = 
ac3_parser.o \ ++ qsv_api.o \ ++ raw.o \ ++ utils.o \ +++ rpi_qpu.o \ +++ rpi_mailbox.o \ +++ rpi_zc.o \ +++ rpi_ctrl_ffmpeg.o \ ++ vorbis_parser.o \ ++ xiph.o \ ++ ++@@ -360,6 +368,7 @@ OBJS-$(CONFIG_HAP_ENCODER) += hapenc.o hap.o ++ OBJS-$(CONFIG_HEVC_DECODER) += hevcdec.o hevc_mvs.o \ ++ hevc_cabac.o hevc_refs.o hevcpred.o \ ++ hevcdsp.o hevc_filter.o hevc_data.o +++OBJS-$(CONFIG_RPI) += rpi_hevc.o ++ OBJS-$(CONFIG_HEVC_AMF_ENCODER) += amfenc_hevc.o ++ OBJS-$(CONFIG_HEVC_CUVID_DECODER) += cuviddec.o ++ OBJS-$(CONFIG_HEVC_MEDIACODEC_DECODER) += mediacodecdec.o ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index fb0c6fae70..798d0903eb 100644 ++--- a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -3208,7 +3208,13 @@ typedef struct AVCodecContext { ++ #endif ++ ++ /** ++- * Audio only. The amount of padding (in samples) appended by the encoder to +++ * Opaque pointer for use by replacement get_buffer2 code +++ * +++ * @author jc (08/02/2016) +++ */ +++ void * get_buffer_context; +++ +++ /* Audio only. The amount of padding (in samples) appended by the encoder to ++ * the end of the audio. I.e. this number of decoded samples must be ++ * discarded by the caller from the end of the stream to get the original ++ * audio without any trailing padding. 
++diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c ++index c8877626d2..da769116ec 100644 ++--- a/libavcodec/hevcdec.c +++++ b/libavcodec/hevcdec.c ++@@ -364,12 +364,17 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) ++ CONFIG_HEVC_NVDEC_HWACCEL + \ ++ CONFIG_HEVC_VAAPI_HWACCEL + \ ++ CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \ +++ CONFIG_HEVC_RPI4_8_HWACCEL + \ +++ CONFIG_HEVC_RPI4_10_HWACCEL + \ ++ CONFIG_HEVC_VDPAU_HWACCEL) ++ enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts; ++ ++ switch (sps->pix_fmt) { ++ case AV_PIX_FMT_YUV420P: ++ case AV_PIX_FMT_YUVJ420P: +++#if CONFIG_HEVC_RPI4_8_HWACCEL +++ *fmt++ = AV_PIX_FMT_RPI4_8; +++#endif ++ #if CONFIG_HEVC_DXVA2_HWACCEL ++ *fmt++ = AV_PIX_FMT_DXVA2_VLD; ++ #endif ++@@ -391,6 +396,9 @@ static enum AVPixelFormat get_format(HEVCContext *s, const HEVCSPS *sps) ++ #endif ++ break; ++ case AV_PIX_FMT_YUV420P10: +++#if CONFIG_HEVC_RPI4_10_HWACCEL +++ *fmt++ = AV_PIX_FMT_RPI4_10; +++#endif ++ #if CONFIG_HEVC_DXVA2_HWACCEL ++ *fmt++ = AV_PIX_FMT_DXVA2_VLD; ++ #endif ++@@ -3556,6 +3564,12 @@ AVCodec ff_hevc_decoder = { ++ #endif ++ #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL ++ HWACCEL_VIDEOTOOLBOX(hevc), +++#endif +++#if CONFIG_HEVC_RPI4_8_HWACCEL +++ HWACCEL_RPI4_8(hevc), +++#endif +++#if CONFIG_HEVC_RPI4_10_HWACCEL +++ HWACCEL_RPI4_10(hevc), ++ #endif ++ NULL ++ }, ++diff --git a/libavcodec/hwaccel.h b/libavcodec/hwaccel.h ++index 3aaa92571c..c6bc36b3e3 100644 ++--- a/libavcodec/hwaccel.h +++++ b/libavcodec/hwaccel.h ++@@ -80,5 +80,9 @@ typedef struct AVCodecHWConfigInternal { ++ HW_CONFIG_HWACCEL(0, 0, 1, D3D11VA_VLD, NONE, ff_ ## codec ## _d3d11va_hwaccel) ++ #define HWACCEL_XVMC(codec) \ ++ HW_CONFIG_HWACCEL(0, 0, 1, XVMC, NONE, ff_ ## codec ## _xvmc_hwaccel) +++#define HWACCEL_RPI4_8(codec) \ +++ HW_CONFIG_HWACCEL(0, 0, 1, RPI4_8, NONE, ff_ ## codec ## _rpi4_8_hwaccel) +++#define HWACCEL_RPI4_10(codec) \ +++ HW_CONFIG_HWACCEL(0, 0, 1, RPI4_10, NONE, ff_ ## codec ## _rpi4_10_hwaccel) 
++ ++ #endif /* AVCODEC_HWACCEL_H */ ++diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h ++index 7d73da8676..01799f869a 100644 ++--- a/libavcodec/hwaccels.h +++++ b/libavcodec/hwaccels.h ++@@ -74,5 +74,7 @@ extern const AVHWAccel ff_wmv3_dxva2_hwaccel; ++ extern const AVHWAccel ff_wmv3_nvdec_hwaccel; ++ extern const AVHWAccel ff_wmv3_vaapi_hwaccel; ++ extern const AVHWAccel ff_wmv3_vdpau_hwaccel; +++extern const AVHWAccel ff_hevc_rpi4_8_hwaccel; +++extern const AVHWAccel ff_hevc_rpi4_10_hwaccel; ++ ++ #endif /* AVCODEC_HWACCELS_H */ ++diff --git a/libavcodec/rpi_ctrl_ffmpeg.c b/libavcodec/rpi_ctrl_ffmpeg.c ++new file mode 100644 ++index 0000000000..6d93adba03 ++--- /dev/null +++++ b/libavcodec/rpi_ctrl_ffmpeg.c ++@@ -0,0 +1,427 @@ +++#include +++#include +++#include +++#include +++ +++// How to access GPIO registers from C-code on the Raspberry-Pi +++// Example program +++// 15-January-2012 +++// Dom and Gert +++ +++// Access from ARM Running Linux +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include +++#include +++#include +++#include +++#include "rpi_mailbox.h" +++#include "rpi_ctrl_ffmpeg.h" +++ +++#define av_assert0(x) assert(x) +++ +++// argon block doesn't see VC sdram alias bits +++#define MANGLE(x) ((x) &~0xc0000000) +++#ifdef AXI_BUFFERS +++#define AXI_MEM_SIZE (64*1024*1024) +++#else +++#define AXI_MEM_SIZE (64*1024*1024) +++#endif +++ +++#define PAGE_SIZE (4*1024) +++#define BLOCK_SIZE (0x10000) +++#define CACHED 0 +++#define VERBOSE 0 +++ +++static inline void __DMB2(void) {}//{ asm volatile ("dmb" ::: "memory"); } +++ +++ +++// GPU memory alloc fns (internal) +++typedef struct gpu_mem_ptr_s { +++ unsigned char *arm; // Pointer to memory mapped on ARM side +++ int vc_handle; // Videocore handle of relocatable memory +++ int vcsm_handle; // Handle for use by VCSM +++ unsigned int vc; // Address for use in GPU code +++ unsigned int 
numbytes; // Size of memory block +++} GPU_MEM_PTR_T; +++ +++typedef enum +++{ +++ RPI_CACHE_FLUSH_MODE_INVALIDATE = 1, +++ RPI_CACHE_FLUSH_MODE_WRITEBACK = 2, +++ RPI_CACHE_FLUSH_MODE_WB_INVALIDATE = 3 +++} rpi_cache_flush_mode_t; +++ +++// GPU_MEM_PTR_T alloc fns +++static int gpu_malloc_cached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) { +++ p->numbytes = (numbytes + 255) & ~255; // Round up +++ p->vcsm_handle = vcsm_malloc_cache(p->numbytes, VCSM_CACHE_TYPE_HOST | 0x80, (char *)"Video Frame" ); +++ av_assert0(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ av_assert0(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ av_assert0(p->arm); +++ p->vc = mbox_mem_lock(mb, p->vc_handle); +++ av_assert0(p->vc); +++ printf("***** %s, %d\n", __func__, numbytes); +++ +++ return 0; +++} +++ +++static int gpu_malloc_uncached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) { +++ p->numbytes = numbytes; +++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE | 0x80, (char *)"Video Frame" ); +++ av_assert0(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ av_assert0(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ av_assert0(p->arm); +++ p->vc = mbox_mem_lock(mb, p->vc_handle); +++ av_assert0(p->vc); +++ printf("***** %s, %d\n", __func__, numbytes); +++ return 0; +++} +++ +++static void gpu_free_internal(const int mb, GPU_MEM_PTR_T * const p) { +++ mbox_mem_unlock(mb, p->vc_handle); +++ vcsm_unlock_ptr(p->arm); +++ vcsm_free(p->vcsm_handle); +++ memset(p, 0, sizeof(*p)); // Ensure we crash hard if we try and use this again +++ printf("***** %s\n", __func__); +++} +++ +++static void gpu_clean_invalidate(GPU_MEM_PTR_T * const p, int mode) { +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = mode; +++ iocache.s[0].addr = (int) p->arm; +++ iocache.s[0].size = p->numbytes; +++ 
vcsm_clean_invalid( &iocache ); +++ printf("***** %s mode:%d\n", __func__, mode); +++} +++ +++// +++// Set up a memory regions to access periperhals +++// +++static void *setup_io(const char *dev, unsigned long base) +++{ +++ void *gpio_map; +++ int mem_fd; +++ +++ /* open /dev/mem */ +++ if ((mem_fd = open(dev, O_RDWR|O_SYNC) ) < 0) { +++ printf("can't open %s\n", dev); +++ exit (-1); +++ } +++ // Now map it +++ gpio_map = (unsigned char *)mmap( +++ NULL, +++ BLOCK_SIZE, +++ PROT_READ|PROT_WRITE, +++ MAP_SHARED, +++ mem_fd, +++ base +++ ); +++ printf("%s: %08lx -> %p (fd:%d)\n", __FUNCTION__, base, gpio_map, mem_fd); +++ +++ if (gpio_map == MAP_FAILED) { +++ printf("mmap error %p\n", gpio_map); +++ //exit (-1); +++ } +++ +++ return gpio_map; +++} // setup_io +++ +++static void release_io(void *gpio_map) +++{ +++ int s = munmap(gpio_map, BLOCK_SIZE); +++ assert(s == 0); +++} +++ +++struct RPI_DEBUG { +++ FILE *fp_reg; +++ FILE *fp_bin; +++ int mbox; +++ GPU_MEM_PTR_T axi; +++ void *read_buf; +++ int32_t read_buf_size, read_buf_used; +++ volatile unsigned int *apb; +++ volatile unsigned int *interrupt; +++ //volatile unsigned int *sdram; +++}; +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++void rpi_apb_write_addr(void *id, uint16_t addr, uint32_t data) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "P %x %08x\n", addr, data); +++ __DMB2(); +++ rpi->apb[addr>>2] = data + (MANGLE(rpi->axi.vc)>>6); +++} +++ +++uint64_t rpi_axi_get_addr(void *id) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ return (uint64_t)MANGLE(rpi->axi.vc); +++} +++ +++void rpi_apb_write(void *id, uint16_t addr, uint32_t data) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "W %x %08x\n", addr, data); +++ __DMB2(); +++ rpi->apb[addr>>2] = data; +++} +++ +++uint32_t rpi_apb_read(void *id, uint16_t addr) { +++ struct RPI_DEBUG *rpi = (struct 
RPI_DEBUG *) id; +++ uint32_t v = rpi->apb[addr>>2]; +++ __DMB2(); +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "R %x (=%x)\n", addr, v); +++ return v; +++} +++ +++void rpi_apb_read_drop(void *id, uint16_t addr) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ uint32_t v = rpi->apb[addr>>2]; +++ __DMB2(); +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "R %x (=%x)\n", addr, v); +++} +++ +++void rpi_axi_write(void *id, uint64_t addr, uint32_t size, void *buf) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "L %08" PRIx64 " %08x\n", addr, size); +++ assert(addr + size <= AXI_MEM_SIZE); +++ __DMB2(); +++ memcpy(rpi->axi.arm + addr, buf, size); +++} +++ +++void rpi_axi_read_alloc(void *id, uint32_t size) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ assert(rpi->read_buf == NULL); +++ rpi->read_buf = malloc(size); +++ rpi->read_buf_size = size; +++ rpi->read_buf_used = 0; +++} +++ +++void rpi_axi_read_tx(void *id, uint64_t addr, uint32_t size) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ assert(rpi->read_buf_used + size <= rpi->read_buf_size); +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "S %08" PRIx64 " %08x\n", addr, size); +++ assert(addr + size <= AXI_MEM_SIZE); +++ __DMB2(); +++ memcpy((char *)rpi->read_buf + rpi->read_buf_used, rpi->axi.arm + addr, size); +++ rpi->read_buf_used += size; +++} +++ +++void rpi_axi_read_rx(void *id, uint32_t size, void *buf) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ assert(size == rpi->read_buf_used); +++ fprintf(rpi->fp_reg, "Z " PRIx64 " %08x\n", size); +++ memcpy(buf, rpi->read_buf, size); +++ free(rpi->read_buf); +++ rpi->read_buf = NULL; +++ rpi->read_buf_size = 0; +++ rpi->read_buf_used = 0; +++} +++ +++static int getthreadnum(unsigned pid) +++{ +++ static unsigned pids[8]; +++ int i; +++ for (i = 0; i < 8; i++) +++ { +++ if (pids[i] == 0) +++ pids[i] = pid; +++ if (pids[i] == pid) +++ return i; +++ } +++ return -1; +++} +++ +++#define 
_NOP() //do { __asm__ __volatile__ ("nop"); } while (0) +++ +++static void yield(void) +++{ +++ int i; +++ for (i=0; i<0; i++) +++ _NOP(); +++ usleep(1000); +++} +++ +++ +++void rpi_wait_interrupt(void *id, int phase) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ static struct timespec tfirst={0,0}; +++ static __thread struct timespec tstart={0,0}; +++ struct timespec tend={0,0}; +++ unsigned pid = (unsigned)pthread_self(); +++ clock_gettime(CLOCK_MONOTONIC, &tend); +++ if (tstart.tv_sec == 0 && tstart.tv_nsec == 0) +++ tstart = tend; +++ if (tfirst.tv_sec == 0 && tfirst.tv_nsec == 0) +++ { +++ /*printf("%s: Resetting sdram stats\n", __FUNCTION__); +++ rpi->sdram[0x30/4] = 0;*/ +++ tfirst = tend; +++ } +++ if (VERBOSE) +++ printf("%08llu: %s: IN thread:%u phase:%d time:%llu\n", ((tend.tv_sec * 1000000000ULL + tend.tv_nsec) - (tfirst.tv_sec * 1000000000ULL + tfirst.tv_nsec))/1000, +++ __FUNCTION__, getthreadnum(pid), phase, ((tend.tv_sec * 1000000000ULL + tend.tv_nsec) - (tstart.tv_sec * 1000000000ULL + tstart.tv_nsec))/1000); +++ /*enum {IDL=0x30/4, RTC=0x34/4, WTC=0x38/4, RDC=0x3c/4, WDC=0x40/4, RAC=0x44/4, CYC=0x48/4, CMD=0x4c/4, DAT=0x50/4, RDCMD=0x78/4, RDSUB=0x7c/4, WRCMD=0x80/4, WRSUB=0x84/4, MWRCMD=0x88/4, MWRSUB=0x8c/4,}; +++ printf("IDL:%u RTC:%u WTC:%u RDC:%u WDC:%u RAC:%u CYC:%u CMD:%u DAT:%u RDCMD:%u RDSUB:%u WRCMD:%u WRSUB:%u MWRCMD:%u MWRSUB:%u\n", +++ rpi->sdram[IDL], rpi->sdram[RTC], rpi->sdram[WTC], rpi->sdram[RDC], rpi->sdram[WDC], rpi->sdram[RAC], rpi->sdram[CYC], rpi->sdram[CMD], rpi->sdram[DAT], +++ rpi->sdram[RDCMD], rpi->sdram[RDSUB], rpi->sdram[WRCMD], rpi->sdram[WRSUB], rpi->sdram[MWRCMD], rpi->sdram[MWRSUB]); +++ rpi->sdram[0x30/4] = 0;*/ +++ +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "I %d\n", phase); +++ __DMB2(); +++#if 0 +++ assert(phase == 1 || phase == 2); +++ for (;;) { +++ if (phase==1 && rpi->apb[0x74>>2]==rpi->apb[0x70>>2]) break; +++ else if (phase==2 && (rpi->apb[0x8028/*STATUS2*/>>2]&1)==0) break; +++ } +++ 
fprintf(rpi->fp_reg, "I %d done\n", phase); +++#else +++ #define ARG_IC_ICTRL_ACTIVE1_INT_SET 0x00000001 +++ #define ARG_IC_ICTRL_ACTIVE1_EDGE_SET 0x00000002 +++ #define ARG_IC_ICTRL_ACTIVE1_EN_SET 0x00000004 +++ #define ARG_IC_ICTRL_ACTIVE1_STATUS_SET 0x00000008 +++ #define ARG_IC_ICTRL_ACTIVE2_INT_SET 0x00000010 +++ #define ARG_IC_ICTRL_ACTIVE2_EDGE_SET 0x00000020 +++ #define ARG_IC_ICTRL_ACTIVE2_EN_SET 0x00000040 +++ #define ARG_IC_ICTRL_ACTIVE2_STATUS_SET 0x00000080 +++ //if (rpi->interrupt[0] &~ (ARG_IC_ICTRL_ACTIVE1_INT_SET|ARG_IC_ICTRL_ACTIVE2_INT_SET|ARG_IC_ICTRL_ACTIVE1_EDGE_SET|ARG_IC_ICTRL_ACTIVE2_EDGE_SET|ARG_IC_ICTRL_ACTIVE1_STATUS_SET|ARG_IC_ICTRL_ACTIVE2_STATUS_SET)) +++ //fprintf(rpi->fp_reg, "I %d %x in\n", phase, rpi->interrupt[0]); +++ +++ if (phase == 1) { +++ while (!(rpi->interrupt[0] & ARG_IC_ICTRL_ACTIVE1_INT_SET)) +++ yield(); +++ rpi->interrupt[0] = rpi->interrupt[0] &~ ARG_IC_ICTRL_ACTIVE2_INT_SET; //ARG_IC_ICTRL_ACTIVE1_INT_SET|ARG_IC_ICTRL_ACTIVE2_EDGE_SET|ARG_IC_ICTRL_ACTIVE2_EDGE_SET; +++ } else if (phase == 2) { +++ while (!(rpi->interrupt[0] & ARG_IC_ICTRL_ACTIVE2_INT_SET)) +++ yield(); +++ rpi->interrupt[0] = rpi->interrupt[0] &~ ARG_IC_ICTRL_ACTIVE1_INT_SET; //ARG_IC_ICTRL_ACTIVE2_INT_SET|ARG_IC_ICTRL_ACTIVE1_EDGE_SET|ARG_IC_ICTRL_ACTIVE2_EDGE_SET; +++ } else assert(0); +++#endif +++ //fprintf(rpi->fp_reg, "I %d %x out\n", phase, rpi->interrupt[0]); +++ if (phase == 2) +++ { +++ __DMB2(); +++ if (VERBOSE) +++ fprintf(rpi->fp_reg, "YBASE:%08x CBASE:%08x\n", rpi->apb[0x8018>>2]*64, rpi->apb[0x8020>>2]*64); +++ } +++ clock_gettime(CLOCK_MONOTONIC, &tend); +++ +++ if (VERBOSE) +++ printf("%08llu: %s: OUT thread:%u phase:%d time:%llu\n", ((tend.tv_sec * 1000000000ULL + tend.tv_nsec) - (tfirst.tv_sec * 1000000000ULL + tfirst.tv_nsec))/1000, +++ __FUNCTION__, getthreadnum(pid), phase, ((tend.tv_sec * 1000000000ULL + tend.tv_nsec) - (tstart.tv_sec * 1000000000ULL + tstart.tv_nsec))/1000); +++ /*printf("IDL:%u RTC:%u WTC:%u RDC:%u WDC:%u 
RAC:%u CYC:%u CMD:%u DAT:%u RDCMD:%u RDSUB:%u WRCMD:%u WRSUB:%u MWRCMD:%u MWRSUB:%u\n", +++ rpi->sdram[IDL], rpi->sdram[RTC], rpi->sdram[WTC], rpi->sdram[RDC], rpi->sdram[WDC], rpi->sdram[RAC], rpi->sdram[CYC], rpi->sdram[CMD], rpi->sdram[DAT], +++ rpi->sdram[RDCMD], rpi->sdram[RDSUB], rpi->sdram[WRCMD], rpi->sdram[WRSUB], rpi->sdram[MWRCMD], rpi->sdram[MWRSUB]);*/ +++ +++ tstart = tend; +++} +++ +++ +++void rpi_apb_dump_regs(void *id, uint16_t addr, int num) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ int i; +++ __DMB2(); +++ if (VERBOSE) +++ for (i=0; ifp_reg, "%08x: ", 0x7eb00000 + addr + 4*i); +++ fprintf(rpi->fp_reg, "%08x", rpi->apb[(addr>>2)+i]); +++ if ((i%4)==3 || i+1 == num) +++ fprintf(rpi->fp_reg, "\n"); +++ else +++ fprintf(rpi->fp_reg, " "); +++ } +++} +++ +++void rpi_axi_dump(void *id, uint64_t addr, uint32_t size) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ int i; +++ __DMB2(); +++ if (VERBOSE) +++ for (i=0; i>2; i++) +++ { +++ if ((i%4)==0) +++ fprintf(rpi->fp_reg, "%08x: ", MANGLE(rpi->axi.vc) + (uint32_t)addr + 4*i); +++ fprintf(rpi->fp_reg, "%08x", ((uint32_t*)rpi->axi.arm)[(addr>>2)+i]); +++ if ((i%4)==3 || i+1 == size>>2) +++ fprintf(rpi->fp_reg, "\n"); +++ else +++ fprintf(rpi->fp_reg, " "); +++ } +++} +++ +++void rpi_axi_flush(void *id, int mode) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ if (CACHED) +++ { +++ gpu_clean_invalidate(&rpi->axi, mode); +++ } +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++const char * rpi_ctrl_ffmpeg_init(const char *hwaccel_device, void **id) { +++ struct RPI_DEBUG *rpi = calloc(1, sizeof(struct RPI_DEBUG)); +++ (void) hwaccel_device; +++ printf("%s\n id=%p\n", __FUNCTION__, rpi); +++ +++ if (!rpi) return "out of memory"; +++ +++ bcm_host_init(); +++ vcsm_init(); +++ rpi->apb = setup_io("/dev/argon-hevcmem", 0); +++ rpi->interrupt = setup_io("/dev/argon-intcmem", 0); +++ //rpi->sdram = setup_io(0xfe001000); +++ +++ 
rpi->fp_bin = stderr; +++ rpi->fp_reg = stderr; +++ +++ rpi->mbox = mbox_open(); +++ if ((CACHED ? gpu_malloc_cached_internal:gpu_malloc_uncached_internal)(rpi->mbox, AXI_MEM_SIZE, &rpi->axi) != 0) +++ return "out of memory"; +++ +++ fprintf(rpi->fp_reg, "A 100000000 apb:%p axi.arm:%p axi.vc:%08x\n", rpi->apb, rpi->axi.arm, MANGLE(rpi->axi.vc)); +++ *id = rpi; +++ return 0; +++} +++ +++void rpi_ctrl_ffmpeg_free(void *id) { +++ struct RPI_DEBUG *rpi = (struct RPI_DEBUG *) id; +++ printf("%s id=%p\n", __FUNCTION__, rpi); +++ release_io(rpi->apb); +++ release_io(rpi->interrupt); +++ gpu_free_internal(rpi->mbox, &rpi->axi); +++ printf("%s freed axi mem\n", __FUNCTION__); +++ mbox_close(rpi->mbox); +++ printf("%s closed mbox\n", __FUNCTION__); +++ free(rpi); +++ printf("%s freed rpi\n", __FUNCTION__); +++ vcsm_exit(); +++ bcm_host_deinit(); +++} ++diff --git a/libavcodec/rpi_ctrl_ffmpeg.h b/libavcodec/rpi_ctrl_ffmpeg.h ++new file mode 100644 ++index 0000000000..6a1d95f195 ++--- /dev/null +++++ b/libavcodec/rpi_ctrl_ffmpeg.h ++@@ -0,0 +1,29 @@ +++// rpi_ctrl_ffmpeg.h +++// +++// This file contains prototypes for the functions used to control the socket +++// interface when using ffmpeg. 
+++// +++ +++#ifndef __CTRL_FFMPEG_H__ +++#define __CTRL_FFMPEG_H__ +++ +++#include +++ +++const char *rpi_ctrl_ffmpeg_init (const char *hwaccel_device, void **id); +++void rpi_apb_write_addr (void *id, uint16_t addr, uint32_t data); +++void rpi_apb_write (void *id, uint16_t addr, uint32_t data); +++uint32_t rpi_apb_read (void *id, uint16_t addr); +++void rpi_apb_read_drop (void *id, uint16_t addr); +++void rpi_axi_write (void *id, uint64_t addr, uint32_t size, void *buf); +++void rpi_axi_read (void *id, uint64_t addr, uint32_t size, void *buf); +++void rpi_axi_read_alloc (void *id, uint32_t size); +++void rpi_axi_read_tx (void *id, uint64_t addr, uint32_t size); +++void rpi_axi_read_rx (void *id, uint32_t size, void *buf); +++void rpi_wait_interrupt (void *id, int phase); +++void rpi_ctrl_ffmpeg_free (void *id); +++uint64_t rpi_axi_get_addr (void *id); +++void rpi_apb_dump_regs(void *id, uint16_t addr, int num); +++void rpi_axi_dump(void *id, uint64_t addr, uint32_t size); +++void rpi_axi_flush(void *id, int mode); +++ +++#endif // __CTRL_FILES_H__ ++diff --git a/libavcodec/rpi_hevc.c b/libavcodec/rpi_hevc.c ++new file mode 100644 ++index 0000000000..a000077f33 ++--- /dev/null +++++ b/libavcodec/rpi_hevc.c ++@@ -0,0 +1,1065 @@ +++// FFMPEG HEVC decoder hardware accelerator +++// Andrew Holme, Argon Design Ltd +++// Copyright (c) June 2017 Raspberry Pi Ltd +++ +++#include +++#include +++ +++#include "fftools/ffmpeg.h" +++#include "libavutil/avassert.h" +++#include "libavutil/imgutils.h" +++#include "avcodec.h" +++#include "hwaccel.h" +++ +++#include "rpi_hevc.h" +++#include "rpi_zc.h" +++#include "rpi_qpu.h" +++ +++#include "rpi_ctrl_ffmpeg.h" +++////////////////////////////////////////////////////////////////////////////// +++ +++// Array of constants for scaling factors +++static const uint32_t scaling_factor_offsets[4][6] = { +++ // MID0 MID1 MID2 MID3 MID4 MID5 +++ {0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050}, // SID0 (4x4) +++ {0x0060, 0x00A0, 0x00E0, 
0x0120, 0x0160, 0x01A0}, // SID1 (8x8) +++ {0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0}, // SID2 (16x16) +++ {0x07E0, 0, 0, 0x0BE0, 0, 0}}; // SID3 (32x32) +++ +++// ffmpeg places SID3,MID1 where matrixID 3 normally is +++ +++////////////////////////////////////////////////////////////////////////////// +++// Scaling factors +++ +++static void expand_scaling_list( +++ RPI_T *rpi, +++ const ScalingList *scaling_list, // scaling list structure from ffmpeg +++ uint8_t sizeID, uint8_t matrixID) +++{ +++ uint8_t x, y, i, blkSize = 4<>1)<<3) + (x>>1); break; +++ case 3: i = ((y>>2)<<3) + (x>>2); +++ } +++ rpi->scaling_factors[index] = scaling_list->sl[sizeID][matrixID][i]; +++ } +++ } +++ if (sizeID>1) +++ rpi->scaling_factors[index_offset] = +++ scaling_list->sl_dc[sizeID-2][matrixID]; +++} +++ +++static void populate_scaling_factors(RPI_T *rpi, HEVCContext *s) { +++ const ScalingList *sl = +++ s->ps.pps->scaling_list_data_present_flag ? &s->ps.pps->scaling_list +++ : &s->ps.sps->scaling_list; +++ int sid, mid; +++ for (sid=0; sid<3; sid++) +++ for (mid=0; mid<6; mid++) +++ expand_scaling_list(rpi, sl, sid, mid); +++ +++ // second scaling matrix for 32x32 is at matrixID 3 not 1 in ffmpeg +++ expand_scaling_list(rpi, sl, 3, 0); +++ expand_scaling_list(rpi, sl, 3, 3); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Probabilities +++ +++static void populate_prob_tables(RPI_T *rpi, HEVCContext *s) { +++ struct RPI_PROB *dst = &rpi->probabilities; +++ struct FFM_PROB *src = (struct FFM_PROB *) s->HEVClc->cabac_state; +++ #define PROB_CPSZ(to, from, sz) memcpy(dst->to, src->from, sz) +++ #define PROB_COPY(to, from) memcpy(dst->to, src->from, sizeof(dst->to)) +++ memset(dst, 0, sizeof(*dst)); +++ PROB_COPY(SAO_MERGE_FLAG , sao_merge_flag ); +++ PROB_COPY(SAO_TYPE_IDX , sao_type_idx ); +++ PROB_COPY(SPLIT_FLAG , split_coding_unit_flag ); +++ PROB_COPY(CU_SKIP_FLAG , skip_flag ); +++ PROB_COPY(CU_TRANSQUANT_BYPASS_FLAG, 
cu_transquant_bypass_flag ); +++ PROB_COPY(PRED_MODE , pred_mode_flag ); +++ PROB_COPY(PART_SIZE , part_mode ); +++ PROB_COPY(INTRA_PRED_MODE , prev_intra_luma_pred_flag ); +++ PROB_COPY(CHROMA_PRED_MODE , intra_chroma_pred_mode ); +++ PROB_COPY(MERGE_FLAG_EXT , merge_flag ); +++ PROB_COPY(MERGE_IDX_EXT , merge_idx ); +++ PROB_COPY(INTER_DIR , inter_pred_idc ); +++ PROB_COPY(REF_PIC , ref_idx_l0 ); +++ PROB_COPY(MVP_IDX , mvp_lx_flag ); +++ PROB_CPSZ(MVD+0 , abs_mvd_greater0_flag+0 , 1); // ABS_MVD_GREATER0_FLAG[1] not used +++ PROB_CPSZ(MVD+1 , abs_mvd_greater1_flag+1 , 1); // ABS_MVD_GREATER1_FLAG[0] not used +++ PROB_COPY(QT_ROOT_CBF , no_residual_data_flag ); +++ PROB_COPY(TRANS_SUBDIV_FLAG , split_transform_flag ); +++ PROB_CPSZ(QT_CBF , cbf_luma , 2); +++ PROB_CPSZ(QT_CBF+2 , cbf_cb_cr , 4); +++ PROB_COPY(DQP , cu_qp_delta ); +++ PROB_COPY(ONE_FLAG , coeff_abs_level_greater1_flag ); +++ PROB_COPY(LASTX , last_significant_coeff_x_prefix); +++ PROB_COPY(LASTY , last_significant_coeff_y_prefix); +++ PROB_COPY(SIG_CG_FLAG , significant_coeff_group_flag ); +++ PROB_COPY(ABS_FLAG , coeff_abs_level_greater2_flag ); +++ PROB_COPY(TRANSFORMSKIP_FLAG , transform_skip_flag ); +++ PROB_CPSZ(SIG_FLAG , significant_coeff_flag , 42); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Read YUV data from socket server +++ +++static int bytes_per_line(const HEVCSPS *sps, int jump, int x) { +++ int width = FFMIN(jump, sps->width - x); +++ return sps->bit_depth>8? (width>48? 128:64) +++ : (width>64? 
128:64); +++} +++ +++static void read_rect(RPI_T *rpi, char *buf, int addr64, int height, int bytes_per_line) { +++ rpi->axi_read_alloc(rpi->id, bytes_per_line*height); +++ if (bytes_per_line==128) +++ rpi->axi_read_tx(rpi->id, ((uint64_t)addr64)<<6, 128*height); +++ else { +++ int y; +++ for (y=0; yaxi_read_tx(rpi->id, ((uint64_t)addr64)<<6, 64); +++ } +++ rpi->axi_read_rx(rpi->id, bytes_per_line*height, buf); +++} +++ +++#ifdef AXI_BUFFERS +++////////////////////////////////////////////////////////////////////////////// +++// Copy YUV output data to FFMPEG frame buffer +++ +++static void copy_luma(char *buf, int bpl, int height, int x, uint8_t *data, int linesize) { +++ int y; +++ for (y=0; y> 0)&0x3ff; if(++j==linesize/2) break; +++ dst[j] = (src[i]>>10)&0x3ff; if(++j==linesize/2) break; +++ dst[j] = (src[i]>>20)&0x3ff; if(++j==linesize/2) break; +++ } +++ } +++} +++ +++static void copy_chroma10(char *buf, int bpl, int height, int x, uint8_t *u8, uint8_t *v8, int linesize) { +++ int i, j, y; +++ for (y=0; y> 0)&0x3ff; +++ v16[j] = (src[i]>>10)&0x3ff; if(++j==linesize/2) break; +++ u16[j] = (src[i]>>20)&0x3ff; i++; +++ v16[j] = (src[i]>> 0)&0x3ff; if(++j==linesize/2) break; +++ u16[j] = (src[i]>>10)&0x3ff; +++ v16[j] = (src[i]>>20)&0x3ff; if(++j==linesize/2) break; +++ } +++ } +++} +++#endif +++ +++////////////////////////////////////////////////////////////////////////////// +++// Phase 1 command and bit FIFOs +++ +++static int p1_apb_write(RPI_T *rpi, uint16_t addr, uint32_t data) { +++ if (rpi->cmd_len==rpi->cmd_max) +++ av_assert0(rpi->cmd_fifo = realloc(rpi->cmd_fifo, (rpi->cmd_max*=2)*sizeof(struct RPI_CMD))); +++ rpi->cmd_fifo[rpi->cmd_len].addr = addr; +++ rpi->cmd_fifo[rpi->cmd_len].data = data; +++ return rpi->cmd_len++; +++} +++ +++static void p1_axi_write(RPI_T *rpi, uint32_t len, const void *ptr, int cmd_idx) { +++ if (rpi->bit_len==rpi->bit_max) +++ av_assert0(rpi->bit_fifo = realloc(rpi->bit_fifo, (rpi->bit_max*=2)*sizeof(struct RPI_BIT))); +++ 
rpi->bit_fifo[rpi->bit_len].cmd = cmd_idx; +++ rpi->bit_fifo[rpi->bit_len].ptr = ptr; +++ rpi->bit_fifo[rpi->bit_len].len = len; +++ rpi->bit_len++; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Write probability and scaling factor memories +++ +++static void WriteProb(RPI_T *rpi) { +++ int i; +++ uint8_t *p = (uint8_t *) &rpi->probabilities; +++ for (i=0; iscaling_factors; +++ for (i=0; i= bd[i]; i++); // bd[] has num+1 elements; bd[0]=0; see hevc_ps.c +++ return i-1; +++} +++ +++static int ctb_to_slice_w_h (unsigned int ctb, int ctb_size, int width, unsigned int *bd, int num) { +++ if (ctb < bd[num-1]) return ctb_size; +++ else if (width % ctb_size) return width % ctb_size; +++ else return ctb_size; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static void alloc_picture_space(RPI_T *rpi, HEVCContext *s, int thread_idx) { +++ const HEVCSPS *sps = s->ps.sps; +++ int CtbSizeY = 1<log2_ctb_size; +++ int x64 = AXI_BASE64; +++ +++ rpi->PicWidthInCtbsY = (sps->width + CtbSizeY - 1) / CtbSizeY; //7-15 +++ rpi->PicHeightInCtbsY = (sps->height + CtbSizeY - 1) / CtbSizeY; //7-17 +++#ifdef AXI_BUFFERS +++ rpi->lumabytes64 = ((sps->height+64) * ((sps->width+95)/96) * 2); +++ rpi->framebytes64 = ((rpi->lumabytes64 * 3)/2); +++ rpi->lumastride64 = ((sps->height+64) * 128) / 64; +++ rpi->chromastride64 = (((sps->height+64) * 128 ) / 2) / 64; +++ +++ x64 += 17 * rpi->framebytes64; +++#endif +++ +++ // collocated reads/writes +++ if (sps->sps_temporal_mvp_enabled_flag) { +++ // 128 bits = 16 bytes per MV, one for every 16*16 +++ int collocatedStride64 = (rpi->PicWidthInCtbsY * (CtbSizeY/16) * 16 + 63)>>6; +++ rpi->mvframebytes64 = rpi->PicHeightInCtbsY * (CtbSizeY/16) * collocatedStride64; +++ rpi->mvstorage64 = x64; +++ x64 += rpi->mvframebytes64 * 17; // Leave space for 17 reference pictures +++ rpi->colstride64 = collocatedStride64; +++ rpi->mvstride64 = collocatedStride64; 
+++ } +++ +++ rpi->pubase64[0] = x64; +++} +++ +++static int alloc_stream_space(RPI_T *rpi, HEVCContext *s, int thread_idx) { +++ int stride64, x64 = rpi->pubase64[0]; +++ +++ stride64 = 1 + (rpi->max_pu_msgs*2*rpi->PicWidthInCtbsY)/64; +++ rpi->pubase64[thread_idx] = x64 + rpi->PicHeightInCtbsY*stride64 * thread_idx; +++ rpi->pustep64 = stride64; +++ x64 += rpi->PicHeightInCtbsY*stride64 * s->avctx->thread_count; +++ +++ stride64 = rpi->max_coeff64; +++ rpi->coeffbase64[thread_idx] = x64 + rpi->PicHeightInCtbsY*stride64 * thread_idx; +++ rpi->coeffstep64 = stride64; +++ x64 += rpi->PicHeightInCtbsY*stride64 * s->avctx->thread_count; +++ return x64; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Start or restart phase 1 +++ +++static void phase1_begin(RPI_T *rpi, HEVCContext *s, int thread_idx) { +++ rpi->apb_write_addr(rpi->id, RPI_PUWBASE, rpi->pubase64[thread_idx]); +++ rpi->apb_write(rpi->id, RPI_PUWSTRIDE, rpi->pustep64); +++ rpi->apb_write_addr(rpi->id, RPI_COEFFWBASE, rpi->coeffbase64[thread_idx]); +++ rpi->apb_write(rpi->id, RPI_COEFFWSTRIDE, rpi->coeffstep64); +++} +++ +++/////////////////////////////////////////////////////////////////////////////// +++// Wait until phase 2 idle +++ +++static void wait_idle(RPI_T *rpi, int last) { +++ for (;;) { +++ int order; +++ pthread_mutex_lock (&rpi->mutex_phase2); +++ order = rpi->phase2_order; +++ pthread_mutex_unlock(&rpi->mutex_phase2); +++ if (order==last) return; +++ } +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Handle PU and COEFF stream overflow +++ +++static int check_status(RPI_T *rpi) { +++ int status, c, p; +++ status = rpi->apb_read(rpi->id, RPI_STATUS); +++ p = (status>>4)&1; +++ c = (status>>3)&1; +++ if (p|c) { // overflow? 
+++ wait_idle(rpi, rpi->phase1_order-1); // drain phase2 before changing memory layout +++ if (p) rpi->max_pu_msgs += rpi->max_pu_msgs/2; +++ if (c) rpi->max_coeff64 += rpi->max_coeff64/2; +++ return 1; +++ } +++ return 0; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Write STATUS register with expected end CTU address of previous slice +++ +++static void end_previous_slice(RPI_T *rpi, HEVCContext *s, int ctb_addr_ts) { +++ const HEVCPPS *pps = s->ps.pps; +++ int last_x = pps->ctb_addr_ts_to_rs[ctb_addr_ts-1] % rpi->PicWidthInCtbsY; +++ int last_y = pps->ctb_addr_ts_to_rs[ctb_addr_ts-1] / rpi->PicWidthInCtbsY; +++ p1_apb_write(rpi, RPI_STATUS, 1 + (last_x<<5) + (last_y<<18)); +++} +++ +++static void wpp_pause(RPI_T *rpi, int ctb_row) { +++ p1_apb_write(rpi, RPI_STATUS, (ctb_row<<18) + 0x25); +++ p1_apb_write(rpi, RPI_TRANSFER, PROB_BACKUP); +++ p1_apb_write(rpi, RPI_MODE, ctb_row==rpi->PicHeightInCtbsY-1?0x70000:0x30000); +++ p1_apb_write(rpi, RPI_CONTROL, (ctb_row<<16) + 2); +++} +++ +++static void wpp_end_previous_slice(RPI_T *rpi, HEVCContext *s, int ctb_addr_ts) { +++ const HEVCPPS *pps = s->ps.pps; +++ int new_x = s->sh.slice_ctb_addr_rs % rpi->PicWidthInCtbsY; +++ int new_y = s->sh.slice_ctb_addr_rs / rpi->PicWidthInCtbsY; +++ int last_x = pps->ctb_addr_ts_to_rs[ctb_addr_ts-1] % rpi->PicWidthInCtbsY; +++ int last_y = pps->ctb_addr_ts_to_rs[ctb_addr_ts-1] / rpi->PicWidthInCtbsY; +++ if (rpi->wpp_entry_x<2 && (rpi->wpp_entry_y2) && rpi->PicWidthInCtbsY>2) wpp_pause(rpi, last_y); +++ p1_apb_write(rpi, RPI_STATUS, 1 + (last_x<<5) + (last_y<<18)); +++ if (new_x==2 || rpi->PicWidthInCtbsY==2 && rpi->wpp_entry_yps.sps; +++ const HEVCPPS *pps = s->ps.pps; +++ +++ p1_apb_write(rpi, RPI_SPS0, +++ (sps->log2_min_cb_size << 0) + +++ (sps->log2_ctb_size << 4) + +++ (sps->log2_min_tb_size << 8) + +++ (sps->log2_max_trafo_size << 12) + +++ (sps->bit_depth << 16) + +++ (sps->bit_depth << 20) + +++ 
(sps->max_transform_hierarchy_depth_intra << 24) + +++ (sps->max_transform_hierarchy_depth_inter << 28)); +++ +++ p1_apb_write(rpi, RPI_SPS1, +++ (sps->pcm.bit_depth << 0) + +++ (sps->pcm.bit_depth_chroma << 4) + +++ (sps->pcm.log2_min_pcm_cb_size << 8) + +++ (sps->pcm.log2_max_pcm_cb_size << 12) + +++ (sps->separate_colour_plane_flag? 0:sps->chroma_format_idc << 16) + +++ (sps->amp_enabled_flag << 18) + +++ (sps->pcm_enabled_flag << 19) + +++ (sps->scaling_list_enable_flag << 20) + +++ (sps->sps_strong_intra_smoothing_enable_flag << 21)); +++ +++ p1_apb_write(rpi, RPI_PPS, +++ (sps->log2_ctb_size - pps->diff_cu_qp_delta_depth << 0) + +++ (pps->cu_qp_delta_enabled_flag << 4) + +++ (pps->transquant_bypass_enable_flag << 5) + +++ (pps->transform_skip_enabled_flag << 6) + +++ (pps->sign_data_hiding_flag << 7) + +++ (((pps->cb_qp_offset + s->sh.slice_cb_qp_offset)&255) << 8) + +++ (((pps->cr_qp_offset + s->sh.slice_cr_qp_offset)&255) << 16) + +++ (pps->constrained_intra_pred_flag << 24)); +++ +++ if (s->ps.sps->scaling_list_enable_flag) WriteScalingFactors(rpi); +++ +++ if (!s->sh.dependent_slice_segment_flag) { +++ int ctb_col = s->sh.slice_ctb_addr_rs % rpi->PicWidthInCtbsY; +++ int ctb_row = s->sh.slice_ctb_addr_rs / rpi->PicWidthInCtbsY; +++ rpi->reg_slicestart = (ctb_col<<0) + (ctb_row<<16); +++ } +++ +++ p1_apb_write(rpi, RPI_SLICESTART, rpi->reg_slicestart); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static void write_slice(RPI_T *rpi, HEVCContext *s, uint8_t slice_w, uint8_t slice_h) { +++ uint32_t u32 = +++ (s->sh.slice_type << 12) +++ + (s->sh.slice_sample_adaptive_offset_flag[0] << 14) +++ + (s->sh.slice_sample_adaptive_offset_flag[1] << 15) +++ + (slice_w << 17) +++ + (slice_h << 24); +++ +++ if (s->sh.slice_type==HEVC_SLICE_B || s->sh.slice_type==HEVC_SLICE_P) u32 |= +++ (s->sh.max_num_merge_cand << 0) +++ + (s->sh.nb_refs[L0] << 4) +++ + (s->sh.nb_refs[L1] << 8); +++ +++ if 
(s->sh.slice_type==HEVC_SLICE_B) u32 |= s->sh.mvd_l1_zero_flag<<16; +++ p1_apb_write(rpi, RPI_SLICE, u32); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Wavefront mode +++ +++static void wpp_entry_point(RPI_T *rpi, HEVCContext *s, int do_bte, int resetQPY, int ctb_addr_ts) { +++ const HEVCSPS *sps = s->ps.sps; +++ const HEVCPPS *pps = s->ps.pps; +++ +++ int ctb_size = 1<log2_ctb_size; +++ int ctb_addr_rs = pps->ctb_addr_ts_to_rs[ctb_addr_ts]; +++ +++ int ctb_col = rpi->wpp_entry_x = ctb_addr_rs % rpi->PicWidthInCtbsY; +++ int ctb_row = rpi->wpp_entry_y = ctb_addr_rs / rpi->PicWidthInCtbsY; +++ +++ int endx = rpi->PicWidthInCtbsY-1; +++ int endy = ctb_row; +++ +++ uint8_t slice_w = ctb_to_slice_w_h(ctb_col, ctb_size, sps->width, pps->col_bd, pps->num_tile_columns); +++ uint8_t slice_h = ctb_to_slice_w_h(ctb_row, ctb_size, sps->height, pps->row_bd, pps->num_tile_rows); +++ +++ p1_apb_write(rpi, RPI_TILESTART, 0); +++ p1_apb_write(rpi, RPI_TILEEND, endx + (endy<<16)); +++ +++ if (do_bte) p1_apb_write(rpi, RPI_BEGINTILEEND, endx + (endy<<16)); +++ +++ write_slice(rpi, s, slice_w, ctb_row==rpi->PicHeightInCtbsY-1? slice_h : ctb_size); +++ +++ if (resetQPY) p1_apb_write(rpi, RPI_QP, sps->qp_bd_offset + s->sh.slice_qp); +++ +++ p1_apb_write(rpi, RPI_MODE, ctb_row==rpi->PicHeightInCtbsY-1? 
0x60001 : 0x20001); +++ p1_apb_write(rpi, RPI_CONTROL, (ctb_col<<0) + (ctb_row<<16)); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Tiles mode +++ +++static void new_entry_point(RPI_T *rpi, HEVCContext *s, int do_bte, int resetQPY, int ctb_addr_ts) { +++ const HEVCSPS *sps = s->ps.sps; +++ const HEVCPPS *pps = s->ps.pps; +++ +++ int ctb_col = pps->ctb_addr_ts_to_rs[ctb_addr_ts] % rpi->PicWidthInCtbsY; +++ int ctb_row = pps->ctb_addr_ts_to_rs[ctb_addr_ts] / rpi->PicWidthInCtbsY; +++ +++ int tile_x = ctb_to_tile (ctb_col, pps->col_bd, pps->num_tile_columns); +++ int tile_y = ctb_to_tile (ctb_row, pps->row_bd, pps->num_tile_rows); +++ +++ int endx = pps->col_bd[tile_x+1] - 1; +++ int endy = pps->row_bd[tile_y+1] - 1; +++ +++ uint8_t slice_w = ctb_to_slice_w_h(ctb_col, 1<log2_ctb_size, sps->width, pps->col_bd, pps->num_tile_columns); +++ uint8_t slice_h = ctb_to_slice_w_h(ctb_row, 1<log2_ctb_size, sps->height, pps->row_bd, pps->num_tile_rows); +++ +++ p1_apb_write(rpi, RPI_TILESTART, pps->col_bd[tile_x] + (pps->row_bd[tile_y]<<16)); +++ p1_apb_write(rpi, RPI_TILEEND, endx + (endy<<16)); +++ +++ if (do_bte) p1_apb_write(rpi, RPI_BEGINTILEEND, endx + (endy<<16)); +++ +++ write_slice(rpi, s, slice_w, slice_h); +++ +++ if (resetQPY) p1_apb_write(rpi, RPI_QP, sps->qp_bd_offset + s->sh.slice_qp); +++ +++ p1_apb_write(rpi, RPI_MODE, (0xFFFF << 0) +++ + (0x0 << 16) +++ + ((tile_x==pps->num_tile_columns-1) << 17) +++ + ((tile_y==pps->num_tile_rows-1) << 18)); +++ +++ p1_apb_write(rpi, RPI_CONTROL, (ctb_col<<0) + (ctb_row<<16)); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Workaround for 3 December 2016 commit 8dfba25ce89b62c80ba83e2116d549176c376144 +++// https://github.com/libav/libav/commit/8dfba25ce89b62c80ba83e2116d549176c376144 +++// This commit prevents multi-threaded hardware acceleration by locking hwaccel_mutex +++// around codec->decode() calls. 
Workaround is to unlock and relock before returning. +++ +++static void hwaccel_mutex(AVCodecContext *avctx, int (*action) (pthread_mutex_t *)) { +++ struct FrameThreadContext { +++ void *foo1, *foo2; // must match struct layout in pthread_frame.c +++ pthread_mutex_t foo3, hwaccel_mutex; +++ }; +++ struct PerThreadContext { +++ struct FrameThreadContext *parent; +++ }; +++ struct PerThreadContext *p = avctx->internal->thread_ctx; +++ if (avctx->thread_count>1) action(&p->parent->hwaccel_mutex); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static int get_thread_idx(RPI_T *rpi, AVCodecContext *avctx) { +++ int idx; +++ for (idx=0; idxthread_avctx[idx]==avctx) break; +++ av_assert0(idxinternal->hwaccel_priv_data; +++ HEVCContext *s = avctx->priv_data; +++ +++ int thread_idx = get_thread_idx(rpi, 0); // Find first free slot +++ +++ rpi->thread_avctx[thread_idx] = avctx; +++ rpi->thread_order[thread_idx] = rpi->decode_order++; +++ +++ ff_thread_finish_setup(avctx); // Allow next thread to enter rpi_hevc_start_frame +++ hwaccel_mutex(avctx, pthread_mutex_unlock); +++ +++ // Enforcing phase 1 order precludes busy waiting for phase 2 +++ for (;;) { +++ pthread_mutex_lock (&rpi->mutex_phase1); +++ if (rpi->thread_order[thread_idx]==rpi->phase1_order) break; +++ pthread_mutex_unlock(&rpi->mutex_phase1); +++ } +++ rpi->phase1_order++; +++ +++ alloc_picture_space(rpi, s, thread_idx); +++ rpi->bit_len = rpi->cmd_len = 0; +++ return 0; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Slice messages +++ +++static void msg_slice(RPI_T *rpi, uint16_t msg) { +++ rpi->slice_msgs[rpi->num_slice_msgs++] = msg; +++} +++ +++static void program_slicecmds(RPI_T *rpi, int sliceid) { +++ int i; +++ p1_apb_write(rpi, RPI_SLICECMDS, rpi->num_slice_msgs+(sliceid<<8)); +++ for(i=0; inum_slice_msgs; i++) { +++ p1_apb_write(rpi, 0x4000+4*i, rpi->slice_msgs[i] & 0xffff); +++ } +++} +++ +++static 
void pre_slice_decode(RPI_T *rpi, HEVCContext *s) { +++ const HEVCSPS *sps = s->ps.sps; +++ const HEVCPPS *pps = s->ps.pps; +++ SliceHeader *sh = &s->sh; +++ +++ int weightedPredFlag, i, rIdx; +++ uint16_t cmd_slice; +++ +++ rpi->num_slice_msgs=0; +++ cmd_slice = 0; +++ if (sh->slice_type==HEVC_SLICE_I) cmd_slice = 1; +++ if (sh->slice_type==HEVC_SLICE_P) cmd_slice = 2; +++ if (sh->slice_type==HEVC_SLICE_B) cmd_slice = 3; +++ +++ if (sh->slice_type!=HEVC_SLICE_I) { +++ cmd_slice += sh->nb_refs[L0]<<2; +++ cmd_slice += sh->nb_refs[L1]<<6; +++ } +++ if (sh->slice_type==HEVC_SLICE_P +++ || sh->slice_type==HEVC_SLICE_B) rpi->max_num_merge_cand = sh->max_num_merge_cand; +++ +++ cmd_slice += rpi->max_num_merge_cand<<11; +++ +++ if (sh->slice_temporal_mvp_enabled_flag) { +++ if (sh->slice_type==HEVC_SLICE_B) rpi->collocated_from_l0_flag = sh->collocated_list==L0; +++ else if (sh->slice_type==HEVC_SLICE_P) rpi->collocated_from_l0_flag = 1; +++ } +++ cmd_slice += rpi->collocated_from_l0_flag<<14; +++ +++ if (sh->slice_type==HEVC_SLICE_P || sh->slice_type==HEVC_SLICE_B) { +++ +++ int NoBackwardPredFlag = 1; // Flag to say all reference pictures are from the past +++ for(i=L0; i<=L1; i++) { +++ for(rIdx=0; rIdx nb_refs[i]; rIdx++) { +++ HEVCFrame *f = s->ref->refPicList[i].ref[rIdx]; +++ HEVCFrame *c = s->ref; // CurrentPicture +++ if (c->poc < f->poc) NoBackwardPredFlag = 0; +++ } +++ } +++ +++ rpi->collocated_ref_idx = sh->collocated_ref_idx; +++ if (s->ref->refPicList && s->ref->collocated_ref) +++ for (i=0; inb_refs[L1]) rpi->RefPicList[1][i] = s->ref->refPicList[1].ref[i] - s->DPB; +++ if (inb_refs[L0]) rpi->RefPicList[0][i] = s->ref->refPicList[0].ref[i] - s->DPB; +++ } +++ +++ cmd_slice += NoBackwardPredFlag<<10; +++ msg_slice(rpi, cmd_slice); +++ +++ // Write reference picture descriptions +++ weightedPredFlag = sh->slice_type==HEVC_SLICE_P? 
pps->weighted_pred_flag : pps->weighted_bipred_flag; +++ +++ for(i=L0; i<=L1; i++) +++ for(rIdx=0; rIdx nb_refs[i]; rIdx++) { +++ HEVCFrame *f = s->ref->refPicList[i].ref[rIdx]; +++ HEVCFrame *c = s->ref; // CurrentPicture +++ int pic = f - s->DPB; +++ // Make sure pictures are in range 0 to 15 +++ int adjusted_pic = fref->refPicList[i].isLongTerm[rIdx]; +++ msg_slice(rpi, adjusted_pic+(lt<<4)+(weightedPredFlag<<5)+(weightedPredFlag<<6)); +++ msg_slice(rpi, f->poc); +++ if (weightedPredFlag) { +++ msg_slice(rpi, s->sh.luma_log2_weight_denom+(((i?s-> sh.luma_weight_l1: s->sh.luma_weight_l0)[rIdx] &0x1ff)<<3)); +++ msg_slice(rpi, (i?s-> sh.luma_offset_l1: s->sh.luma_offset_l0)[rIdx] & 0xff); +++ msg_slice(rpi, s->sh.chroma_log2_weight_denom+(((i?s->sh.chroma_weight_l1:s->sh.chroma_weight_l0)[rIdx][0]&0x1ff)<<3)); +++ msg_slice(rpi, (i?s->sh.chroma_offset_l1:s->sh.chroma_offset_l0)[rIdx][0]& 0xff); +++ msg_slice(rpi, s->sh.chroma_log2_weight_denom+(((i?s->sh.chroma_weight_l1:s->sh.chroma_weight_l0)[rIdx][1]&0x1ff)<<3)); +++ msg_slice(rpi, (i?s->sh.chroma_offset_l1:s->sh.chroma_offset_l0)[rIdx][1]& 0xff); +++ } +++ } +++ } +++ else +++ msg_slice(rpi, cmd_slice); +++ +++ msg_slice(rpi, ((sh->beta_offset/2)&15) +++ + (((sh->tc_offset/2)&15) << 4) +++ + (sh->disable_deblocking_filter_flag << 8) +++ + (sh->slice_loop_filter_across_slices_enabled_flag << 9) +++ + (pps->loop_filter_across_tiles_enabled_flag << 10)); // CMD_DEBLOCK +++ +++ msg_slice(rpi, ((sh->slice_cr_qp_offset&31)<<5) + (sh->slice_cb_qp_offset&31)); // CMD_QPOFF +++ +++ // collocated reads/writes +++ if (sps->sps_temporal_mvp_enabled_flag) { +++ int thread_idx = get_thread_idx(rpi, s->avctx); +++ int CurrentPicture = s->ref - s->DPB; +++ int colPic = rpi->RefPicList[sh->slice_type==HEVC_SLICE_B && rpi->collocated_from_l0_flag==0][rpi->collocated_ref_idx]; +++ rpi->mvbase64 [thread_idx] = rpi->mvstorage64 + CurrentPicture * rpi->mvframebytes64; +++ if (sh->slice_type==HEVC_SLICE_I) { +++ // Collocated 
picture not well defined here. Use mvbase or previous value +++ if (sh->first_slice_in_pic_flag) +++ rpi->colbase64[thread_idx] = rpi->mvbase64[thread_idx]; // Ensure we don't read garbage +++ } +++ else +++ rpi->colbase64[thread_idx] = rpi->mvstorage64 + colPic * rpi->mvframebytes64; +++ } +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// End frame +++ +++static int rpi_hevc_end_frame(AVCodecContext *avctx) { +++ RPI_T *rpi = avctx->internal->hwaccel_priv_data; +++ HEVCContext *s = avctx->priv_data; +++ const HEVCPPS *pps = s->ps.pps; +++ const HEVCSPS *sps = s->ps.sps; +++ int thread_idx = get_thread_idx(rpi, avctx); +++ int jump = sps->bit_depth>8?96:128; +++ int CurrentPicture = s->ref - s->DPB; +++ AVFrame *f = s->ref->frame; +++ int last_x = pps->col_bd[pps->num_tile_columns]-1; +++ int last_y = pps->row_bd[pps->num_tile_rows]-1; +++ +++ int i, a64, x; +++ char *buf; +++ +++ // End of phase 1 command compilation +++ if (pps->entropy_coding_sync_enabled_flag) { +++ if (rpi->wpp_entry_x<2 && rpi->PicWidthInCtbsY>2) wpp_pause(rpi, last_y); +++ } +++ p1_apb_write(rpi, RPI_STATUS, 1 + (last_x<<5) + (last_y<<18)); +++ +++ // Phase 1 ... 
+++ for (;;) { +++ // (Re-)allocate PU/COEFF stream space +++ a64 = alloc_stream_space(rpi, s, thread_idx); +++ // Send bitstream data +++ for (i=0; ibit_len; i++) { +++ rpi->axi_write(rpi->id, ((uint64_t)a64)<<6, rpi->bit_fifo[i].len, rpi->bit_fifo[i].ptr); +++ rpi->cmd_fifo[rpi->bit_fifo[i].cmd].data = a64 + (rpi->axi_get_addr(rpi->id)>>6); // Set BFBASE +++ a64 += (rpi->bit_fifo[i].len+63)/64; +++ } +++ // Send phase 1 commands (cache flush on real hardware) +++ rpi->axi_write(rpi->id, ((uint64_t)a64)<<6, rpi->cmd_len * sizeof(struct RPI_CMD), rpi->cmd_fifo); +++ rpi->axi_flush(rpi->id, 3); +++ phase1_begin(rpi, s, thread_idx); +++ // Trigger command FIFO +++ rpi->apb_write(rpi->id, RPI_CFNUM, rpi->cmd_len); +++ rpi->apb_dump_regs(rpi->id, 0x0, 32); +++ rpi->apb_dump_regs(rpi->id, 0x8000, 24); +++ rpi->axi_dump(rpi->id, ((uint64_t)a64)<<6, rpi->cmd_len * sizeof(struct RPI_CMD)); +++ rpi->apb_write_addr(rpi->id, RPI_CFBASE, a64); +++ rpi->wait_interrupt(rpi->id, 1); +++ if (check_status(rpi)==0) break; // No PU/COEFF overflow? +++ } +++ pthread_mutex_unlock(&rpi->mutex_phase1); +++ +++ // Phase 2 ... +++ for (;;) { +++ pthread_mutex_lock (&rpi->mutex_phase2); +++ if (rpi->thread_order[thread_idx]==rpi->phase2_order) break; +++ pthread_mutex_unlock(&rpi->mutex_phase2); +++ } +++ rpi->phase2_order++; +++ +++ rpi->apb_write_addr(rpi->id, RPI_PURBASE, rpi->pubase64[thread_idx]); +++ rpi->apb_write(rpi->id, RPI_PURSTRIDE, rpi->pustep64); +++ rpi->apb_write_addr(rpi->id, RPI_COEFFRBASE, rpi->coeffbase64[thread_idx]); +++ rpi->apb_write(rpi->id, RPI_COEFFRSTRIDE, rpi->coeffstep64); +++ +++#if !defined(AXI_BUFFERS) +++#define MANGLE(x) (((x) &~0xc0000000)>>6) +++{ +++ const AVRpiZcRefPtr fr_buf = f ? av_rpi_zc_ref(avctx, f, f->format, 0) : NULL; +++ uint32_t handle = fr_buf ? 
av_rpi_zc_vc_handle(fr_buf):0; +++// printf("%s cur:%d fr:%p handle:%d YUV:%x:%x ystride:%d ustride:%d ah:%d\n", __FUNCTION__, CurrentPicture, f, handle, get_vc_address_y(f), get_vc_address_u(f), f->linesize[0], f->linesize[1], f->linesize[3]); +++ rpi->apb_write(rpi->id, RPI_OUTYBASE, MANGLE(get_vc_address_y(f))); +++ rpi->apb_write(rpi->id, RPI_OUTCBASE, MANGLE(get_vc_address_u(f))); +++ rpi->apb_write(rpi->id, RPI_OUTYSTRIDE, f->linesize[3] * 128 / 64); +++ rpi->apb_write(rpi->id, RPI_OUTCSTRIDE, f->linesize[3] * 128 / 64); +++ av_rpi_zc_unref(fr_buf); +++} +++#else +++ // Output frame and reference picture locations +++ rpi->apb_write_addr(rpi->id, RPI_OUTYBASE, CurrentPicture * rpi->framebytes64); +++ rpi->apb_write_addr(rpi->id, RPI_OUTCBASE, CurrentPicture * rpi->framebytes64 + rpi->lumabytes64); +++ rpi->apb_write(rpi->id, RPI_OUTYSTRIDE, rpi->lumastride64); +++ rpi->apb_write(rpi->id, RPI_OUTCSTRIDE, rpi->chromastride64); +++#endif +++ +++#if !defined(AXI_BUFFERS) +++{ +++ SliceHeader *sh = &s->sh; +++ int rIdx; +++ for(i=0; i<16; i++) { +++ rpi->apb_write(rpi->id, 0x9000+16*i, 0); +++ rpi->apb_write(rpi->id, 0x9004+16*i, 0); +++ rpi->apb_write(rpi->id, 0x9008+16*i, 0); +++ rpi->apb_write(rpi->id, 0x900C+16*i, 0); +++ } +++ +++ for(i=L0; i<=L1; i++) +++ for(rIdx=0; rIdx nb_refs[i]; rIdx++) { +++ HEVCFrame *f1 = s->ref->refPicList[i].ref[rIdx]; +++ HEVCFrame *c = s->ref; // CurrentPicture +++ int pic = f1 - s->DPB; +++ // Make sure pictures are in range 0 to 15 +++ int adjusted_pic = f1DPB[pic]; +++ AVFrame *fr = hevc ? hevc->frame : NULL; +++ const AVRpiZcRefPtr fr_buf = fr ? av_rpi_zc_ref(avctx, fr, fr->format, 0) : NULL; +++ uint32_t handle = fr_buf ? 
av_rpi_zc_vc_handle(fr_buf):0; +++// printf("%s pic:%d (%d,%d,%d) fr:%p handle:%d YUV:%x:%x\n", __FUNCTION__, adjusted_pic, i, rIdx, pic, fr, handle, get_vc_address_y(fr), get_vc_address_u(fr)); +++ rpi->apb_write(rpi->id, 0x9000+16*adjusted_pic, MANGLE(get_vc_address_y(fr))); +++ rpi->apb_write(rpi->id, 0x9008+16*adjusted_pic, MANGLE(get_vc_address_u(fr))); +++ rpi->apb_write(rpi->id, RPI_OUTYSTRIDE, fr->linesize[3] * 128 / 64); +++ rpi->apb_write(rpi->id, RPI_OUTCSTRIDE, fr->linesize[3] * 128 / 64); +++ av_rpi_zc_unref(fr_buf); +++ } +++} +++#else +++ for(i=0; i<16; i++) { +++ int pic = i < CurrentPicture ? i : i+1; +++ rpi->apb_write_addr(rpi->id, 0x9000+16*i, pic * rpi->framebytes64); +++ rpi->apb_write(rpi->id, 0x9004+16*i, rpi->lumastride64); +++ rpi->apb_write_addr(rpi->id, 0x9008+16*i, pic * rpi->framebytes64 + rpi->lumabytes64); +++ rpi->apb_write(rpi->id, 0x900C+16*i, rpi->chromastride64); +++ } +++#endif +++ +++ rpi->apb_write(rpi->id, RPI_CONFIG2, +++ (sps->bit_depth << 0) // BitDepthY +++ + (sps->bit_depth << 4) // BitDepthC +++ + ((sps->bit_depth>8) << 8) // BitDepthY +++ + ((sps->bit_depth>8) << 9) // BitDepthC +++ + (sps->log2_ctb_size <<10) +++ + (pps->constrained_intra_pred_flag <<13) +++ + (sps->sps_strong_intra_smoothing_enable_flag<<14) +++ + (sps->sps_temporal_mvp_enabled_flag <<15) +++ + (pps->log2_parallel_merge_level <<16) +++ + (s->sh.slice_temporal_mvp_enabled_flag <<19) +++ + (sps->pcm.loop_filter_disable_flag <<20) +++ + ((pps->cb_qp_offset&31) <<21) +++ + ((pps->cr_qp_offset&31) <<26)); +++ +++ rpi->apb_write(rpi->id, RPI_FRAMESIZE, (sps->height<<16) + sps->width); +++ rpi->apb_write(rpi->id, RPI_CURRPOC, s->poc); +++ +++ // collocated reads/writes +++ if (sps->sps_temporal_mvp_enabled_flag) { +++ rpi->apb_write(rpi->id, RPI_COLSTRIDE, rpi->colstride64); +++ rpi->apb_write(rpi->id, RPI_MVSTRIDE, rpi->mvstride64); +++ rpi->apb_write_addr(rpi->id, RPI_MVBASE, rpi->mvbase64 [thread_idx]); +++ rpi->apb_write_addr(rpi->id, RPI_COLBASE, 
rpi->colbase64[thread_idx]); +++ } +++ +++ rpi->apb_dump_regs(rpi->id, 0x0, 32); +++ rpi->apb_dump_regs(rpi->id, 0x8000, 24); +++ rpi->apb_write(rpi->id, RPI_NUMROWS, rpi->PicHeightInCtbsY); +++ rpi->apb_read_drop(rpi->id, RPI_NUMROWS); // Read back to confirm write has reached block +++ rpi->wait_interrupt(rpi->id, 2); +++ +++//printf("%s: %dx%d %d\n", __FUNCTION__, f->width, f->height, f->linesize[0]); +++#if defined(AXI_BUFFERS) +++ // Copy YUV output frame +++ av_assert0(buf = malloc(128*sps->height)); +++ a64 = AXI_BASE64 + CurrentPicture * rpi->framebytes64; +++ for(x=0; xwidth; x+=jump) { +++ int bpl = bytes_per_line(sps, jump, x); +++ read_rect(rpi, buf, a64, sps->height, bpl); +++ (sps->bit_depth>8?copy_luma10:copy_luma)(buf, bpl, sps->height, x, f->data[0], f->linesize[0]); +++ a64 += rpi->lumastride64; +++ } +++ a64 = AXI_BASE64 + CurrentPicture * rpi->framebytes64 + rpi->lumabytes64; +++ for(x=0; xwidth; x+=jump) { +++ int bpl = bytes_per_line(sps, jump, x); +++ read_rect(rpi, buf, a64, sps->height/2, bpl); +++ (sps->bit_depth>8?copy_chroma10:copy_chroma)(buf, bpl, sps->height/2, x/2, f->data[1], f->data[2], f->linesize[1]); +++ a64 += rpi->chromastride64; +++ } +++ free(buf); +++#endif +++ rpi->thread_avctx[thread_idx] = 0; +++ pthread_mutex_unlock(&rpi->mutex_phase2); +++ hwaccel_mutex(avctx, pthread_mutex_lock); +++ return 0; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static void WriteBitstream(RPI_T *rpi, HEVCContext *s) { +++ const int rpi_use_emu = 0; // FFmpeg removes emulation prevention bytes +++ const int offset = 0; // Always 64-byte aligned in sim, need not be on real hardware +++ GetBitContext *gb = &s->HEVClc->gb; +++ int len = 1 + gb->size_in_bits/8 - gb->index/8; +++ const void *ptr = &gb->buffer[gb->index/8]; +++ +++ p1_axi_write(rpi, len, ptr, p1_apb_write(rpi, RPI_BFBASE, 0)); // BFBASE set later +++ p1_apb_write(rpi, RPI_BFNUM, len); +++ p1_apb_write(rpi, RPI_BFCONTROL, offset + 
(1<<7)); // Stop +++ p1_apb_write(rpi, RPI_BFCONTROL, offset + (rpi_use_emu<<6)); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Wavefront mode +++ +++static void wpp_decode_slice(RPI_T *rpi, HEVCContext *s, int ctb_addr_ts) { +++ const HEVCPPS *pps = s->ps.pps; +++ +++ int i, resetQPY=1; +++ int indep = !s->sh.dependent_slice_segment_flag; +++ int ctb_col = s->sh.slice_ctb_addr_rs % rpi->PicWidthInCtbsY; +++ +++ if (ctb_addr_ts) wpp_end_previous_slice(rpi, s, ctb_addr_ts); +++ pre_slice_decode(rpi, s); +++ WriteBitstream(rpi, s); +++ if (ctb_addr_ts==0 || indep || rpi->PicWidthInCtbsY==1) WriteProb(rpi); +++ else if (ctb_col==0) p1_apb_write(rpi, RPI_TRANSFER, PROB_RELOAD); +++ else resetQPY=0; +++ program_slicecmds(rpi, s->slice_idx); +++ new_slice_segment(rpi, s); +++ wpp_entry_point(rpi, s, indep, resetQPY, ctb_addr_ts); +++ for (i=0; ish.num_entry_point_offsets; i++) { +++ int ctb_addr_rs = pps->ctb_addr_ts_to_rs[ctb_addr_ts]; +++ int ctb_row = ctb_addr_rs / rpi->PicWidthInCtbsY; +++ int last_x = rpi->PicWidthInCtbsY-1; +++ if (rpi->PicWidthInCtbsY>2) wpp_pause(rpi, ctb_row); +++ p1_apb_write(rpi, RPI_STATUS, (ctb_row<<18) + (last_x<<5) + 2); +++ if (rpi->PicWidthInCtbsY==2) p1_apb_write(rpi, RPI_TRANSFER, PROB_BACKUP); +++ if (rpi->PicWidthInCtbsY==1) WriteProb(rpi); +++ else p1_apb_write(rpi, RPI_TRANSFER, PROB_RELOAD); +++ ctb_addr_ts += pps->column_width[0]; +++ wpp_entry_point(rpi, s, 0, 1, ctb_addr_ts); +++ } +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Tiles mode +++ +++static void decode_slice(RPI_T *rpi, HEVCContext *s, int ctb_addr_ts) { +++ const HEVCPPS *pps = s->ps.pps; +++ int i, resetQPY; +++ +++ if (ctb_addr_ts) end_previous_slice(rpi, s, ctb_addr_ts); +++ pre_slice_decode(rpi, s); +++ WriteBitstream(rpi, s); +++ resetQPY = ctb_addr_ts==0 +++ || pps->tile_id[ctb_addr_ts]!=pps->tile_id[ctb_addr_ts-1] +++ || !s->sh.dependent_slice_segment_flag; 
+++ if (resetQPY) WriteProb(rpi); +++ program_slicecmds(rpi, s->slice_idx); +++ new_slice_segment(rpi, s); +++ new_entry_point(rpi, s, !s->sh.dependent_slice_segment_flag, resetQPY, ctb_addr_ts); +++ for (i=0; ish.num_entry_point_offsets; i++) { +++ int ctb_addr_rs = pps->ctb_addr_ts_to_rs[ctb_addr_ts]; +++ int ctb_col = ctb_addr_rs % rpi->PicWidthInCtbsY; +++ int ctb_row = ctb_addr_rs / rpi->PicWidthInCtbsY; +++ int tile_x = ctb_to_tile (ctb_col, pps->col_bd, pps->num_tile_columns); +++ int tile_y = ctb_to_tile (ctb_row, pps->row_bd, pps->num_tile_rows); +++ int last_x = pps->col_bd[tile_x+1]-1; +++ int last_y = pps->row_bd[tile_y+1]-1; +++ p1_apb_write(rpi, RPI_STATUS, 2 + (last_x<<5) + (last_y<<18)); +++ WriteProb(rpi); +++ ctb_addr_ts += pps->column_width[tile_x] * pps->row_height[tile_y]; +++ new_entry_point(rpi, s, 0, 1, ctb_addr_ts); +++ } +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static int rpi_hevc_decode_slice( +++ AVCodecContext *avctx, +++ const uint8_t *buffer, +++ uint32_t size) { +++ +++ RPI_T *rpi = avctx->internal->hwaccel_priv_data; +++ HEVCContext *s = avctx->priv_data; +++ const HEVCPPS *pps = s->ps.pps; +++ int ctb_addr_ts = pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; +++ ff_hevc_cabac_init(s, ctb_addr_ts); +++ if (s->ps.sps->scaling_list_enable_flag) populate_scaling_factors(rpi, s); +++ populate_prob_tables(rpi, s); +++ pps->entropy_coding_sync_enabled_flag? 
wpp_decode_slice(rpi, s, ctb_addr_ts) +++ : decode_slice(rpi, s, ctb_addr_ts); +++ return 0; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++// Bind to socket client +++ +++static int open_socket_client(RPI_T *rpi, const char *so) { +++ *(void **) &rpi->ctrl_ffmpeg_init = rpi_ctrl_ffmpeg_init; +++ *(void **) &rpi->apb_write = rpi_apb_write; +++ *(void **) &rpi->apb_write_addr = rpi_apb_write_addr; +++ *(void **) &rpi->apb_read = rpi_apb_read; +++ *(void **) &rpi->apb_read_drop = rpi_apb_read_drop; +++ *(void **) &rpi->axi_write = rpi_axi_write; +++ *(void **) &rpi->axi_read_alloc = rpi_axi_read_alloc; +++ *(void **) &rpi->axi_read_tx = rpi_axi_read_tx; +++ *(void **) &rpi->axi_read_rx = rpi_axi_read_rx; +++ *(void **) &rpi->axi_get_addr = rpi_axi_get_addr; +++ *(void **) &rpi->apb_dump_regs = rpi_apb_dump_regs; +++ *(void **) &rpi->axi_dump = rpi_axi_dump; +++ *(void **) &rpi->axi_flush = rpi_axi_flush; +++ *(void **) &rpi->wait_interrupt = rpi_wait_interrupt; +++ *(void **) &rpi->ctrl_ffmpeg_free = rpi_ctrl_ffmpeg_free; +++ return 1; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static int rpi_hevc_alloc_frame(AVCodecContext *avctx, AVFrame *f) { +++ HEVCContext *s = avctx->priv_data; +++ const HEVCSPS *sps = s->ps.sps; +++ const int ALIGN = 16; +++ +++ f->width = sps->width; +++ f->height = sps->height; +++ f->format = sps->pix_fmt; +++ f->buf[0] = av_buffer_alloc(1); +++ f->buf[1] = av_buffer_alloc(1); +++ f->buf[2] = av_buffer_alloc(1); +++ return av_image_alloc(f->data, f->linesize, f->width, f->height, f->format, ALIGN); +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static int rpi_hevc_init(AVCodecContext *avctx) { +++ RPI_T *rpi = avctx->internal->hwaccel_priv_data; +++ const char *err, *so; +++ +++ so = "./rpi_ffmpeg.so"; +++ +++ if (avctx->width>4096 || avctx->height>4096) { +++ av_log(NULL, AV_LOG_FATAL, 
"Picture size %dx%d exceeds 4096x4096 maximum for HWAccel\n", avctx->width, avctx->height); +++ return AVERROR(ENOTSUP); +++ } +++ if (!open_socket_client(rpi, so)) { +++ av_log(NULL, AV_LOG_FATAL, "%s\n", dlerror()); +++ return AVERROR_EXTERNAL; +++ } +++ err = rpi->ctrl_ffmpeg_init(NULL, &rpi->id); +++ if (err) { +++ av_log(NULL, AV_LOG_FATAL, "Could not connect to RPI server: %s\n", err); +++ return AVERROR_EXTERNAL; +++ } +++ +++#ifdef RPI_DISPLAY +++ #include "rpi_zc.h" +++ // Whilst FFmpegs init fn is only called once the close fn is called as +++ // many times as we have threads (init_thread_copy is called for the +++ // threads). So to match init & term put the init here where it will be +++ // called by both init & copy +++ av_rpi_zc_init(avctx); +++#endif +++ +++ pthread_mutex_init(&rpi->mutex_phase1, NULL); +++ pthread_mutex_init(&rpi->mutex_phase2, NULL); +++ +++ // Initial PU/COEFF stream buffer sizes chosen so jellyfish40.265 requires 1 overflow/restart +++ rpi->max_pu_msgs = 2+340; // 7.2 says at most 1611 messages per CTU +++ rpi->max_coeff64 = 2+1404; +++ +++ av_assert0(rpi->cmd_fifo = malloc((rpi->cmd_max=1024)*sizeof(struct RPI_CMD))); +++ av_assert0(rpi->bit_fifo = malloc((rpi->bit_max=1024)*sizeof(struct RPI_BIT))); +++ return 0; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++static int rpi_hevc_free(AVCodecContext *avctx) { +++ RPI_T *rpi = avctx->internal->hwaccel_priv_data; +++ if (rpi->decode_order) wait_idle(rpi, rpi->decode_order); +++ if (rpi->cmd_fifo) free(rpi->cmd_fifo); +++ if (rpi->bit_fifo) free(rpi->bit_fifo); +++ pthread_mutex_destroy(&rpi->mutex_phase1); +++ pthread_mutex_destroy(&rpi->mutex_phase2); +++ if (rpi->id && rpi->ctrl_ffmpeg_free) rpi->ctrl_ffmpeg_free(rpi->id); +++ return 0; +++} +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++const AVHWAccel ff_hevc_rpi4_8_hwaccel = { +++ .name = "hevc_rpi4_8", +++ .type = 
AVMEDIA_TYPE_VIDEO, +++ .id = AV_CODEC_ID_HEVC, +++ .pix_fmt = AV_PIX_FMT_RPI4_8, +++ //.alloc_frame = rpi_hevc_alloc_frame, +++ .start_frame = rpi_hevc_start_frame, +++ .end_frame = rpi_hevc_end_frame, +++ .decode_slice = rpi_hevc_decode_slice, +++ .init = rpi_hevc_init, +++ .uninit = rpi_hevc_free, +++ .priv_data_size = sizeof(RPI_T), +++ .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +++}; +++ +++const AVHWAccel ff_hevc_rpi4_10_hwaccel = { +++ .name = "hevc_rpi4_10", +++ .type = AVMEDIA_TYPE_VIDEO, +++ .id = AV_CODEC_ID_HEVC, +++ .pix_fmt = AV_PIX_FMT_RPI4_10, +++ //.alloc_frame = rpi_hevc_alloc_frame, +++ .start_frame = rpi_hevc_start_frame, +++ .end_frame = rpi_hevc_end_frame, +++ .decode_slice = rpi_hevc_decode_slice, +++ .init = rpi_hevc_init, +++ .uninit = rpi_hevc_free, +++ .priv_data_size = sizeof(RPI_T), +++ .caps_internal = HWACCEL_CAP_ASYNC_SAFE, +++}; +++ +++ +++int rpi_init(AVCodecContext *avctx) { +++ return 0; +++} ++diff --git a/libavcodec/rpi_hevc.h b/libavcodec/rpi_hevc.h ++new file mode 100644 ++index 0000000000..f54657a957 ++--- /dev/null +++++ b/libavcodec/rpi_hevc.h ++@@ -0,0 +1,219 @@ +++// FFMPEG HEVC decoder hardware accelerator +++// Andrew Holme, Argon Design Ltd +++// Copyright (c) June 2017 Raspberry Pi Ltd +++ +++#include +++#include +++ +++#include "hevc.h" +++#include "hevcdec.h" +++ +++#define MAX_THREADS 50 +++#define NUM_SCALING_FACTORS 4064 +++ +++#define AXI_BASE64 0 +++ +++#define PROB_BACKUP ((20<<12) + (20<<6) + (0<<0)) +++#define PROB_RELOAD ((20<<12) + (20<<0) + (0<<6)) +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++#define RPI_SPS0 0 +++#define RPI_SPS1 4 +++#define RPI_PPS 8 +++#define RPI_SLICE 12 +++#define RPI_TILESTART 16 +++#define RPI_TILEEND 20 +++#define RPI_SLICESTART 24 +++#define RPI_MODE 28 +++#define RPI_LEFT0 32 +++#define RPI_LEFT1 36 +++#define RPI_LEFT2 40 +++#define RPI_LEFT3 44 +++#define RPI_QP 48 +++#define RPI_CONTROL 52 +++#define RPI_STATUS 56 +++#define 
RPI_VERSION 60 +++#define RPI_BFBASE 64 +++#define RPI_BFNUM 68 +++#define RPI_BFCONTROL 72 +++#define RPI_BFSTATUS 76 +++#define RPI_PUWBASE 80 +++#define RPI_PUWSTRIDE 84 +++#define RPI_COEFFWBASE 88 +++#define RPI_COEFFWSTRIDE 92 +++#define RPI_SLICECMDS 96 +++#define RPI_BEGINTILEEND 100 +++#define RPI_TRANSFER 104 +++#define RPI_CFBASE 108 +++#define RPI_CFNUM 112 +++#define RPI_CFSTATUS 116 +++ +++#define RPI_PURBASE 0x8000 +++#define RPI_PURSTRIDE 0x8004 +++#define RPI_COEFFRBASE 0x8008 +++#define RPI_COEFFRSTRIDE 0x800C +++#define RPI_NUMROWS 0x8010 +++#define RPI_CONFIG2 0x8014 +++#define RPI_OUTYBASE 0x8018 +++#define RPI_OUTYSTRIDE 0x801C +++#define RPI_OUTCBASE 0x8020 +++#define RPI_OUTCSTRIDE 0x8024 +++#define RPI_STATUS2 0x8028 +++#define RPI_FRAMESIZE 0x802C +++#define RPI_MVBASE 0x8030 +++#define RPI_MVSTRIDE 0x8034 +++#define RPI_COLBASE 0x8038 +++#define RPI_COLSTRIDE 0x803C +++#define RPI_CURRPOC 0x8040 +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++struct FFM_PROB { +++ uint8_t sao_merge_flag [ 1]; +++ uint8_t sao_type_idx [ 1]; +++ uint8_t split_coding_unit_flag [ 3]; +++ uint8_t cu_transquant_bypass_flag [ 1]; +++ uint8_t skip_flag [ 3]; +++ uint8_t cu_qp_delta [ 3]; +++ uint8_t pred_mode_flag [ 1]; +++ uint8_t part_mode [ 4]; +++ uint8_t prev_intra_luma_pred_flag [ 1]; +++ uint8_t intra_chroma_pred_mode [ 2]; +++ uint8_t merge_flag [ 1]; +++ uint8_t merge_idx [ 1]; +++ uint8_t inter_pred_idc [ 5]; +++ uint8_t ref_idx_l0 [ 2]; +++ uint8_t ref_idx_l1 [ 2]; +++ uint8_t abs_mvd_greater0_flag [ 2]; +++ uint8_t abs_mvd_greater1_flag [ 2]; +++ uint8_t mvp_lx_flag [ 1]; +++ uint8_t no_residual_data_flag [ 1]; +++ uint8_t split_transform_flag [ 3]; +++ uint8_t cbf_luma [ 2]; +++ uint8_t cbf_cb_cr [ 4]; +++ uint8_t transform_skip_flag/*[][]*/ [ 2]; +++ uint8_t explicit_rdpcm_flag/*[][]*/ [ 2]; +++ uint8_t explicit_rdpcm_dir_flag/*[][]*/ [ 2]; +++ uint8_t last_significant_coeff_x_prefix [18]; +++ uint8_t 
last_significant_coeff_y_prefix [18]; +++ uint8_t significant_coeff_group_flag [ 4]; +++ uint8_t significant_coeff_flag [44]; +++ uint8_t coeff_abs_level_greater1_flag [24]; +++ uint8_t coeff_abs_level_greater2_flag [ 6]; +++ uint8_t log2_res_scale_abs [ 8]; +++ uint8_t res_scale_sign_flag [ 2]; +++ uint8_t cu_chroma_qp_offset_flag [ 1]; +++ uint8_t cu_chroma_qp_offset_idx [ 1]; +++} __attribute__((packed)); +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++struct RPI_PROB { +++ uint8_t SAO_MERGE_FLAG [ 1]; +++ uint8_t SAO_TYPE_IDX [ 1]; +++ uint8_t SPLIT_FLAG [ 3]; +++ uint8_t CU_SKIP_FLAG [ 3]; +++ uint8_t CU_TRANSQUANT_BYPASS_FLAG [ 1]; +++ uint8_t PRED_MODE [ 1]; +++ uint8_t PART_SIZE [ 4]; +++ uint8_t INTRA_PRED_MODE [ 1]; +++ uint8_t CHROMA_PRED_MODE [ 1]; +++ uint8_t MERGE_FLAG_EXT [ 1]; +++ uint8_t MERGE_IDX_EXT [ 1]; +++ uint8_t INTER_DIR [ 5]; +++ uint8_t REF_PIC [ 2]; +++ uint8_t MVP_IDX [ 1]; +++ uint8_t MVD [ 2]; +++ uint8_t QT_ROOT_CBF [ 1]; +++ uint8_t TRANS_SUBDIV_FLAG [ 3]; +++ uint8_t QT_CBF [ 6]; +++ uint8_t DQP [ 2]; +++ uint8_t ONE_FLAG [24]; +++ uint8_t LASTX [18]; +++ uint8_t LASTY [18]; +++ uint8_t SIG_CG_FLAG [ 4]; +++ uint8_t ABS_FLAG [ 6]; +++ uint8_t TRANSFORMSKIP_FLAG [ 2]; +++ uint8_t SIG_FLAG [42]; +++ uint8_t SIG_FLAG_unused [ 2]; +++} __attribute__((packed)); +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++struct RPI_CMD { +++ uint32_t addr; +++ uint32_t data; +++} __attribute__((packed)); +++ +++struct RPI_BIT { +++ int cmd; +++ const void *ptr; +++ int len; +++}; +++ +++////////////////////////////////////////////////////////////////////////////// +++ +++typedef struct RPI_T { +++struct RPI_BIT *bit_fifo; +++struct RPI_CMD *cmd_fifo; +++ int bit_len, bit_max; +++ int cmd_len, cmd_max; +++ int max_pu_msgs; +++ int max_coeff64; +++AVCodecContext *thread_avctx[MAX_THREADS]; +++ int thread_order[MAX_THREADS]; +++ int decode_order; +++ int 
phase1_order; +++ int phase2_order; +++pthread_mutex_t mutex_phase1; +++pthread_mutex_t mutex_phase2; +++ uint8_t scaling_factors[NUM_SCALING_FACTORS]; +++struct RPI_PROB probabilities; +++ int num_slice_msgs; +++ uint16_t slice_msgs[2*HEVC_MAX_REFS*8+3]; +++ int pubase64[MAX_THREADS]; +++ int pustep64; +++ int coeffbase64[MAX_THREADS]; +++ int coeffstep64; +++ int PicWidthInCtbsY; +++ int PicHeightInCtbsY; +++#ifdef AXI_BUFFERS +++ int lumabytes64; +++ int framebytes64; +++ int lumastride64; +++ int chromastride64; +++#endif +++ int mvframebytes64; +++ int mvstorage64; +++ int colstride64; +++ int mvstride64; +++ int colbase64[MAX_THREADS]; +++ int mvbase64[MAX_THREADS]; +++ uint32_t reg_slicestart; +++ int collocated_from_l0_flag; +++ int max_num_merge_cand; +++ int RefPicList[2][HEVC_MAX_REFS]; +++ int collocated_ref_idx; +++ int wpp_entry_x; +++ int wpp_entry_y; +++ +++ void * dl_handle; +++ void * id; +++ char * (* ctrl_ffmpeg_init) (const char *hwaccel_device, void **id); +++ void (* apb_write) (void *id, uint16_t addr, uint32_t data); +++ void (* apb_write_addr) (void *id, uint16_t addr, uint32_t data); +++ uint32_t (* apb_read) (void *id, uint16_t addr); +++ void (* apb_read_drop) (void *id, uint16_t addr); +++ void (* axi_write) (void *id, uint64_t addr, uint32_t size, const void *buf); +++ void (* axi_read_alloc) (void *id, uint32_t size); +++ void (* axi_read_tx) (void *id, uint64_t addr, uint32_t size); +++ void (* axi_read_rx) (void *id, uint32_t size, void *buf); +++ uint64_t (* axi_get_addr) (void *id); +++ void (* apb_dump_regs) (void *id, uint16_t addr, int num); +++ void (* axi_dump) (void *id, uint64_t addr, uint32_t size); +++ void (* axi_flush) (void *id, int mode); +++ void (* wait_interrupt) (void *id, int phase); +++ void (* ctrl_ffmpeg_free) (void *id); +++ +++} RPI_T; ++diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c ++new file mode 100644 ++index 0000000000..5f23e9b36c ++--- /dev/null +++++ b/libavcodec/rpi_mailbox.c ++@@ 
-0,0 +1,149 @@ +++/* +++Copyright (c) 2012, Broadcom Europe Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+++*/ +++ +++#if 1//defined(RPI) || defined (RPI_DISPLAY) +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include +++ +++#define MAJOR_NUM 100 +++#define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *) +++#define DEVICE_FILE_NAME "/dev/vcio" +++ +++#include "rpi_mailbox.h" +++//#include +++ +++/* +++ * use ioctl to send mbox property message +++ */ +++ +++static int mbox_property(int file_desc, void *buf) +++{ +++ int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf); +++ +++ if (ret_val < 0) { +++ printf("ioctl_set_msg failed:%d\n", ret_val); +++ } +++ +++#ifdef DEBUG +++ unsigned *p = buf; int i; unsigned size = *(unsigned *)buf; +++ for (i=0; i +++#include +++#include +++#include +++#include +++#include "libavutil/avassert.h" +++ +++#include "config.h" +++ +++#include +++#include +++ +++#include +++ +++#include "rpi_mailbox.h" +++#include "rpi_qpu.h" +++ +++#pragma GCC diagnostic push +++// Many many redundant decls in the header files +++#pragma GCC diagnostic ignored "-Wredundant-decls" +++#include "interface/vmcs_host/vc_vchi_gpuserv.h" +++#pragma GCC diagnostic pop +++ +++// QPU "noflush" flags +++// a mixture of flushing & profiling +++ +++#define QPU_FLAGS_NO_FLUSH_VPU 1 // If unset VPU cache will be flushed +++#define QPU_FLAGS_PROF_CLEAR_AND_ENABLE 2 // Clear & Enable detailed QPU profiling registers +++#define QPU_FLAGS_PROF_OUTPUT_COUNTS 4 // Print the results +++#define QPU_FLAGS_OUTPUT_QPU_TIMES 8 // Print QPU times - independant of the profiling +++#define QPU_FLAGS_NO_FLUSH_QPU 16 // If unset flush QPU caches & TMUs (uniforms always flushed) +++ +++#define vcos_verify_ge0(x) ((x)>=0) +++ +++struct rpi_cache_flush_env_s { +++// unsigned int n; +++// struct vcsm_user_clean_invalid_s a[CFE_A_COUNT]; +++ struct vcsm_user_clean_invalid2_s v; +++}; +++ +++typedef struct gpu_env_s +++{ +++ int open_count; +++ int init_count; +++ int mb; +++ int vpu_i_cache_flushed; +++} gpu_env_t; +++ +++// 
Stop more than one thread trying to allocate memory or use the processing resources at once +++static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; +++static gpu_env_t * gpu = NULL; +++ +++ +++// GPU memory alloc fns (internal) +++ +++// GPU_MEM_PTR_T alloc fns +++static int gpu_malloc_cached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) { +++ p->numbytes = (numbytes + 255) & ~255; // Round up +++ p->vcsm_handle = vcsm_malloc_cache(p->numbytes, VCSM_CACHE_TYPE_HOST | 0x80, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); +++ av_assert0(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ av_assert0(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ av_assert0(p->arm); +++ p->vc = mbox_mem_lock(mb, p->vc_handle); +++ av_assert0(p->vc); +++// printf("***** %s, %d\n", __func__, numbytes); +++ +++ return 0; +++} +++ +++static int gpu_malloc_uncached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) { +++ p->numbytes = numbytes; +++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE | 0x80, (char *)"Video Frame" ); +++ av_assert0(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ av_assert0(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ av_assert0(p->arm); +++ p->vc = mbox_mem_lock(mb, p->vc_handle); +++ av_assert0(p->vc); +++// printf("***** %s, %d\n", __func__, numbytes); +++ return 0; +++} +++ +++static void gpu_free_internal(const int mb, GPU_MEM_PTR_T * const p) { +++ mbox_mem_unlock(mb, p->vc_handle); +++ vcsm_unlock_ptr(p->arm); +++ vcsm_free(p->vcsm_handle); +++ memset(p, 0, sizeof(*p)); // Ensure we crash hard if we try and use this again +++// printf("***** 
%s\n", __func__); +++} +++ +++ +++// GPU init, free, lock, unlock +++ +++static void gpu_term(void) +++{ +++ gpu_env_t * const ge = gpu; +++ +++ // We have to hope that eveything has terminated... +++ gpu = NULL; +++ +++ vc_gpuserv_deinit(); +++ +++ vcsm_exit(); +++ +++ mbox_close(ge->mb); +++ +++ free(ge); +++} +++ +++ +++// Connect to QPU, returns 0 on success. +++static int gpu_init(gpu_env_t ** const gpu) { +++ gpu_env_t * const ge = calloc(1, sizeof(gpu_env_t)); +++ *gpu = NULL; +++ +++ if (ge == NULL) +++ return -1; +++ +++ if ((ge->mb = mbox_open()) < 0) +++ return -1; +++ +++ vcsm_init(); +++ +++ *gpu = ge; +++ return 0; +++} +++ +++ +++ +++static void gpu_unlock(void) { +++ pthread_mutex_unlock(&gpu_mutex); +++} +++ +++// Make sure we have exclusive access to the mailbox, and enable qpu if necessary. +++static gpu_env_t * gpu_lock(void) { +++ pthread_mutex_lock(&gpu_mutex); +++ +++ av_assert0(gpu != NULL); +++ return gpu; +++} +++ +++static gpu_env_t * gpu_lock_ref(void) +++{ +++ pthread_mutex_lock(&gpu_mutex); +++ +++ if (gpu == NULL) { +++ int rv = gpu_init(&gpu); +++ if (rv != 0) { +++ gpu_unlock(); +++ return NULL; +++ } +++ } +++ +++ ++gpu->open_count; +++ return gpu; +++} +++ +++static void gpu_unlock_unref(gpu_env_t * const ge) +++{ +++ if (--ge->open_count == 0) +++ gpu_term(); +++ +++ gpu_unlock(); +++} +++ +++static inline gpu_env_t * gpu_ptr(void) +++{ +++ av_assert0(gpu != NULL); +++ return gpu; +++} +++ +++// Public gpu fns +++ +++// Allocate memory on GPU +++// Fills in structure

containing ARM pointer, videocore handle, videocore memory address, numbytes +++// Returns 0 on success. +++// This allocates memory that will not be cached in ARM's data cache. +++// Therefore safe to use without data cache flushing. +++int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) +++{ +++ int r; +++ gpu_env_t * const ge = gpu_lock_ref(); +++ if (ge == NULL) +++ return -1; +++ r = gpu_malloc_uncached_internal(ge->mb, numbytes, p); +++ gpu_unlock(); +++ return r; +++} +++ +++// This allocates data that will be +++// Cached in ARM L2 +++// Uncached in VPU L2 +++int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) +++{ +++ int r; +++ gpu_env_t * const ge = gpu_lock_ref(); +++ if (ge == NULL) +++ return -1; +++ r = gpu_malloc_cached_internal(ge->mb, numbytes, p); +++ gpu_unlock(); +++ return r; +++} +++ +++void gpu_free(GPU_MEM_PTR_T * const p) { +++ gpu_env_t * const ge = gpu_lock(); +++ gpu_free_internal(ge->mb, p); +++ gpu_unlock_unref(ge); +++} +++ +++int gpu_get_mailbox(void) +++{ +++ av_assert0(gpu); +++ return gpu->mb; +++} +++ +++void gpu_ref(void) +++{ +++ gpu_lock_ref(); +++ gpu_unlock(); +++} +++ +++void gpu_unref(void) +++{ +++ gpu_env_t * const ge = gpu_lock(); +++ gpu_unlock_unref(ge); +++} +++ +++// ---------------------------------------------------------------------------- +++// +++// Cache flush functions +++ +++#define CACHE_EL_MAX 16 +++ +++rpi_cache_flush_env_t * rpi_cache_flush_init() +++{ +++ rpi_cache_flush_env_t * const rfe = malloc(sizeof(rpi_cache_flush_env_t) + +++ sizeof(struct vcsm_user_clean_invalid2_block_s) * CACHE_EL_MAX); +++ if (rfe == NULL) +++ return NULL; +++ +++ rfe->v.op_count = 0; +++ return rfe; +++} +++ +++void rpi_cache_flush_abort(rpi_cache_flush_env_t * const rfe) +++{ +++ if (rfe != NULL) +++ free(rfe); +++} +++ +++int rpi_cache_flush_finish(rpi_cache_flush_env_t * const rfe) +++{ +++ int rc = 0; +++ +++ if (vcsm_clean_invalid2(&rfe->v) != 0) +++ rc = -1; +++ +++ free(rfe); +++ +++ if (rc == 0) +++ return 0; 
+++ +++ av_log(NULL, AV_LOG_ERROR, "vcsm_clean_invalid failed: errno=%d\n", errno); +++ return rc; +++} +++ +++inline void rpi_cache_flush_add_gm_blocks(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode, +++ const unsigned int offset0, const unsigned int block_size, const unsigned int blocks, const unsigned int block_stride) +++{ +++ struct vcsm_user_clean_invalid2_block_s * const b = rfe->v.s + rfe->v.op_count++; +++ +++ av_assert0(rfe->v.op_count <= CACHE_EL_MAX); +++ +++ b->invalidate_mode = mode; +++ b->block_count = blocks; +++ b->start_address = gm->arm + offset0; +++ b->block_size = block_size; +++ b->inter_block_stride = block_stride; +++} +++ +++void rpi_cache_flush_add_gm_range(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode, +++ const unsigned int offset, const unsigned int size) +++{ +++ // Deal with empty pointer trivially +++ if (gm == NULL || size == 0) +++ return; +++ +++ av_assert0(offset <= gm->numbytes); +++ av_assert0(size <= gm->numbytes); +++ av_assert0(offset + size <= gm->numbytes); +++ +++ rpi_cache_flush_add_gm_blocks(rfe, gm, mode, offset, size, 1, 0); +++} +++ +++void rpi_cache_flush_add_gm_ptr(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode) +++{ +++ rpi_cache_flush_add_gm_blocks(rfe, gm, mode, 0, gm->numbytes, 1, 0); +++} +++ +++ +++void rpi_cache_flush_add_frame(rpi_cache_flush_env_t * const rfe, const AVFrame * const frame, const unsigned int mode) +++{ +++#if !RPI_ONE_BUF +++#error Fixme! 
(NIF) +++#endif +++ if (gpu_is_buf1(frame)) { +++ rpi_cache_flush_add_gm_ptr(rfe, gpu_buf1_gmem(frame), mode); +++ } +++ else +++ { +++ rpi_cache_flush_add_gm_ptr(rfe, gpu_buf3_gmem(frame, 0), mode); +++ rpi_cache_flush_add_gm_ptr(rfe, gpu_buf3_gmem(frame, 1), mode); +++ rpi_cache_flush_add_gm_ptr(rfe, gpu_buf3_gmem(frame, 2), mode); +++ } +++} +++ +++// Call this to clean and invalidate a region of memory +++void rpi_cache_flush_one_gm_ptr(const GPU_MEM_PTR_T *const p, const rpi_cache_flush_mode_t mode) +++{ +++ rpi_cache_flush_env_t * rfe = rpi_cache_flush_init(); +++ rpi_cache_flush_add_gm_ptr(rfe, p, mode); +++ rpi_cache_flush_finish(rfe); +++} +++ +++ +++// ---------------------------------------------------------------------------- +++ +++#endif // RPI ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++new file mode 100644 ++index 0000000000..485a08f8ba ++--- /dev/null +++++ b/libavcodec/rpi_qpu.h ++@@ -0,0 +1,206 @@ +++#ifndef RPI_QPU_H +++#define RPI_QPU_H +++ +++#define RPI_ONE_BUF 1 +++ +++typedef struct gpu_mem_ptr_s { +++ unsigned char *arm; // Pointer to memory mapped on ARM side +++ int vc_handle; // Videocore handle of relocatable memory +++ int vcsm_handle; // Handle for use by VCSM +++ int vc; // Address for use in GPU code +++ int numbytes; // Size of memory block +++} GPU_MEM_PTR_T; +++ +++// General GPU functions +++extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p); +++extern int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p); +++extern void gpu_free(GPU_MEM_PTR_T * const p); +++ +++#include "libavutil/frame.h" +++#if !RPI_ONE_BUF +++static inline uint32_t get_vc_address_y(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[0]); +++ return p->vc; +++} +++ +++static inline uint32_t get_vc_address_u(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ return p->vc; +++} +++ +++static inline uint32_t get_vc_address_v(const AVFrame * const frame) 
{ +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[2]); +++ return p->vc; +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[0]); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[1]); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[2]); +++} +++ +++#else +++ +++static inline int gpu_is_buf1(const AVFrame * const frame) +++{ +++ return frame->buf[1] == NULL; +++} +++ +++static inline GPU_MEM_PTR_T * gpu_buf1_gmem(const AVFrame * const frame) +++{ +++ return av_buffer_get_opaque(frame->buf[0]); +++} +++ +++static inline GPU_MEM_PTR_T * gpu_buf3_gmem(const AVFrame * const frame, const unsigned int n) +++{ +++ return av_buffer_pool_opaque(frame->buf[n]); +++} +++ +++static inline uint32_t get_vc_address3(const AVFrame * const frame, const unsigned int n) +++{ +++ const GPU_MEM_PTR_T * const gm = gpu_is_buf1(frame) ? 
gpu_buf1_gmem(frame) : gpu_buf3_gmem(frame, n); +++ return gm->vc + (frame->data[n] - gm->arm); +++} +++ +++ +++static inline uint32_t get_vc_address_y(const AVFrame * const frame) { +++ return get_vc_address3(frame, 0); +++} +++ +++static inline uint32_t get_vc_address_u(const AVFrame * const frame) { +++ return get_vc_address3(frame, 1); +++} +++ +++static inline uint32_t get_vc_address_v(const AVFrame * const frame) { +++ return get_vc_address3(frame, 2); +++} +++ +++#if 0 +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.numbytes = frame->data[1] - frame->data[0]; +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 0); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.arm += frame->data[1] - frame->data[0]; +++ g.vc += frame->data[1] - frame->data[0]; +++ g.numbytes = frame->data[2] - frame->data[1]; // chroma size +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 1); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.arm += frame->data[2] - frame->data[0]; +++ g.vc += frame->data[2] - frame->data[0]; +++ g.numbytes = frame->data[2] - frame->data[1]; // chroma size +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 2); +++} +++#endif +++#endif +++ +++// Cache flush stuff +++ +++struct rpi_cache_flush_env_s; +++typedef struct rpi_cache_flush_env_s rpi_cache_flush_env_t; +++ +++rpi_cache_flush_env_t * rpi_cache_flush_init(void); +++// Free env without flushing +++void rpi_cache_flush_abort(rpi_cache_flush_env_t * const rfe); +++// Do the accumulated flush & free the env +++int rpi_cache_flush_finish(rpi_cache_flush_env_t * const rfe); +++ +++typedef enum +++{ +++ 
RPI_CACHE_FLUSH_MODE_INVALIDATE = 1, +++ RPI_CACHE_FLUSH_MODE_WRITEBACK = 2, +++ RPI_CACHE_FLUSH_MODE_WB_INVALIDATE = 3 +++} rpi_cache_flush_mode_t; +++ +++void rpi_cache_flush_add_gm_ptr(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const rpi_cache_flush_mode_t mode); +++void rpi_cache_flush_add_gm_range(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const rpi_cache_flush_mode_t mode, +++ const unsigned int offset, const unsigned int size); +++void rpi_cache_flush_add_gm_blocks(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode, +++ const unsigned int offset0, const unsigned int block_size, const unsigned int blocks, const unsigned int block_stride); +++void rpi_cache_flush_add_frame(rpi_cache_flush_env_t * const rfe, const AVFrame * const frame, const rpi_cache_flush_mode_t mode); +++void rpi_cache_flush_add_frame_block(rpi_cache_flush_env_t * const rfe, const AVFrame * const frame, const rpi_cache_flush_mode_t mode, +++ const unsigned int x0, const unsigned int y0, const unsigned int width, const unsigned int height, +++ const unsigned int uv_shift, const int do_luma, const int do_chroma); +++ +++// init, add, finish for one gm ptr +++void rpi_cache_flush_one_gm_ptr(const GPU_MEM_PTR_T * const p, const rpi_cache_flush_mode_t mode); +++ +++ +++// QPU specific functions +++ +++typedef struct HEVCRpiQpu { +++ uint32_t c_pxx; +++ uint32_t c_pxx_l1; +++ uint32_t c_bxx; +++ uint32_t y_pxx; +++ uint32_t y_bxx; +++ uint32_t y_p00; +++ uint32_t y_b00; +++} HEVCRpiQpu; +++ +++int rpi_hevc_qpu_init_fn(HEVCRpiQpu * const qf, const unsigned int bit_depth); +++ +++uint32_t qpu_fn(const int * const mc_fn); +++ +++#define QPU_N_GRP 4 +++#define QPU_N_MAX 12 +++ +++#define QPU_MAIL_EL_VALS 2 +++ +++struct vpu_qpu_wait_s; +++typedef struct vq_wait_s * vpu_qpu_wait_h; +++ +++// VPU specific functions +++ +++struct vpu_qpu_job_env_s; +++typedef struct vpu_qpu_job_env_s * vpu_qpu_job_h; +++ 
+++vpu_qpu_job_h vpu_qpu_job_new(void); +++void vpu_qpu_job_delete(const vpu_qpu_job_h vqj); +++void vpu_qpu_job_add_vpu(const vpu_qpu_job_h vqj, const uint32_t vpu_code, +++ const unsigned r0, const unsigned r1, const unsigned r2, const unsigned r3, const unsigned r4, const unsigned r5); +++void vpu_qpu_job_add_qpu(const vpu_qpu_job_h vqj, const unsigned int n, const uint32_t * const mail); +++void vpu_qpu_job_add_sync_this(const vpu_qpu_job_h vqj, vpu_qpu_wait_h * const wait_h); +++int vpu_qpu_job_start(const vpu_qpu_job_h vqj); +++int vpu_qpu_job_finish(const vpu_qpu_job_h vqj); +++ +++extern unsigned int vpu_get_fn(const unsigned int bit_depth); +++extern unsigned int vpu_get_constants(void); +++ +++// Waits for previous post_codee to complete and Will null out *wait_h after use +++void vpu_qpu_wait(vpu_qpu_wait_h * const wait_h); +++int vpu_qpu_init(void); +++void vpu_qpu_term(void); +++ +++extern int gpu_get_mailbox(void); +++void gpu_ref(void); +++void gpu_unref(void); +++ +++#endif ++diff --git a/libavcodec/rpi_zc.c b/libavcodec/rpi_zc.c ++new file mode 100644 ++index 0000000000..3bf1da4083 ++--- /dev/null +++++ b/libavcodec/rpi_zc.c ++@@ -0,0 +1,743 @@ +++#include "config.h" +++#if 1 //defined(RPI) //|| defined (RPI_DISPLAY) +++#include "libavcodec/avcodec.h" +++#include "rpi_qpu.h" +++#include "rpi_mailbox.h" +++#include "rpi_zc.h" +++#include "libavutil/avassert.h" +++#include +++ +++#include "libavutil/buffer_internal.h" +++#include +++ +++#define TRACE_ALLOC 0 +++ +++struct ZcPoolEnt; +++ +++typedef struct ZcPool +++{ +++ int numbytes; +++ unsigned int n; +++ struct ZcPoolEnt * head; +++ pthread_mutex_t lock; +++} ZcPool; +++ +++typedef struct ZcPoolEnt +++{ +++ // It is important that we start with gmem as other bits of code will expect to see that +++ GPU_MEM_PTR_T gmem; +++ unsigned int n; +++ struct ZcPoolEnt * next; +++ struct ZcPool * pool; +++} ZcPoolEnt; +++ +++#define ALLOC_PAD 0 +++#define ALLOC_ROUND 0x1000 +++#define ALLOC_N_OFFSET 0 
+++#define STRIDE_ROUND 64 +++#define STRIDE_OR 0 +++ +++#define DEBUG_ZAP0_BUFFERS 0 +++ +++static inline int av_rpi_is_sand_format(const int format) +++{ +++ return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_SAND64_16) || +++ (format == AV_PIX_FMT_RPI4_8 || format == AV_PIX_FMT_RPI4_10); +++} +++ +++static inline int av_rpi_is_sand_frame(const AVFrame * const frame) +++{ +++ return av_rpi_is_sand_format(frame->format); +++} +++ +++static ZcPoolEnt * zc_pool_ent_alloc(ZcPool * const pool, const unsigned int req_size) +++{ +++ ZcPoolEnt * const zp = av_malloc(sizeof(ZcPoolEnt)); +++ +++ // Round up to 4k & add 4k +++ const unsigned int alloc_size = (req_size + ALLOC_PAD + ALLOC_ROUND - 1) & ~(ALLOC_ROUND - 1); +++ +++ if (zp == NULL) { +++ av_log(NULL, AV_LOG_ERROR, "av_malloc(ZcPoolEnt) failed\n"); +++ goto fail0; +++ } +++ +++ if (gpu_malloc_cached(alloc_size, &zp->gmem) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_gpu_malloc_cached(%d) failed\n", alloc_size); +++ goto fail1; +++ } +++ +++#if TRACE_ALLOC +++ printf("%s: Alloc %#x bytes @ %p\n", __func__, zp->gmem.numbytes, zp->gmem.arm); +++#endif +++ +++ pool->numbytes = zp->gmem.numbytes; +++ zp->next = NULL; +++ zp->pool = pool; +++ zp->n = pool->n++; +++ return zp; +++ +++fail1: +++ av_free(zp); +++fail0: +++ return NULL; +++} +++ +++static void zc_pool_ent_free(ZcPoolEnt * const zp) +++{ +++#if TRACE_ALLOC +++ printf("%s: Free %#x bytes @ %p\n", __func__, zp->gmem.numbytes, zp->gmem.arm); +++#endif +++ +++ gpu_free(&zp->gmem); +++ av_free(zp); +++} +++ +++static void zc_pool_flush(ZcPool * const pool) +++{ +++ ZcPoolEnt * p = pool->head; +++ pool->head = NULL; +++ pool->numbytes = -1; +++ +++ while (p != NULL) +++ { +++ ZcPoolEnt * const zp = p; +++ p = p->next; +++ zc_pool_ent_free(zp); +++ } +++} +++ +++static ZcPoolEnt * zc_pool_alloc(ZcPool * const pool, const int req_bytes) +++{ +++ ZcPoolEnt * zp; +++ int numbytes; +++ +++ pthread_mutex_lock(&pool->lock); +++ +++ numbytes = pool->numbytes; 
+++ +++ // If size isn't close then dump the pool +++ // Close in this context means within 128k +++ if (req_bytes > numbytes || req_bytes + 0x20000 < numbytes) +++ { +++ zc_pool_flush(pool); +++ numbytes = req_bytes; +++ } +++ +++ if (pool->head != NULL) +++ { +++ zp = pool->head; +++ pool->head = zp->next; +++ } +++ else +++ { +++ zp = zc_pool_ent_alloc(pool, numbytes); +++ } +++ +++ pthread_mutex_unlock(&pool->lock); +++ +++ // Start with our buffer empty of preconceptions +++// rpi_cache_flush_one_gm_ptr(&zp->gmem, RPI_CACHE_FLUSH_MODE_INVALIDATE); +++ +++ return zp; +++} +++ +++static void zc_pool_free(ZcPoolEnt * const zp) +++{ +++ ZcPool * const pool = zp == NULL ? NULL : zp->pool; +++ if (zp != NULL) +++ { +++ pthread_mutex_lock(&pool->lock); +++#if TRACE_ALLOC +++ printf("%s: Recycle %#x, %#x\n", __func__, pool->numbytes, zp->gmem.numbytes); +++#endif +++ +++ if (pool->numbytes == zp->gmem.numbytes) +++ { +++ zp->next = pool->head; +++ pool->head = zp; +++ pthread_mutex_unlock(&pool->lock); +++ } +++ else +++ { +++ pthread_mutex_unlock(&pool->lock); +++ zc_pool_ent_free(zp); +++ } +++ } +++} +++ +++static void +++zc_pool_init(ZcPool * const pool) +++{ +++ pool->numbytes = -1; +++ pool->head = NULL; +++ pthread_mutex_init(&pool->lock, NULL); +++} +++ +++static void +++zc_pool_destroy(ZcPool * const pool) +++{ +++ pool->numbytes = -1; +++ zc_pool_flush(pool); +++ pthread_mutex_destroy(&pool->lock); +++} +++ +++typedef struct ZcOldCtxVals +++{ +++ int thread_safe_callbacks; +++ int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags); +++ void * get_buffer_context; +++} ZcOldCtxVals; +++ +++typedef struct AVZcEnv +++{ +++ unsigned int refcount; +++ ZcPool pool; +++ ZcOldCtxVals old; +++} ZcEnv; +++ +++// Callback when buffer unrefed to zero +++static void rpi_free_display_buffer(void *opaque, uint8_t *data) +++{ +++ ZcPoolEnt *const zp = opaque; +++// printf("%s: data=%p\n", __func__, data); +++ zc_pool_free(zp); +++} +++ +++static inline 
GPU_MEM_PTR_T * pic_gm_ptr(AVBufferRef * const buf) +++{ +++ // Kludge where we check the free fn to check this is really +++ // one of our buffers - can't think of a better way +++ return buf == NULL || buf->buffer->free != rpi_free_display_buffer ? NULL : +++ av_buffer_get_opaque(buf); +++} +++ +++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry( +++ const int format, const unsigned int video_width, const unsigned int video_height) +++{ +++ AVRpiZcFrameGeometry geo; +++ +++ switch (format) +++ { +++ case AV_PIX_FMT_YUV420P: +++ geo.stride_y = ((video_width + 32 + STRIDE_ROUND - 1) & ~(STRIDE_ROUND - 1)) | STRIDE_OR; +++ geo.stride_c = geo.stride_y / 2; +++ geo.height_y = (video_height + 32 + 31) & ~31; +++ geo.height_c = geo.height_y / 2; +++ geo.planes_c = 2; +++ geo.stripes = 1; +++ geo.bytes_per_pel = 1; +++ geo.stripe_is_yc = 1; +++ break; +++ +++ case AV_PIX_FMT_YUV420P10: +++ geo.stride_y = ((video_width * 2 + 64 + STRIDE_ROUND - 1) & ~(STRIDE_ROUND - 1)) | STRIDE_OR; +++ geo.stride_c = geo.stride_y / 2; +++ geo.height_y = (video_height + 32 + 31) & ~31; +++ geo.height_c = geo.height_y / 2; +++ geo.planes_c = 2; +++ geo.stripes = 1; +++ geo.bytes_per_pel = 2; +++ geo.stripe_is_yc = 1; +++ break; +++ +++ case AV_PIX_FMT_SAND128: +++ case AV_PIX_FMT_RPI4_8: +++ { +++ const unsigned int stripe_w = 128; +++ +++ static pthread_mutex_t sand_lock = PTHREAD_MUTEX_INITIALIZER; +++ static VC_IMAGE_T img = {0}; +++ +++ // Given the overhead of calling the mailbox keep a stashed +++ // copy as we will almost certainly just want the same numbers again +++ // but that means we need a lock +++ pthread_mutex_lock(&sand_lock); +++ +++ if (img.width != video_width || img.height != video_height) +++ { +++ VC_IMAGE_T new_img = { +++ .type = VC_IMAGE_YUV_UV, +++ .width = video_width, +++ .height = video_height +++ }; +++ +++ gpu_ref(); +++ mbox_get_image_params(gpu_get_mailbox(), &new_img); +++ gpu_unref(); +++ img = new_img; +++ } +++ +++ geo.stride_y = stripe_w; +++ geo.stride_c 
= stripe_w; +++ geo.height_y = ((intptr_t)img.extra.uv.u - (intptr_t)img.image_data) / stripe_w; +++ geo.height_c = img.pitch / stripe_w - geo.height_y; +++ geo.stripe_is_yc = 1; +++ if (geo.height_y * stripe_w > img.pitch) +++ { +++ // "tall" sand - all C blocks now follow Y +++ geo.height_y = img.pitch / stripe_w; +++ geo.height_c = geo.height_y; +++ geo.stripe_is_yc = 0; +++ } +++ geo.planes_c = 1; +++ geo.stripes = (video_width + stripe_w - 1) / stripe_w; +++ geo.bytes_per_pel = 1; +++ +++ pthread_mutex_unlock(&sand_lock); +++#if 0 +++ printf("Req: %dx%d: stride=%d/%d, height=%d/%d, stripes=%d, img.pitch=%d\n", +++ video_width, video_height, +++ geo.stride_y, geo.stride_c, +++ geo.height_y, geo.height_c, +++ geo.stripes, img.pitch); +++#endif +++ av_assert0((int)geo.height_y > 0 && (int)geo.height_c > 0); +++ av_assert0(geo.height_y >= video_height && geo.height_c >= video_height / 2); +++ break; +++ } +++ +++ case AV_PIX_FMT_RPI4_10: +++ { +++ const unsigned int stripe_w = 128; // bytes +++ +++ static pthread_mutex_t sand_lock = PTHREAD_MUTEX_INITIALIZER; +++ static VC_IMAGE_T img = {0}; +++ +++ // Given the overhead of calling the mailbox keep a stashed +++ // copy as we will almost certainly just want the same numbers again +++ // but that means we need a lock +++ pthread_mutex_lock(&sand_lock); +++ +++ if (img.width != video_width || img.height != video_height) +++ { +++ VC_IMAGE_T new_img = { +++ .type = VC_IMAGE_YUV10COL, +++ .width = video_width, +++ .height = video_height +++ }; +++ +++ gpu_ref(); +++ mbox_get_image_params(gpu_get_mailbox(), &new_img); +++ gpu_unref(); +++ img = new_img; +++ } +++ +++ geo.stride_y = stripe_w; +++ geo.stride_c = stripe_w; +++ geo.height_y = ((intptr_t)img.extra.uv.u - (intptr_t)img.image_data) / stripe_w; +++ geo.height_c = img.pitch / stripe_w - geo.height_y; +++ geo.planes_c = 1; +++ geo.stripes = ((video_width * 4 + 2) / 3 + stripe_w - 1) / stripe_w; +++ geo.bytes_per_pel = 1; +++ geo.stripe_is_yc = 1; +++ +++ 
pthread_mutex_unlock(&sand_lock); +++ +++ av_assert0((int)geo.height_y > 0 && (int)geo.height_c > 0); +++ av_assert0(geo.height_y >= video_height && geo.height_c >= video_height / 2); +++ break; +++ } +++ +++ case AV_PIX_FMT_SAND64_16: +++ case AV_PIX_FMT_SAND64_10: +++ { +++ const unsigned int stripe_w = 128; // bytes +++ +++ static pthread_mutex_t sand_lock = PTHREAD_MUTEX_INITIALIZER; +++ static VC_IMAGE_T img = {0}; +++ +++ // Given the overhead of calling the mailbox keep a stashed +++ // copy as we will almost certainly just want the same numbers again +++ // but that means we need a lock +++ pthread_mutex_lock(&sand_lock); +++ +++ if (img.width != video_width || img.height != video_height) +++ { +++ VC_IMAGE_T new_img = { +++ .type = VC_IMAGE_YUV_UV_16, +++ .width = video_width, +++ .height = video_height +++ }; +++ +++ gpu_ref(); +++ mbox_get_image_params(gpu_get_mailbox(), &new_img); +++ gpu_unref(); +++ img = new_img; +++ } +++ +++ geo.stride_y = stripe_w; +++ geo.stride_c = stripe_w; +++ geo.height_y = ((intptr_t)img.extra.uv.u - (intptr_t)img.image_data) / stripe_w; +++ geo.height_c = img.pitch / stripe_w - geo.height_y; +++ geo.planes_c = 1; +++ geo.stripes = (video_width * 2 + stripe_w - 1) / stripe_w; +++ geo.bytes_per_pel = 2; +++ geo.stripe_is_yc = 1; +++ +++ pthread_mutex_unlock(&sand_lock); +++ break; +++ } +++ +++ default: +++ memset(&geo, 0, sizeof(geo)); +++ break; +++ } +++ return geo; +++} +++ +++ +++static AVBufferRef * rpi_buf_pool_alloc(ZcPool * const pool, int size) // Take an entry from pool and wrap it in an AVBufferRef; returns NULL on failure +++{ +++ ZcPoolEnt *const zp = zc_pool_alloc(pool, size); +++ AVBufferRef * buf; +++ intptr_t idata = zp == NULL ? 0 : (intptr_t)zp->gmem.arm; // zp may be NULL here: do not dereference before the check below +++#if ALLOC_N_OFFSET != 0 +++ intptr_t noff = zp == NULL ? 0 : ((zp->n * ALLOC_N_OFFSET) & (ALLOC_PAD - 1)); +++#endif +++ +++ if (zp == NULL) { +++ av_log(NULL, AV_LOG_ERROR, "zc_pool_alloc(%d) failed\n", size); +++ goto fail0; +++ } +++ +++#if ALLOC_N_OFFSET != 0 +++ idata = ((idata & ~(ALLOC_PAD - 1)) | noff) + (((idata & (ALLOC_PAD - 1)) > noff) ? 
ALLOC_PAD : 0); +++#endif +++ +++#if DEBUG_ZAP0_BUFFERS +++ memset((void*)idata, 0, size); +++#endif +++ +++ if ((buf = av_buffer_create((void *)idata, size, rpi_free_display_buffer, zp, AV_BUFFER_FLAG_READONLY)) == NULL) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_buffer_create() failed\n"); +++ goto fail2; +++ } +++ +++ return buf; +++ +++fail2: +++ zc_pool_free(zp); // no AVBufferRef owns the entry, so hand it back ourselves +++fail0: +++ return NULL; +++} +++ +++static int rpi_get_display_buffer(ZcEnv *const zc, AVFrame * const frame) +++{ +++ const AVRpiZcFrameGeometry geo = av_rpi_zc_frame_geometry(frame->format, frame->width, frame->height); +++ const unsigned int size_y = geo.stride_y * geo.height_y; +++ const unsigned int size_c = geo.stride_c * geo.height_c; +++ const unsigned int size_pic = (size_y + size_c * geo.planes_c) * geo.stripes; +++ AVBufferRef * buf; +++ unsigned int i; +++ +++// printf("Do local alloc: format=%#x, %dx%d: %u\n", frame->format, frame->width, frame->height, size_pic); +++ +++ if ((buf = rpi_buf_pool_alloc(&zc->pool, size_pic)) == NULL) +++ { +++ av_log(NULL, AV_LOG_ERROR, "rpi_get_display_buffer: Failed to get buffer from pool\n"); +++ return AVERROR(ENOMEM); +++ } +++ +++ for (i = 0; i < AV_NUM_DATA_POINTERS; i++) { +++ frame->buf[i] = NULL; +++ frame->data[i] = NULL; +++ frame->linesize[i] = 0; +++ } +++ +++ frame->buf[0] = buf; +++ +++ frame->linesize[0] = geo.stride_y; +++ frame->linesize[1] = geo.stride_c; +++ frame->linesize[2] = geo.stride_c; +++ // abuse: linesize[3] = "stripe stride" +++ // stripe_stride is NOT the stride between slices it is (that / geo.stride_y). +++ // In a general case this makes the calculation an xor and multiply rather +++ // than a divide and multiply +++ if (geo.stripes > 1) +++ frame->linesize[3] = geo.stripe_is_yc ? geo.height_y + geo.height_c : geo.height_y; +++ +++ frame->data[0] = buf->data; +++ frame->data[1] = frame->data[0] + (geo.stripe_is_yc ? 
size_y : size_y * geo.stripes); +++ if (geo.planes_c > 1) +++ frame->data[2] = frame->data[1] + size_c; +++ +++ frame->extended_data = frame->data; +++ // Leave extended buf alone +++ +++#if RPI_ZC_SAND_8_IN_10_BUF != 0 +++ // *** If we intend to use this for real we will want a 2nd buffer pool +++ frame->buf[RPI_ZC_SAND_8_IN_10_BUF] = rpi_buf_pool_alloc(&zc->pool, size_pic); // *** 2 * wanted size - kludge +++#endif +++ +++ return 0; +++} +++ +++#define RPI_GET_BUFFER2 1 +++ +++int av_rpi_zc_get_buffer2(struct AVCodecContext *s, AVFrame *frame, int flags) +++{ +++#if !RPI_GET_BUFFER2 +++ return avcodec_default_get_buffer2(s, frame, flags); +++#else +++ int rv; +++ +++ if ((s->codec->capabilities & AV_CODEC_CAP_DR1) == 0) +++ { +++// printf("Do default alloc: format=%#x\n", frame->format); +++ rv = avcodec_default_get_buffer2(s, frame, flags); +++ } +++ else if (frame->format == AV_PIX_FMT_YUV420P || +++ av_rpi_is_sand_frame(frame)) +++ { +++ rv = rpi_get_display_buffer(s->get_buffer_context, frame); +++ } +++ else +++ { +++ rv = avcodec_default_get_buffer2(s, frame, flags); +++ } +++ +++#if 0 +++ printf("%s: fmt:%d, %dx%d lsize=%d/%d/%d/%d data=%p/%p/%p bref=%p/%p/%p opaque[0]=%p\n", __func__, +++ frame->format, frame->width, frame->height, +++ frame->linesize[0], frame->linesize[1], frame->linesize[2], frame->linesize[3], +++ frame->data[0], frame->data[1], frame->data[2], +++ frame->buf[0], frame->buf[1], frame->buf[2], +++ av_buffer_get_opaque(frame->buf[0])); +++#endif +++ return rv; +++#endif +++} +++ +++ +++static AVBufferRef * zc_copy(struct AVCodecContext * const s, +++ const AVFrame * const src) +++{ +++ AVFrame dest_frame; +++ AVFrame * const dest = &dest_frame; +++ unsigned int i; +++ uint8_t * psrc, * pdest; +++ +++ dest->format = src->format; +++ dest->width = src->width; +++ dest->height = src->height; +++ +++ if (rpi_get_display_buffer(s->get_buffer_context, dest) != 0) +++ { +++ return NULL; +++ } +++ +++ for (i = 0, psrc = src->data[0], pdest = 
dest->data[0]; +++ i != dest->height; +++ ++i, psrc += src->linesize[0], pdest += dest->linesize[0]) +++ { +++ memcpy(pdest, psrc, dest->width); +++ } +++ for (i = 0, psrc = src->data[1], pdest = dest->data[1]; +++ i != dest->height / 2; +++ ++i, psrc += src->linesize[1], pdest += dest->linesize[1]) +++ { +++ memcpy(pdest, psrc, dest->width / 2); +++ } +++ for (i = 0, psrc = src->data[2], pdest = dest->data[2]; +++ i != dest->height / 2; +++ ++i, psrc += src->linesize[2], pdest += dest->linesize[2]) +++ { +++ memcpy(pdest, psrc, dest->width / 2); +++ } +++ +++ return dest->buf[0]; +++} +++ +++ +++static AVBufferRef * zc_420p10_to_sand128(struct AVCodecContext * const s, +++ const AVFrame * const src) +++{ +++ assert(0); +++ return NULL; +++} +++ +++ +++static AVBufferRef * zc_sand64_16_to_sand128(struct AVCodecContext * const s, +++ const AVFrame * const src, const unsigned int src_bits) +++{ +++ assert(0); +++ return NULL; +++} +++ +++ +++ +++AVRpiZcRefPtr av_rpi_zc_ref(struct AVCodecContext * const s, +++ const AVFrame * const frame, const enum AVPixelFormat expected_format, const int maycopy) +++{ +++ assert(s != NULL); +++ +++ if (frame->format != AV_PIX_FMT_YUV420P && +++ frame->format != AV_PIX_FMT_YUV420P10 && +++ !av_rpi_is_sand_frame(frame)) +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Format not SAND/YUV420P: %d\n", __func__, frame->format); +++ return NULL; +++ } +++ +++ if (frame->buf[1] != NULL || frame->format != expected_format) +++ { +++#if RPI_ZC_SAND_8_IN_10_BUF +++ if (frame->format == AV_PIX_FMT_SAND64_10 && expected_format == AV_PIX_FMT_SAND128 && frame->buf[RPI_ZC_SAND_8_IN_10_BUF] != NULL) +++ { +++// av_log(s, AV_LOG_INFO, "%s: --- found buf[4]\n", __func__); +++ return av_buffer_ref(frame->buf[RPI_ZC_SAND_8_IN_10_BUF]); +++ } +++#endif +++ +++ if (maycopy) +++ { +++ if (frame->buf[1] != NULL) +++ av_log(s, AV_LOG_INFO, "%s: *** Not a single buf frame: copying\n", __func__); +++ else +++ av_log(s, AV_LOG_INFO, "%s: *** Unexpected frame 
format %d: copying to %d\n", __func__, frame->format, expected_format); +++ +++ switch (frame->format) +++ { +++ case AV_PIX_FMT_YUV420P10: +++ return zc_420p10_to_sand128(s, frame); +++ +++ case AV_PIX_FMT_SAND64_10: +++ return zc_sand64_16_to_sand128(s, frame, 10); +++ +++ default: +++ return zc_copy(s, frame); +++ } +++ } +++ else +++ { +++ if (frame->buf[1] != NULL) +++ av_log(s, AV_LOG_WARNING, "%s: *** Not a single buf frame: buf[1] != NULL\n", __func__); +++ else +++ av_log(s, AV_LOG_INFO, "%s: *** Unexpected frame format: %d != %d\n", __func__, frame->format, expected_format); +++ return NULL; +++ } +++ } +++ +++ if (pic_gm_ptr(frame->buf[0]) == NULL) +++ { +++ if (maycopy) +++ { +++ av_log(s, AV_LOG_INFO, "%s: *** Not one of our buffers: copying\n", __func__); +++ return zc_copy(s, frame); +++ } +++ else +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Not one of our buffers: NULL\n", __func__); +++ return NULL; +++ } +++ } +++ +++ return av_buffer_ref(frame->buf[0]); +++} +++ +++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref) +++{ +++ const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref); +++ return p == NULL ? -1 : p->vc_handle; +++} +++ +++int av_rpi_zc_offset(const AVRpiZcRefPtr fr_ref) +++{ +++ const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref); +++ return p == NULL ? 0 : fr_ref->data - p->arm; +++} +++ +++int av_rpi_zc_length(const AVRpiZcRefPtr fr_ref) +++{ +++ return fr_ref == NULL ? 0 : fr_ref->size; +++} +++ +++ +++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref) +++{ +++ const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref); +++ return p == NULL ? 
0 : p->numbytes; +++} +++ +++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref) +++{ +++ if (fr_ref != NULL) +++ { +++ av_buffer_unref(&fr_ref); +++ } +++} +++ +++AVZcEnvPtr av_rpi_zc_env_alloc(void) +++{ +++ ZcEnv * const zc = av_mallocz(sizeof(ZcEnv)); +++ if (zc == NULL) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_rpi_zc_env_alloc: Context allocation failed\n"); +++ return NULL; +++ } +++ +++ zc_pool_init(&zc->pool); +++ return zc; +++} +++ +++void av_rpi_zc_env_free(AVZcEnvPtr zc) +++{ +++ if (zc != NULL) +++ { +++ zc_pool_destroy(&zc->pool); ; +++ av_free(zc); +++ } +++} +++ +++int av_rpi_zc_in_use(const struct AVCodecContext * const s) +++{ +++ return s->get_buffer2 == av_rpi_zc_get_buffer2; +++} +++ +++int av_rpi_zc_init(struct AVCodecContext * const s) +++{ +++ if (av_rpi_zc_in_use(s)) +++ { +++ ZcEnv * const zc = s->get_buffer_context; +++ ++zc->refcount; +++ } +++ else +++ { +++ ZcEnv *const zc = av_rpi_zc_env_alloc(); +++ if (zc == NULL) +++ { +++ return AVERROR(ENOMEM); +++ } +++ +++ zc->refcount = 1; +++ zc->old.get_buffer_context = s->get_buffer_context; +++ zc->old.get_buffer2 = s->get_buffer2; +++ zc->old.thread_safe_callbacks = s->thread_safe_callbacks; +++ +++ s->get_buffer_context = zc; +++ s->get_buffer2 = av_rpi_zc_get_buffer2; +++ s->thread_safe_callbacks = 1; +++ } +++ return 0; +++} +++ +++void av_rpi_zc_uninit(struct AVCodecContext * const s) +++{ +++ if (av_rpi_zc_in_use(s)) +++ { +++ ZcEnv * const zc = s->get_buffer_context; +++ if (--zc->refcount == 0) +++ { +++ s->get_buffer2 = zc->old.get_buffer2; +++ s->get_buffer_context = zc->old.get_buffer_context; +++ s->thread_safe_callbacks = zc->old.thread_safe_callbacks; +++ av_rpi_zc_env_free(zc); +++ } +++ } +++} +++ +++#endif // RPI +++ ++diff --git a/libavcodec/rpi_zc.h b/libavcodec/rpi_zc.h ++new file mode 100644 ++index 0000000000..0e39b8e3b3 ++--- /dev/null +++++ b/libavcodec/rpi_zc.h ++@@ -0,0 +1,106 @@ +++#ifndef LIBAVCODEC_RPI_ZC_H +++#define LIBAVCODEC_RPI_ZC_H +++ +++// Zero-Copy frame code 
for RPi +++// RPi needs Y/U/V planes to be contiguous for display. By default +++// ffmpeg will allocate separated planes so a memcpy is needed before +++// display. This code provides a method of making ffmpeg allocate a single +++// bit of memory for the frame which can then be reference counted until +++// display has finished with it. +++ +++// Frame buffer number in which to stuff an 8-bit copy of a 16-bit frame +++// 0 disables +++// *** This option still in development +++// Only works if SAO active +++// Allocates buffers that are twice the required size +++#define RPI_ZC_SAND_8_IN_10_BUF 0 +++ +++struct AVBufferRef; +++struct AVFrame; +++struct AVCodecContext; +++enum AVPixelFormat; +++ +++// "Opaque" pointer to whatever we are using as a buffer reference +++typedef struct AVBufferRef * AVRpiZcRefPtr; +++ +++struct AVZcEnv; +++typedef struct AVZcEnv * AVZcEnvPtr; +++ +++typedef struct AVRpiZcFrameGeometry +++{ +++ unsigned int stride_y; // Luma stride (bytes) +++ unsigned int height_y; // Luma height (lines) +++ unsigned int stride_c; // Chroma stride (bytes) +++ unsigned int height_c; // Chroma height (lines) +++ unsigned int planes_c; // Chroma plane count (U, V = 2, interleaved = 1) +++ unsigned int stripes; // Number of stripes (sand) +++ unsigned int bytes_per_pel; +++ int stripe_is_yc; // A single stripe is Y then C (false for tall sand) +++} AVRpiZcFrameGeometry; +++ +++ +++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry( +++ const int format, +++ const unsigned int video_width, const unsigned int video_height); +++ +++// Replacement fn for avctx->get_buffer2 +++// Should be set before calling avcodec_open2 +++// +++// N.B. in addition to setting avctx->get_buffer2, avctx->refcounted_frames +++// must be set to 1 as otherwise the buffer info is killed before being returned +++// by avcodec_decode_video2. Note also that this means that the AVFrame that is +++// returned must be manually derefed with av_frame_unref. 
This should be done +++// after av_rpi_zc_ref has been called. +++int av_rpi_zc_get_buffer2(struct AVCodecContext *s, AVFrame *frame, int flags); +++ +++// Generate a ZC reference to the buffer(s) in this frame +++// If the buffer doesn't appear to be one allocated by av_rpi_zc_get_buffer2 +++// then the behaviour depends on maycopy: +++// If maycopy=0 then return NULL +++// If maycopy=1 && the src frame is in a form where we can easily copy +++// the data, then allocate a new buffer and copy the data into it +++// Otherwise return NULL +++AVRpiZcRefPtr av_rpi_zc_ref(struct AVCodecContext * const s, +++ const struct AVFrame * const frame, const enum AVPixelFormat expected_format, const int maycopy); +++ +++// Get the vc_handle from the frame ref +++// Returns -1 if ref doesn't look valid +++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref); +++// Get offset from the start of the memory referenced +++// by the vc_handle to valid data +++int av_rpi_zc_offset(const AVRpiZcRefPtr fr_ref); +++// Length of buffer data +++int av_rpi_zc_length(const AVRpiZcRefPtr fr_ref); +++// Get the number of bytes allocated from the frame ref +++// Returns 0 if ref doesn't look valid +++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref); +++ +++// Unreference the buffer refed/allocated by av_rpi_zc_ref +++// If fr_ref is NULL then this will NOP +++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref); +++ +++// Allocate an environment for the buffer pool used by the ZC code +++// This should be put in avctx->get_buffer_context so it can be found by +++// av_rpi_zc_get_buffer2 when it is called from ffmpeg +++AVZcEnvPtr av_rpi_zc_env_alloc(void); +++ +++// Free an environment allocated by av_rpi_zc_env_alloc (NULL is ignored) +++void av_rpi_zc_env_free(AVZcEnvPtr); +++ +++// Test to see if the context is using zc (checks get_buffer2) +++int av_rpi_zc_in_use(const struct AVCodecContext * const s); +++ +++// Init ZC into a context +++// There is nothing magic in this fn - it just packages setting +++// get_buffer2 & get_buffer_context 
+++int av_rpi_zc_init(struct AVCodecContext * const s); +++ +++// Free ZC from a context +++// There is nothing magic in this fn - it just packages unsetting +++// get_buffer2 & get_buffer_context +++void av_rpi_zc_uninit(struct AVCodecContext * const s); +++ +++ +++ +++#endif +++ ++diff --git a/libavutil/buffer.c b/libavutil/buffer.c ++index 8d1aa5fa84..649876db77 100644 ++--- a/libavutil/buffer.c +++++ b/libavutil/buffer.c ++@@ -355,3 +355,9 @@ AVBufferRef *av_buffer_pool_get(AVBufferPool *pool) ++ ++ return ret; ++ } +++ +++// Return the opaque for the underlying frame (gives us a GPU_MEM_PTR_T) +++void *av_buffer_pool_opaque(AVBufferRef *ref) { +++ BufferPoolEntry *buf = av_buffer_get_opaque(ref); +++ return buf->opaque; +++} ++diff --git a/libavutil/buffer.h b/libavutil/buffer.h ++index 73b6bd0b14..d907de3f1c 100644 ++--- a/libavutil/buffer.h +++++ b/libavutil/buffer.h ++@@ -284,6 +284,9 @@ void av_buffer_pool_uninit(AVBufferPool **pool); ++ */ ++ AVBufferRef *av_buffer_pool_get(AVBufferPool *pool); ++ +++// Return the opaque for the underlying frame +++void *av_buffer_pool_opaque(AVBufferRef *ref); +++ ++ /** ++ * @} ++ */ ++diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c ++index 8ed52751c1..5e2b5ec3bc 100644 ++--- a/libavutil/pixdesc.c +++++ b/libavutil/pixdesc.c ++@@ -1989,6 +1989,18 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = { ++ .name = "cuda", ++ .flags = AV_PIX_FMT_FLAG_HWACCEL, ++ }, +++ [AV_PIX_FMT_RPI] = { +++ .name = "rpi", +++ .flags = AV_PIX_FMT_FLAG_HWACCEL, +++ }, +++ [AV_PIX_FMT_RPI4_10] = { +++ .name = "rpi", +++ .flags = AV_PIX_FMT_FLAG_HWACCEL, +++ }, +++ [AV_PIX_FMT_RPI4_8] = { +++ .name = "rpi", +++ .flags = AV_PIX_FMT_FLAG_HWACCEL, +++ }, ++ [AV_PIX_FMT_AYUV64LE] = { ++ .name = "ayuv64le", ++ .nb_components = 4, ++diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h ++index 34a1531489..0a6ff1f482 100644 ++--- a/libavutil/pixfmt.h +++++ b/libavutil/pixfmt.h ++@@ -234,6 +234,11 @@ enum AVPixelFormat { 
++ */ ++ AV_PIX_FMT_CUDA, ++ +++ /** +++ * HW acceleration through RPI. +++ */ +++ AV_PIX_FMT_RPI, +++ ++ AV_PIX_FMT_0RGB, ///< packed RGB 8:8:8, 32bpp, XRGBXRGB... X=unused/undefined ++ AV_PIX_FMT_RGB0, ///< packed RGB 8:8:8, 32bpp, RGBXRGBX... X=unused/undefined ++ AV_PIX_FMT_0BGR, ///< packed BGR 8:8:8, 32bpp, XBGRXBGR... X=unused/undefined ++@@ -334,6 +339,14 @@ enum AVPixelFormat { ++ */ ++ AV_PIX_FMT_OPENCL, ++ +++// RPI - not on ifdef so can be got at by calling progs +++ AV_PIX_FMT_SAND128, ///< 4:2:0 8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding +++ AV_PIX_FMT_SAND64_10, ///< 4:2:0 10-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding +++ AV_PIX_FMT_SAND64_16, ///< 4:2:0 16-bit 64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding +++ +++ AV_PIX_FMT_RPI4_8, +++ AV_PIX_FMT_RPI4_10, +++ ++ AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions ++ }; ++ ++diff --git a/pi-util/conf_pi1.sh b/pi-util/conf_pi1.sh ++new file mode 100755 ++index 0000000000..ec25b81c31 ++--- /dev/null +++++ b/pi-util/conf_pi1.sh ++@@ -0,0 +1,31 @@ +++echo "Configure for Pi1" +++ +++RPI_TOOLROOT=`pwd`/../tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf +++RPI_OPT_VC=`pwd`/../firmware/opt/vc +++ +++RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" +++RPI_DEFS="-D__VCCOREVER__=0x04000000 -DRPI=1" +++RPI_LIBDIRS="-L$RPI_TOOLROOT/lib -L$RPI_OPT_VC/lib" +++#RPI_KEEPS="-save-temps=obj" +++RPI_KEEPS="" +++ +++./configure --enable-cross-compile\ +++ --cpu=arm1176jzf-s\ +++ --arch=arm\ +++ --disable-neon\ +++ --target-os=linux\ +++ --disable-stripping\ +++ --enable-mmal\ +++ --extra-cflags="-g $RPI_KEEPS $RPI_DEFS $RPI_INCLUDES"\ +++ --extra-cxxflags="$RPI_DEFS $RPI_INCLUDES"\ +++ --extra-ldflags="$RPI_LIBDIRS 
-Wl,-rpath=/opt/vc/lib,-rpath-link=$RPI_OPT_VC/lib,-rpath=/lib,-rpath=/usr/lib,-rpath-link=$RPI_TOOLROOT/lib,-rpath-link=$RPI_TOOLROOT/lib"\ +++ --extra-libs="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm"\ +++ --cross-prefix=$RPI_TOOLROOT/bin/arm-linux-gnueabihf- +++ +++ +++# --enable-extra-warnings\ +++# --arch=armv71\ +++# --enable-shared\ +++ +++# gcc option for getting asm listing +++# -Wa,-ahls ++diff --git a/pi-util/conf_pi2.sh b/pi-util/conf_pi2.sh ++new file mode 100755 ++index 0000000000..7ec0402ce8 ++--- /dev/null +++++ b/pi-util/conf_pi2.sh ++@@ -0,0 +1,34 @@ +++echo "Configure for Pi2/3" +++ +++RPI_TOOLROOT=/home/dom/tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf +++RPI_OPT_VC=/opt/bcm-rootfs/opt/vc +++ +++RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux" +++RPI_DEFS="-D__VCCOREVER__=0x04000000 -DRPI_DISPLAY=1" +++RPI_LIBDIRS="-L$RPI_TOOLROOT/lib -L$RPI_OPT_VC/lib" +++#RPI_KEEPS="-save-temps=obj" +++RPI_KEEPS="" +++ +++./configure --enable-cross-compile\ +++ --arch=armv6t2\ +++ --cpu=cortex-a7\ +++ --target-os=linux\ +++ --disable-stripping\ +++ --disable-thumb\ +++ --enable-mmal\ +++ --enable-rpi\ +++ --extra-cflags="-g $RPI_KEEPS $RPI_DEFS $RPI_INCLUDES"\ +++ --extra-cxxflags="$RPI_DEFS $RPI_INCLUDES"\ +++ --extra-ldflags="$RPI_LIBDIRS -Wl,-rpath=/opt/vc/lib,-rpath-link=$RPI_OPT_VC/lib,-rpath=/lib,-rpath=/usr/lib,-rpath-link=$RPI_TOOLROOT/lib,-rpath-link=$RPI_TOOLROOT/lib"\ +++ --extra-libs="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm"\ +++ --cross-prefix=$RPI_TOOLROOT/bin/arm-linux-gnueabihf- \ +++ --prefix=$HOME/buster/home/pi/projects/fpga \ +++ --extra-libs="-ldl" +++ +++# --disable-decoders --enable-decoder=hevc --disable-hwaccels --enable-hwaccel=hevc_rpi --disable-encoders --enable-encoder=rawvideo --enable-muxer=rawvideo \ +++# --enable-extra-warnings\ +++# 
--arch=armv71\ +++# --enable-shared\ +++ +++# gcc option for getting asm listing +++# -Wa,-ahls +-- +2.20.1 + + +From b6633c36d3855a81d32dbf0dd68358267d0e7c2c Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 26 Jun 2017 20:17:09 +0100 +Subject: [PATCH 03/14] MMAL: Add hevc support by allowing 4 planes + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp | 3 +++ + xbmc/cores/VideoPlayer/Process/VideoBuffer.h | 2 +- + .../VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp | 3 +++ + 3 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +index 2cf3575ca9..854f34fa62 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +@@ -77,6 +77,8 @@ void CMMALYUVBuffer::GetStrides(int(&strides)[YuvImage::MAX_PLANES]) + strides[0] = geo.getStrideY(); + strides[1] = geo.getStrideC(); + strides[2] = geo.getStrideC(); ++ if (geo.getStripes() > 1) ++ strides[3] = geo.getHeightY() + geo.getHeightC(); // abuse: strides[3] = stripe stride + } + + void CMMALYUVBuffer::SetDimensions(int width, int height, const int (&strides)[YuvImage::MAX_PLANES], const int (&planeOffsets)[YuvImage::MAX_PLANES]) +@@ -284,6 +286,7 @@ CDVDVideoCodec::VCReturn CDecoder::Decode(AVCodecContext* avctx, AVFrame* frame) + if (frame) + { + if ((frame->format != AV_PIX_FMT_YUV420P && frame->format != AV_PIX_FMT_YUV420P10 && frame->format != AV_PIX_FMT_YUV420P12 && frame->format != AV_PIX_FMT_YUV420P14 && frame->format != AV_PIX_FMT_YUV420P16 && ++ frame->format != AV_PIX_FMT_SAND128 && frame->format != AV_PIX_FMT_SAND64_10 && frame->format != AV_PIX_FMT_SAND64_16 && + frame->format != AV_PIX_FMT_BGR0 && frame->format != AV_PIX_FMT_RGB565LE) || + frame->buf[1] != nullptr || frame->buf[0] == nullptr) + { +diff --git a/xbmc/cores/VideoPlayer/Process/VideoBuffer.h 
b/xbmc/cores/VideoPlayer/Process/VideoBuffer.h +index 1c1ba21c48..86a71bb558 100644 +--- a/xbmc/cores/VideoPlayer/Process/VideoBuffer.h ++++ b/xbmc/cores/VideoPlayer/Process/VideoBuffer.h +@@ -23,7 +23,7 @@ extern "C" { + + struct YuvImage + { +- static const int MAX_PLANES = 3; ++ static const int MAX_PLANES = 4; + + uint8_t* plane[MAX_PLANES]; + int planesize[MAX_PLANES]; +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index aa5b0e06c0..fae5df73e2 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -193,6 +193,9 @@ std::vector CMMALPool::mmal_encoding_table = + { AV_PIX_FMT_YUV420P12,MMAL_ENCODING_I420_16, }, + { AV_PIX_FMT_YUV420P14,MMAL_ENCODING_I420_16, }, + { AV_PIX_FMT_YUV420P16,MMAL_ENCODING_I420_16, }, ++ { AV_PIX_FMT_SAND128, MMAL_ENCODING_YUVUV128 }, ++ { AV_PIX_FMT_SAND64_10,MMAL_ENCODING_YUVUV64_16 }, ++ { AV_PIX_FMT_SAND64_16,MMAL_ENCODING_YUVUV64_16 }, + { AV_PIX_FMT_RGBA, MMAL_ENCODING_RGBA, }, + { AV_PIX_FMT_BGRA, MMAL_ENCODING_BGRA }, + { AV_PIX_FMT_RGB0, MMAL_ENCODING_RGBA }, +-- +2.20.1 + + +From 0f8645a751d04e657a82a66d42a0c56205097466 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 17 May 2019 18:52:34 +0100 +Subject: [PATCH 04/14] RPI: Apply ffmpeg patches through cmake + +--- + cmake/modules/FindFFMPEG.cmake | 5 ++++- + tools/depends/target/ffmpeg/CMakeLists.txt | 5 +++++ + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/cmake/modules/FindFFMPEG.cmake b/cmake/modules/FindFFMPEG.cmake +index ef74671d40..a6e5de5917 100644 +--- a/cmake/modules/FindFFMPEG.cmake ++++ b/cmake/modules/FindFFMPEG.cmake +@@ -270,7 +270,10 @@ if(NOT FFMPEG_FOUND) + && + ${CMAKE_COMMAND} -E copy + ${CMAKE_SOURCE_DIR}/tools/depends/target/ffmpeg/FindGnuTls.cmake +- ) ++ && ++ patch -p1 < 
${CMAKE_SOURCE_DIR}/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch ++ ) ++ + + find_program(BASH_COMMAND bash) + if(NOT BASH_COMMAND) +diff --git a/tools/depends/target/ffmpeg/CMakeLists.txt b/tools/depends/target/ffmpeg/CMakeLists.txt +index 4a2622216c..3afb989dcf 100644 +--- a/tools/depends/target/ffmpeg/CMakeLists.txt ++++ b/tools/depends/target/ffmpeg/CMakeLists.txt +@@ -18,6 +18,11 @@ if(CROSSCOMPILING) + message(STATUS "CROSS: ${ffmpeg_conf}") + endif() + ++#if(CORE_PLATFORM_NAME STREQUAL rbpi) ++ string(CONCAT CMAKE_C_FLAGS ${CMAKE_C_FLAGS} " -I/opt/vc/include -I/opt/vc/include/interface/vcos/pthreads -I/opt/vc/include/interface/vmcs_host/linux") ++ list(APPEND ffmpeg_conf --enable-rpi --disable-ffmpeg --disable-ffprobe) ++#endif() ++ + if(CMAKE_C_FLAGS) + list(APPEND ffmpeg_conf --extra-cflags=${CMAKE_C_FLAGS}) + endif() +-- +2.20.1 + + +From a9a7f16a39db1fcab3c47df6024fb6c98c314063 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 29 Apr 2019 20:52:03 +0100 +Subject: [PATCH 05/14] HACK: add videoplayer.usemmal setting to gbm.xml + +This reverts commit 2c94f63f62a124ed4024f01c3dafcf8f6dfebda0. 
+--- + system/settings/gbm.xml | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/system/settings/gbm.xml b/system/settings/gbm.xml +index c5e4d98e0b..2335b5843e 100644 +--- a/system/settings/gbm.xml ++++ b/system/settings/gbm.xml +@@ -9,6 +9,11 @@ + + + ++ ++ 2 ++ true ++ ++ + + false + 2 +-- +2.20.1 + + +From cc4e38058d7d1afacfb8febd59ea68ae01c18d3f Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 25 Apr 2019 17:14:32 +0100 +Subject: [PATCH 06/14] gbm: Expose videoplayer.usedisplayasclock + +--- + system/settings/gbm.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/system/settings/gbm.xml b/system/settings/gbm.xml +index 2335b5843e..dffea009fe 100644 +--- a/system/settings/gbm.xml ++++ b/system/settings/gbm.xml +@@ -4,7 +4,7 @@ + + + +- false ++ true + false + + +-- +2.20.1 + + +From 18e97485734156146bc796bde8f0b82c0550d554 Mon Sep 17 00:00:00 2001 +From: Matthias Reichl +Date: Sun, 24 Mar 2019 11:07:25 +0100 +Subject: [PATCH 07/14] HACK: try to use MMAL with gbm + +Signed-off-by: Matthias Reichl +--- + cmake/platform/linux/gbm.cmake | 2 +- + xbmc/SystemGlobals.cpp | 4 -- + .../VideoRenderers/HwDecRender/MMALRenderer.h | 1 + + xbmc/platform/linux/CMakeLists.txt | 3 + + xbmc/platform/linux/RBP.cpp | 18 ++++++ + xbmc/platform/linux/RBP.h | 10 +++ + xbmc/windowing/gbm/CMakeLists.txt | 2 + + xbmc/windowing/gbm/VideoSyncPi.cpp | 62 +++++++++++++++++++ + xbmc/windowing/gbm/VideoSyncPi.h | 27 ++++++++ + .../windowing/gbm/WinSystemGbmGLESContext.cpp | 42 +++++++------ + xbmc/windowing/gbm/WinSystemGbmGLESContext.h | 3 +- + 11 files changed, 148 insertions(+), 26 deletions(-) + create mode 100644 xbmc/windowing/gbm/VideoSyncPi.cpp + create mode 100644 xbmc/windowing/gbm/VideoSyncPi.h + +diff --git a/cmake/platform/linux/gbm.cmake b/cmake/platform/linux/gbm.cmake +index e5b44ada46..b25ea7b0eb 100644 +--- a/cmake/platform/linux/gbm.cmake ++++ b/cmake/platform/linux/gbm.cmake +@@ -1,4 +1,4 @@ +-set(PLATFORM_REQUIRED_DEPS EGL GBM 
LibDRM LibInput Xkbcommon) ++set(PLATFORM_REQUIRED_DEPS EGL GBM LibDRM LibInput Xkbcommon MMAL) + set(PLATFORM_OPTIONAL_DEPS VAAPI) + + set(GBM_RENDER_SYSTEM "" CACHE STRING "Render system to use with GBM: \"gl\" or \"gles\"") +diff --git a/xbmc/SystemGlobals.cpp b/xbmc/SystemGlobals.cpp +index 435b1f5090..df1ae2cced 100644 +--- a/xbmc/SystemGlobals.cpp ++++ b/xbmc/SystemGlobals.cpp +@@ -24,9 +24,7 @@ std::map CSpecialProtocol::m_pathMap; + + #include "filesystem/ZipManager.h" + +-#ifdef TARGET_RASPBERRY_PI + #include "platform/linux/RBP.h" +-#endif + + CLangCodeExpander g_LangCodeExpander; + CLocalizeStrings g_localizeStrings; +@@ -42,8 +40,6 @@ std::map CSpecialProtocol::m_pathMap; + CAlarmClock g_alarmClock; + CSectionLoader g_sectionLoader; + +-#ifdef TARGET_RASPBERRY_PI + CRBP g_RBP; +-#endif + + CZipManager g_ZipManager; +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +index 9718fb056d..03042edfb1 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +@@ -21,6 +21,7 @@ + #include "threads/Thread.h" + #include "threads/IRunnable.h" + #include "utils/Geometry.h" ++#include "platform/linux/RBP.h" + + // worst case number of buffers. 12 for decoder. 8 for multi-threading in ffmpeg. NUM_BUFFERS for renderer. 
+ // Note, generally these won't necessarily result in allocated pictures +diff --git a/xbmc/platform/linux/CMakeLists.txt b/xbmc/platform/linux/CMakeLists.txt +index f3d9b535bc..88076c3d5a 100644 +--- a/xbmc/platform/linux/CMakeLists.txt ++++ b/xbmc/platform/linux/CMakeLists.txt +@@ -48,6 +48,9 @@ if(CORE_PLATFORM_NAME_LC STREQUAL rbpi) + DllOMX.h + OMXClock.h + OMXCore.h) ++else() ++ list(APPEND SOURCES RBP.cpp) ++ list(APPEND HEADERS RBP.h) + endif() + + if(HAVE_SSE4_1) +diff --git a/xbmc/platform/linux/RBP.cpp b/xbmc/platform/linux/RBP.cpp +index 9558e80afa..e7b11a5f73 100644 +--- a/xbmc/platform/linux/RBP.cpp ++++ b/xbmc/platform/linux/RBP.cpp +@@ -14,7 +14,9 @@ + #include "settings/SettingsComponent.h" + #include "utils/log.h" + ++#ifdef TARGET_RASPBERRY_PI + #include "cores/omxplayer/OMXImage.h" ++#endif + #include + + #include +@@ -66,9 +68,13 @@ typedef int vc_image_t_size_check[(sizeof(VC_IMAGE_T) == 64) * 2 - 1]; + CRBP::CRBP() + { + m_initialized = false; ++#ifdef TARGET_RASPBERRY_PI + m_omx_initialized = false; ++#endif + m_DllBcmHost = new DllBcmHost(); ++#ifdef TARGET_RASPBERRY_PI + m_OMX = new COMXCore(); ++#endif + m_display = DISPMANX_NO_HANDLE; + m_mb = mbox_open(); + vcsm_init(); +@@ -79,7 +85,9 @@ CRBP::CRBP() + CRBP::~CRBP() + { + Deinitialize(); ++#ifdef TARGET_RASPBERRY_PI + delete m_OMX; ++#endif + delete m_DllBcmHost; + } + +@@ -95,9 +103,11 @@ bool CRBP::Initialize() + + m_DllBcmHost->bcm_host_init(); + ++#ifdef TARGET_RASPBERRY_PI + m_omx_initialized = m_OMX->Initialize(); + if(!m_omx_initialized) + return false; ++#endif + + char response[80] = ""; + m_arm_mem = 0; +@@ -122,8 +132,10 @@ bool CRBP::Initialize() + if (!m_gui_resolution_limit) + m_gui_resolution_limit = m_gpu_mem < 128 ? 
720:1080; + ++#ifdef TARGET_RASPBERRY_PI + g_OMXImage.Initialize(); + m_omx_image_init = true; ++#endif + return true; + } + +@@ -264,20 +276,26 @@ uint32_t CRBP::LastVsync() + + void CRBP::Deinitialize() + { ++#ifdef TARGET_RASPBERRY_PI + if (m_omx_image_init) + g_OMXImage.Deinitialize(); + + if(m_omx_initialized) + m_OMX->Deinitialize(); ++#endif + + m_DllBcmHost->bcm_host_deinit(); + + if(m_initialized) + m_DllBcmHost->Unload(); + ++#ifdef TARGET_RASPBERRY_PI + m_omx_image_init = false; ++#endif + m_initialized = false; ++#ifdef TARGET_RASPBERRY_PI + m_omx_initialized = false; ++#endif + if (m_mb) + mbox_close(m_mb); + m_mb = 0; +diff --git a/xbmc/platform/linux/RBP.h b/xbmc/platform/linux/RBP.h +index aaddd14591..c4741fd60f 100644 +--- a/xbmc/platform/linux/RBP.h ++++ b/xbmc/platform/linux/RBP.h +@@ -19,7 +19,9 @@ + #endif + + #include "DllBCM.h" ++#ifdef TARGET_RASPBERRY_PI + #include "OMXCore.h" ++#endif + #include "xbmc/utils/CPUInfo.h" + #include "threads/CriticalSection.h" + #include "threads/Event.h" +@@ -91,7 +93,9 @@ public: + int GetGUIResolutionLimit() { return m_gui_resolution_limit; } + // stride can be null for packed output + unsigned char *CaptureDisplay(int width, int height, int *stride, bool swap_red_blue, bool video_only = true); ++#ifdef TARGET_RASPBERRY_PI + DllOMX *GetDllOMX() { return m_OMX ? 
m_OMX->GetDll() : NULL; } ++#endif + uint32_t LastVsync(int64_t &time); + uint32_t LastVsync(); + uint32_t WaitVsync(uint32_t target = ~0U); +@@ -102,20 +106,26 @@ public: + private: + DllBcmHost *m_DllBcmHost; + bool m_initialized; ++#ifdef TARGET_RASPBERRY_PI + bool m_omx_initialized; + bool m_omx_image_init; ++#endif + int m_arm_mem; + int m_gpu_mem; + int m_gui_resolution_limit; + bool m_codec_mpg2_enabled; + bool m_codec_wvc1_enabled; ++#ifdef TARGET_RASPBERRY_PI + COMXCore *m_OMX; ++#endif + DISPMANX_DISPLAY_HANDLE_T m_display; + CCriticalSection m_vsync_lock; + XbmcThreads::ConditionVariable m_vsync_cond; + uint32_t m_vsync_count; + int64_t m_vsync_time; ++#ifdef TARGET_RASPBERRY_PI + class DllLibOMXCore; ++#endif + CCriticalSection m_critSection; + + int m_mb; +diff --git a/xbmc/windowing/gbm/CMakeLists.txt b/xbmc/windowing/gbm/CMakeLists.txt +index 78c788c158..c3377e9a52 100644 +--- a/xbmc/windowing/gbm/CMakeLists.txt ++++ b/xbmc/windowing/gbm/CMakeLists.txt +@@ -1,5 +1,6 @@ + set(SOURCES OptionalsReg.cpp + WinSystemGbm.cpp ++ VideoSyncPi.cpp + GBMUtils.cpp + DRMUtils.cpp + DRMLegacy.cpp +@@ -9,6 +10,7 @@ set(SOURCES OptionalsReg.cpp + + set(HEADERS OptionalsReg.h + WinSystemGbm.h ++ VideoSyncPi.h + GBMUtils.h + DRMUtils.h + DRMLegacy.h +diff --git a/xbmc/windowing/gbm/VideoSyncPi.cpp b/xbmc/windowing/gbm/VideoSyncPi.cpp +new file mode 100644 +index 0000000000..fd12528011 +--- /dev/null ++++ b/xbmc/windowing/gbm/VideoSyncPi.cpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (C) 2005-2018 Team Kodi ++ * This file is part of Kodi - https://kodi.tv ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ * See LICENSES/README.md for more information. 
++ */ ++ ++#include "VideoSyncPi.h" ++#include "ServiceBroker.h" ++#include "windowing/GraphicContext.h" ++#include "windowing/WinSystem.h" ++#include "utils/TimeUtils.h" ++#include "utils/log.h" ++#include "platform/linux/RBP.h" ++#include "threads/Thread.h" ++ ++bool CVideoSyncPi::Setup(PUPDATECLOCK func) ++{ ++ UpdateClock = func; ++ m_abort = false; ++ CServiceBroker::GetWinSystem()->Register(this); ++ CLog::Log(LOGDEBUG, "CVideoReferenceClock: setting up RPi"); ++ return true; ++} ++ ++void CVideoSyncPi::Run(CEvent& stopEvent) ++{ ++ /* This shouldn't be very busy and timing is important so increase priority */ ++ CThread::GetCurrentThread()->SetPriority(CThread::GetCurrentThread()->GetPriority()+1); ++ ++ while (!stopEvent.Signaled() && !m_abort) ++ { ++ g_RBP.WaitVsync(); ++ uint64_t now = CurrentHostCounter(); ++ UpdateClock(1, now, m_refClock); ++ } ++} ++ ++void CVideoSyncPi::Cleanup() ++{ ++ CLog::Log(LOGDEBUG, "CVideoReferenceClock: cleaning up RPi"); ++ CServiceBroker::GetWinSystem()->Unregister(this); ++} ++ ++float CVideoSyncPi::GetFps() ++{ ++ m_fps = CServiceBroker::GetWinSystem()->GetGfxContext().GetFPS(); ++ CLog::Log(LOGDEBUG, "CVideoReferenceClock: fps: %.2f", m_fps); ++ return m_fps; ++} ++ ++void CVideoSyncPi::OnResetDisplay() ++{ ++ m_abort = true; ++} ++ ++void CVideoSyncPi::RefreshChanged() ++{ ++ if (m_fps != CServiceBroker::GetWinSystem()->GetGfxContext().GetFPS()) ++ m_abort = true; ++} +diff --git a/xbmc/windowing/gbm/VideoSyncPi.h b/xbmc/windowing/gbm/VideoSyncPi.h +new file mode 100644 +index 0000000000..e0c759fa7f +--- /dev/null ++++ b/xbmc/windowing/gbm/VideoSyncPi.h +@@ -0,0 +1,27 @@ ++/* ++ * Copyright (C) 2005-2018 Team Kodi ++ * This file is part of Kodi - https://kodi.tv ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ * See LICENSES/README.md for more information. 
++ */ ++ ++#pragma once ++ ++#include "windowing/VideoSync.h" ++#include "guilib/DispResource.h" ++ ++class CVideoSyncPi : public CVideoSync, IDispResource ++{ ++public: ++ CVideoSyncPi(void *clock) : CVideoSync(clock) {}; ++ virtual bool Setup(PUPDATECLOCK func); ++ virtual void Run(CEvent& stopEvent); ++ virtual void Cleanup(); ++ virtual float GetFps(); ++ virtual void OnResetDisplay(); ++ virtual void RefreshChanged(); ++ ++private: ++ volatile bool m_abort; ++}; +diff --git a/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp b/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp +index f763577117..e73381f350 100644 +--- a/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp ++++ b/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp +@@ -6,15 +6,12 @@ + * See LICENSES/README.md for more information. + */ + +-#include "cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecDRMPRIME.h" +-#include "cores/VideoPlayer/VideoRenderers/HwDecRender/RendererDRMPRIME.h" +-#include "cores/VideoPlayer/VideoRenderers/HwDecRender/RendererDRMPRIMEGLES.h" +- + #include "cores/RetroPlayer/process/gbm/RPProcessInfoGbm.h" + #include "cores/RetroPlayer/rendering/VideoRenderers/RPRendererGBM.h" + #include "cores/RetroPlayer/rendering/VideoRenderers/RPRendererOpenGLES.h" + #include "cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h" +-#include "cores/VideoPlayer/Process/gbm/ProcessInfoGBM.h" ++#include "cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.h" ++#include "cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h" + #include "cores/VideoPlayer/VideoRenderers/LinuxRendererGLES.h" + #include "cores/VideoPlayer/VideoRenderers/RenderFactory.h" + +@@ -22,6 +19,7 @@ + #include "platform/linux/XTimeUtils.h" + #include "utils/log.h" + #include "WinSystemGbmGLESContext.h" ++#include "VideoSyncPi.h" + + #include + #include +@@ -43,9 +41,10 @@ bool CWinSystemGbmGLESContext::InitWindowSystem() + { + VIDEOPLAYER::CRendererFactory::ClearRenderer(); + CDVDFactoryCodec::ClearHWAccels(); ++ CDVDFactoryCodec::ClearHWVideoCodecs(); ++ + 
CLinuxRendererGLES::Register(); + RETRO::CRPProcessInfoGbm::Register(); +- RETRO::CRPProcessInfoGbm::RegisterRendererFactory(new RETRO::CRendererFactoryGBM); + RETRO::CRPProcessInfoGbm::RegisterRendererFactory(new RETRO::CRendererFactoryOpenGLES); + + if (!CWinSystemGbmEGLContext::InitWindowSystemEGL(EGL_OPENGL_ES2_BIT, EGL_OPENGL_ES_API)) +@@ -53,20 +52,9 @@ bool CWinSystemGbmGLESContext::InitWindowSystem() + return false; + } + +- bool general, deepColor; +- m_vaapiProxy.reset(GBM::VaapiProxyCreate(m_DRM->GetRenderNodeFileDescriptor())); +- GBM::VaapiProxyConfig(m_vaapiProxy.get(), m_eglContext.GetEGLDisplay()); +- GBM::VAAPIRegisterRender(m_vaapiProxy.get(), general, deepColor); +- +- if (general) +- { +- GBM::VAAPIRegister(m_vaapiProxy.get(), deepColor); +- } +- +- CRendererDRMPRIMEGLES::Register(); +- CRendererDRMPRIME::Register(); +- CDVDVideoCodecDRMPRIME::Register(); +- VIDEOPLAYER::CProcessInfoGBM::Register(); ++ MMAL::CDecoder::Register(); ++ MMAL::CMMALRenderer::Register(); ++ MMAL::CMMALVideo::Register(); + + return true; + } +@@ -144,3 +132,17 @@ bool CWinSystemGbmGLESContext::CreateContext() + } + return true; + } ++ ++std::unique_ptr CWinSystemGbmGLESContext::GetVideoSync(void *clock) ++{ ++ std::unique_ptr pVSync(new CVideoSyncPi(clock)); ++ return pVSync; ++} ++ ++void CWinSystemGbmGLESContext::SetVSyncImpl(bool enable) ++{ ++ if (!m_eglContext.SetVSync(enable)) ++ { ++ CLog::Log(LOGERROR, "%s,Could not set egl vsync", __FUNCTION__); ++ } ++} +diff --git a/xbmc/windowing/gbm/WinSystemGbmGLESContext.h b/xbmc/windowing/gbm/WinSystemGbmGLESContext.h +index d80d9770d5..95954a606c 100644 +--- a/xbmc/windowing/gbm/WinSystemGbmGLESContext.h ++++ b/xbmc/windowing/gbm/WinSystemGbmGLESContext.h +@@ -34,7 +34,8 @@ public: + bool SetFullScreen(bool fullScreen, RESOLUTION_INFO& res, bool blankOtherDisplays) override; + void PresentRender(bool rendered, bool videoLayer) override; + protected: +- void SetVSyncImpl(bool enable) override { return; }; ++ void 
SetVSyncImpl(bool enable) override; ++ virtual std::unique_ptr GetVideoSync(void *clock) override; + void PresentRenderImpl(bool rendered) override {}; + bool CreateContext() override; + }; +-- +2.20.1 + + +From 21a4e178608b931f35128eb378d99657b5ecdf43 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Wed, 3 Apr 2019 19:19:45 +0100 +Subject: [PATCH 08/14] MMALRenderer: Avoid advanced deinterlace in gbm mode + +--- + .../VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index fae5df73e2..e4a9a2c9ea 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -745,7 +745,9 @@ void CMMALRenderer::Run() + bool interlace = (omvb->mmal_buffer->flags & MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED) ? true:false; + + // advanced deinterlace requires 3 frames of context so disable when showing stills ++#if !defined(HAVE_GBM) + if (omvb->m_stills) ++#endif + { + if (interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED) + interlace_method = VS_INTERLACEMETHOD_MMAL_BOB; +-- +2.20.1 + + +From 9a03bf88878532faa76532105bb6298f28a78cd1 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Wed, 24 Apr 2019 16:22:27 +0100 +Subject: [PATCH 09/14] MMALRender: Move video plane behind GUI + +--- + .../VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index e4a9a2c9ea..2114712c29 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -1280,6 +1280,9 @@ void CMMALRenderer::SetVideoRect(const CRect& 
InSrcRect, const CRect& InDestRect + region.noaspect = MMAL_TRUE; + region.mode = MMAL_DISPLAY_MODE_LETTERBOX; + ++ region.set |= MMAL_DISPLAY_SET_LAYER; ++ region.layer = -128; ++ + if (m_renderOrientation == 90) + region.transform = MMAL_DISPLAY_ROT90; + else if (m_renderOrientation == 180) +-- +2.20.1 + + +From 736818a17b48faf18e3bcdaa9f3069cded8ea6f9 Mon Sep 17 00:00:00 2001 +From: Matthias Reichl +Date: Fri, 26 Apr 2019 17:33:36 +0200 +Subject: [PATCH 10/14] WinSystemGbmGLESContext: register CProcessInfoPi + +Signed-off-by: Matthias Reichl +--- + xbmc/cores/VideoPlayer/Process/rbpi/CMakeLists.txt | 4 ++-- + xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp | 2 ++ + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/Process/rbpi/CMakeLists.txt b/xbmc/cores/VideoPlayer/Process/rbpi/CMakeLists.txt +index 1a41576405..2b0c66ebc7 100644 +--- a/xbmc/cores/VideoPlayer/Process/rbpi/CMakeLists.txt ++++ b/xbmc/cores/VideoPlayer/Process/rbpi/CMakeLists.txt +@@ -1,7 +1,7 @@ +-if(CORE_PLATFORM_NAME_LC STREQUAL rbpi) ++# if(CORE_PLATFORM_NAME_LC STREQUAL rbpi) + set(SOURCES ProcessInfoPi.cpp) + + set(HEADERS ProcessInfoPi.h) + + core_add_library(processPi) +-endif() ++# endif() +diff --git a/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp b/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp +index e73381f350..c4997cc2f4 100644 +--- a/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp ++++ b/xbmc/windowing/gbm/WinSystemGbmGLESContext.cpp +@@ -14,6 +14,7 @@ + #include "cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h" + #include "cores/VideoPlayer/VideoRenderers/LinuxRendererGLES.h" + #include "cores/VideoPlayer/VideoRenderers/RenderFactory.h" ++#include "cores/VideoPlayer/Process/rbpi/ProcessInfoPi.h" + + #include "OptionalsReg.h" + #include "platform/linux/XTimeUtils.h" +@@ -44,6 +45,7 @@ bool CWinSystemGbmGLESContext::InitWindowSystem() + CDVDFactoryCodec::ClearHWVideoCodecs(); + + CLinuxRendererGLES::Register(); ++ CProcessInfoPi::Register(); + 
RETRO::CRPProcessInfoGbm::Register(); + RETRO::CRPProcessInfoGbm::RegisterRendererFactory(new RETRO::CRendererFactoryOpenGLES); + +-- +2.20.1 + + +From 2958fa06af85e097f811a984cbaf718884e6a17e Mon Sep 17 00:00:00 2001 +From: Matthias Reichl +Date: Sun, 28 Apr 2019 10:57:11 +0200 +Subject: [PATCH 11/14] Screenshot: use RPi/dispmanx code to capture screenshot + +Signed-off-by: Matthias Reichl +--- + xbmc/utils/Screenshot.cpp | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xbmc/utils/Screenshot.cpp b/xbmc/utils/Screenshot.cpp +index 0616e24a18..ad82156384 100644 +--- a/xbmc/utils/Screenshot.cpp ++++ b/xbmc/utils/Screenshot.cpp +@@ -17,7 +17,7 @@ + + #include "pictures/Picture.h" + +-#ifdef TARGET_RASPBERRY_PI ++#if 1 //#ifdef TARGET_RASPBERRY_PI + #include "platform/linux/RBP.h" + #endif + +@@ -62,7 +62,7 @@ CScreenshotSurface::~CScreenshotSurface() + + bool CScreenshotSurface::capture() + { +-#if defined(TARGET_RASPBERRY_PI) ++#if 1 //#if defined(TARGET_RASPBERRY_PI) + g_RBP.GetDisplaySize(m_width, m_height); + m_buffer = g_RBP.CaptureDisplay(m_width, m_height, &m_stride, true, false); + if (!m_buffer) +-- +2.20.1 + + +From 0412bf4ffbd7bc1695a2e9e975ce7ca6ef7fd51c Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 25 Apr 2019 17:11:53 +0100 +Subject: [PATCH 12/14] RBP: Hack: open display after changing hdmi mode + +--- + xbmc/platform/linux/RBP.cpp | 6 ++++-- + xbmc/platform/xbmc.cpp | 4 ++-- + xbmc/windowing/gbm/DRMAtomic.cpp | 12 +++++++++++- + xbmc/windowing/gbm/DRMAtomic.h | 2 ++ + 4 files changed, 19 insertions(+), 5 deletions(-) + +diff --git a/xbmc/platform/linux/RBP.cpp b/xbmc/platform/linux/RBP.cpp +index e7b11a5f73..255f4640c0 100644 +--- a/xbmc/platform/linux/RBP.cpp ++++ b/xbmc/platform/linux/RBP.cpp +@@ -162,6 +162,7 @@ static void vsync_callback_static(DISPMANX_UPDATE_HANDLE_T u, void *arg) + + DISPMANX_DISPLAY_HANDLE_T CRBP::OpenDisplay(uint32_t device) + { ++ DISPMANX_DISPLAY_HANDLE_T last_display = m_display; + 
CSingleLock lock(m_critSection); + if (m_display == DISPMANX_NO_HANDLE) + { +@@ -169,12 +170,14 @@ DISPMANX_DISPLAY_HANDLE_T CRBP::OpenDisplay(uint32_t device) + int s = vc_dispmanx_vsync_callback(m_display, vsync_callback_static, (void *)this); + assert(s == 0); + } ++ CLog::Log(LOGDEBUG, "CRBP::%s device:%d m_display:%x (%x)", __FUNCTION__, device, m_display, last_display); + return m_display; + } + + void CRBP::CloseDisplay(DISPMANX_DISPLAY_HANDLE_T display) + { + CSingleLock lock(m_critSection); ++ CLog::Log(LOGDEBUG, "CRBP::%s display:%x m_display:%x", __FUNCTION__, display, m_display); + assert(display == m_display); + int s = vc_dispmanx_vsync_callback(m_display, NULL, NULL); + assert(s == 0); +@@ -244,7 +247,6 @@ void CRBP::VSyncCallback() + uint32_t CRBP::WaitVsync(uint32_t target) + { + CSingleLock vlock(m_vsync_lock); +- DISPMANX_DISPLAY_HANDLE_T display = m_display; + XbmcThreads::EndTime delay(50); + if (target == ~0U) + target = m_vsync_count+1; +@@ -256,7 +258,7 @@ uint32_t CRBP::WaitVsync(uint32_t target) + break; + } + if ((signed)(m_vsync_count - target) < 0) +- CLog::Log(LOGDEBUG, "CRBP::%s no vsync %d/%d display:%x(%x) delay:%d", __FUNCTION__, m_vsync_count, target, m_display, display, delay.MillisLeft()); ++ CLog::Log(LOGDEBUG, "CRBP::%s no vsync %d/%d display:%x delay:%d", __FUNCTION__, m_vsync_count, target, m_display, delay.MillisLeft()); + + return m_vsync_count; + } +diff --git a/xbmc/platform/xbmc.cpp b/xbmc/platform/xbmc.cpp +index e9f39178ca..4617c196d8 100644 +--- a/xbmc/platform/xbmc.cpp ++++ b/xbmc/platform/xbmc.cpp +@@ -8,7 +8,7 @@ + + #include "Application.h" + +-#ifdef TARGET_RASPBERRY_PI ++#if 1//def TARGET_RASPBERRY_PI + #include "platform/linux/RBP.h" + #endif + +@@ -36,7 +36,7 @@ extern "C" int XBMC_Run(bool renderGUI, const CAppParamParser &params) + return status; + } + +-#ifdef TARGET_RASPBERRY_PI ++#if 1//def TARGET_RASPBERRY_PI + if(!g_RBP.Initialize()) + return false; + g_RBP.LogFirmwareVersion(); +diff --git
a/xbmc/windowing/gbm/DRMAtomic.cpp b/xbmc/windowing/gbm/DRMAtomic.cpp +index 0299fc9caa..de2c905e61 100644 +--- a/xbmc/windowing/gbm/DRMAtomic.cpp ++++ b/xbmc/windowing/gbm/DRMAtomic.cpp +@@ -118,10 +118,20 @@ void CDRMAtomic::FlipPage(struct gbm_bo *bo, bool rendered, bool videoLayer) + if (m_need_modeset) + { + flags |= DRM_MODE_ATOMIC_ALLOW_MODESET; +- m_need_modeset = false; ++ if (m_dispman_display != DISPMANX_NO_HANDLE) ++ { ++ g_RBP.CloseDisplay(m_dispman_display); ++ m_dispman_display = DISPMANX_NO_HANDLE; ++ } + } + + DrmAtomicCommit(!drm_fb ? 0 : drm_fb->fb_id, flags, rendered, videoLayer); ++ ++ if (m_need_modeset) ++ { ++ m_need_modeset = false; ++ m_dispman_display = g_RBP.OpenDisplay(0); ++ } + } + + bool CDRMAtomic::InitDrm() +diff --git a/xbmc/windowing/gbm/DRMAtomic.h b/xbmc/windowing/gbm/DRMAtomic.h +index 1aa96127f4..3dcb449103 100644 +--- a/xbmc/windowing/gbm/DRMAtomic.h ++++ b/xbmc/windowing/gbm/DRMAtomic.h +@@ -9,6 +9,7 @@ + #pragma once + + #include "DRMUtils.h" ++#include "platform/linux/RBP.h" + + namespace KODI + { +@@ -36,6 +37,7 @@ private: + bool m_need_modeset; + bool m_active = true; + drmModeAtomicReq *m_req = nullptr; ++ DISPMANX_ELEMENT_HANDLE_T m_dispman_display = 0; + }; + + } +-- +2.20.1 + + +From bd3966f39af236f5d332781e1446a7eb3aa56c49 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 17 May 2019 18:54:03 +0100 +Subject: [PATCH 13/14] RPI: Some fixes for 10-bit geometry + +--- + .../DVDCodecs/Video/MMALFFmpeg.cpp | 12 +++++--- + .../HwDecRender/MMALRenderer.cpp | 5 +++- + .../VideoRenderers/HwDecRender/MMALRenderer.h | 4 +-- + xbmc/platform/linux/RBP.cpp | 29 +++++++++++++++++++ + xbmc/platform/linux/RBP.h | 4 +++ + 5 files changed, 47 insertions(+), 7 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +index 854f34fa62..030ca1b7bf 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp ++++ 
b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +@@ -58,11 +58,13 @@ void CMMALYUVBuffer::GetPlanes(uint8_t*(&planes)[YuvImage::MAX_PLANES]) + AVRpiZcFrameGeometry geo = pool->GetGeometry(); + + if (VERBOSE) +- CLog::Log(LOGDEBUG, LOGVIDEO, "%s::%s %dx%d %dx%d (%dx%d %dx%d)", CLASSNAME, __FUNCTION__, geo.getStrideY(), geo.getHeightY(), geo.getStrideC(), geo.getHeightC(), Width(), Height(), AlignedWidth(), AlignedHeight()); ++ CLog::Log(LOGDEBUG, LOGVIDEO, "%s::%s %dx%d %dx%d (%dx%d %dx%d) ofu:%d szy:%d szu:%d yc:%d", CLASSNAME, __FUNCTION__, ++ geo.getHeightY(), geo.getHeightC(), geo.getStrideY(), geo.getStrideC(), Width(), Height(), AlignedWidth(), AlignedHeight(), ++ geo.getOffsetU(), geo.getSizeY(), geo.getSizeC(), geo.getStripeIsYc() ); + + planes[0] = GetMemPtr(); + if (planes[0] && geo.getPlanesC() >= 1) +- planes[1] = planes[0] + geo.getSizeY(); ++ planes[1] = planes[0] + geo.getOffsetU(); + if (planes[1] && geo.getPlanesC() >= 2) + planes[2] = planes[1] + geo.getSizeC(); + } +@@ -78,7 +80,7 @@ void CMMALYUVBuffer::GetStrides(int(&strides)[YuvImage::MAX_PLANES]) + strides[1] = geo.getStrideC(); + strides[2] = geo.getStrideC(); + if (geo.getStripes() > 1) +- strides[3] = geo.getHeightY() + geo.getHeightC(); // abuse: strides[3] = stripe stride ++ strides[3] = geo.getStripeIsYc() ? 
geo.getHeightY() + geo.getHeightC() : geo.getHeightY(); // abuse: strides[3] = stripe stride + } + + void CMMALYUVBuffer::SetDimensions(int width, int height, const int (&strides)[YuvImage::MAX_PLANES], const int (&planeOffsets)[YuvImage::MAX_PLANES]) +@@ -190,7 +192,8 @@ int CDecoder::FFGetBuffer(AVCodecContext *avctx, AVFrame *frame, int flags) + { + int aligned_width = frame->width; + int aligned_height = frame->height; +- if (pool->Encoding() != MMAL_ENCODING_YUVUV128 && pool->Encoding() != MMAL_ENCODING_YUVUV64_16) ++ if (dec->m_fmt != AV_PIX_FMT_SAND128 && dec->m_fmt != AV_PIX_FMT_SAND64_10 && dec->m_fmt != AV_PIX_FMT_SAND64_16 && ++ dec->m_fmt != AV_PIX_FMT_RPI && dec->m_fmt != AV_PIX_FMT_RPI4_8 && dec->m_fmt != AV_PIX_FMT_RPI4_10) + { + // ffmpeg requirements + AlignedSize(dec->m_avctx, aligned_width, aligned_height); +@@ -287,6 +290,7 @@ CDVDVideoCodec::VCReturn CDecoder::Decode(AVCodecContext* avctx, AVFrame* frame) + { + if ((frame->format != AV_PIX_FMT_YUV420P && frame->format != AV_PIX_FMT_YUV420P10 && frame->format != AV_PIX_FMT_YUV420P12 && frame->format != AV_PIX_FMT_YUV420P14 && frame->format != AV_PIX_FMT_YUV420P16 && + frame->format != AV_PIX_FMT_SAND128 && frame->format != AV_PIX_FMT_SAND64_10 && frame->format != AV_PIX_FMT_SAND64_16 && ++ frame->format != AV_PIX_FMT_RPI && frame->format != AV_PIX_FMT_RPI4_8 && frame->format != AV_PIX_FMT_RPI4_10 && + frame->format != AV_PIX_FMT_BGR0 && frame->format != AV_PIX_FMT_RGB565LE) || + frame->buf[1] != nullptr || frame->buf[0] == nullptr) + { +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index 2114712c29..d2f3d21781 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -194,6 +194,9 @@ std::vector CMMALPool::mmal_encoding_table = + { AV_PIX_FMT_YUV420P14,MMAL_ENCODING_I420_16, }, + { 
AV_PIX_FMT_YUV420P16,MMAL_ENCODING_I420_16, }, + { AV_PIX_FMT_SAND128, MMAL_ENCODING_YUVUV128 }, ++ { AV_PIX_FMT_RPI, MMAL_ENCODING_YUVUV128 }, ++ { AV_PIX_FMT_RPI4_8, MMAL_ENCODING_YUVUV128 }, ++ { AV_PIX_FMT_RPI4_10, MMAL_ENCODING_YUV10_COL }, + { AV_PIX_FMT_SAND64_10,MMAL_ENCODING_YUVUV64_16 }, + { AV_PIX_FMT_SAND64_16,MMAL_ENCODING_YUVUV64_16 }, + { AV_PIX_FMT_RGBA, MMAL_ENCODING_RGBA, }, +@@ -234,7 +237,7 @@ void CMMALPool::Configure(AVPixelFormat format, int width, int height, int align + if (m_mmal_format != MMAL_ENCODING_UNKNOWN) + { + m_geo = g_RBP.GetFrameGeometry(m_mmal_format, alignedWidth, alignedHeight); +- if (m_mmal_format != MMAL_ENCODING_YUVUV128 && m_mmal_format != MMAL_ENCODING_YUVUV64_16 ) ++ if (m_mmal_format != MMAL_ENCODING_YUVUV128 && m_mmal_format != MMAL_ENCODING_YUVUV64_16 && m_mmal_format != MMAL_ENCODING_YUV10_COL ) + { + if (alignedWidth) + { +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +index 03042edfb1..644174dfeb 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +@@ -59,8 +59,8 @@ public: + static uint32_t TranslateFormat(AVPixelFormat pixfmt); + virtual int Width() { return m_width; } + virtual int Height() { return m_height; } +- virtual int AlignedWidth() { return m_mmal_format == MMAL_ENCODING_YUVUV128 || m_mmal_format == MMAL_ENCODING_YUVUV64_16 || m_geo.getBytesPerPixel() == 0 ? 0 : m_geo.getStrideY() / m_geo.getBytesPerPixel(); } +- virtual int AlignedHeight() { return m_mmal_format == MMAL_ENCODING_YUVUV128 || m_mmal_format == MMAL_ENCODING_YUVUV64_16 ? 0 : m_geo.getHeightY(); } ++ virtual int AlignedWidth() { return m_mmal_format == MMAL_ENCODING_YUVUV128 || m_mmal_format == MMAL_ENCODING_YUVUV64_16 || m_mmal_format == MMAL_ENCODING_YUV10_COL || m_geo.getBytesPerPixel() == 0 ? 
0 : m_geo.getStrideY() / m_geo.getBytesPerPixel(); } ++ virtual int AlignedHeight() { return m_mmal_format == MMAL_ENCODING_YUVUV128 || m_mmal_format == MMAL_ENCODING_YUVUV64_16 || m_mmal_format == MMAL_ENCODING_YUV10_COL ? 0 : m_geo.getHeightY(); } + virtual int BitsPerPixel() { return m_geo.getBitsPerPixel(); } + virtual uint32_t &Encoding() { return m_mmal_format; } + virtual int Size() { return m_size; } +diff --git a/xbmc/platform/linux/RBP.cpp b/xbmc/platform/linux/RBP.cpp +index 255f4640c0..6c045e9674 100644 +--- a/xbmc/platform/linux/RBP.cpp ++++ b/xbmc/platform/linux/RBP.cpp +@@ -459,6 +459,7 @@ AVRpiZcFrameGeometry CRBP::GetFrameGeometry(uint32_t encoding, unsigned short vi + geo.setHeightY((video_height + 15) & ~15); + geo.setHeightC(geo.getHeightY() >> 1); + geo.setPlanesC(2); ++ geo.setStripeIsYc(1); + break; + case MMAL_ENCODING_I420_16: + geo.setBitsPerPixel(10); +@@ -467,6 +468,7 @@ AVRpiZcFrameGeometry CRBP::GetFrameGeometry(uint32_t encoding, unsigned short vi + geo.setHeightY((video_height + 15) & ~15); + geo.setHeightC(geo.getHeightY() >> 1); + geo.setPlanesC(2); ++ geo.setStripeIsYc(1); + break; + case MMAL_ENCODING_OPAQUE: + geo.setStrideY(video_width); +@@ -487,6 +489,14 @@ AVRpiZcFrameGeometry CRBP::GetFrameGeometry(uint32_t encoding, unsigned short vi + geo.setHeightC(img.pitch / stripe_w - geo.getHeightY()); + geo.setPlanesC(1); + geo.setStripes((video_width + stripe_w - 1) / stripe_w); ++ geo.setStripeIsYc(1); ++ if (geo.getHeightY() * stripe_w > img.pitch) ++ { ++ // "tall" sand - all C blocks now follow Y ++ geo.setHeightY(img.pitch / stripe_w); ++ geo.setHeightC(geo.getHeightY()); ++ geo.setStripeIsYc(0); ++ } + break; + } + case MMAL_ENCODING_YUVUV64_16: +@@ -505,6 +515,25 @@ AVRpiZcFrameGeometry CRBP::GetFrameGeometry(uint32_t encoding, unsigned short vi + geo.setHeightC(img.pitch / stripe_w - geo.getHeightY()); + geo.setPlanesC(1); + geo.setStripes((video_width * 2 + stripe_w - 1) / stripe_w); ++ geo.setStripeIsYc(1); ++ break; ++ } 
++ case MMAL_ENCODING_YUV10_COL: ++ { ++ VC_IMAGE_T img = {}; ++ img.type = VC_IMAGE_YUV10COL; ++ img.width = video_width; ++ img.height = video_height; ++ int rc = get_image_params(GetMBox(), &img); ++ assert(rc == 0); ++ const unsigned int stripe_w = 128; ++ geo.setStrideY(stripe_w); ++ geo.setStrideC(stripe_w); ++ geo.setHeightY(((intptr_t)img.extra.uv.u - (intptr_t)img.image_data) / stripe_w); ++ geo.setHeightC(img.pitch / stripe_w - geo.getHeightY()); ++ geo.setPlanesC(1); ++ geo.setStripes(((video_width * 4 + 2) / 3 + stripe_w - 1) / stripe_w); ++ geo.setStripeIsYc(1); + break; + } + default: assert(0); +diff --git a/xbmc/platform/linux/RBP.h b/xbmc/platform/linux/RBP.h +index c4741fd60f..63e02a1826 100644 +--- a/xbmc/platform/linux/RBP.h ++++ b/xbmc/platform/linux/RBP.h +@@ -38,9 +38,11 @@ public: + unsigned int getStripes() { return stripes; } + unsigned int getBitsPerPixel() { return bits_per_pixel; } + unsigned int getBytesPerPixel() { return (bits_per_pixel + 7) >> 3; } ++ unsigned int getStripeIsYc() { return stripe_is_yc; } + unsigned int getSizeY() { return stride_y * height_y; } + unsigned int getSizeC() { return stride_c * height_c; } + unsigned int getSize() { return (getSizeY() + getSizeC() * getPlanesC()) * getStripes(); } ++ unsigned int getOffsetU() { return stripe_is_yc ? 
getSizeY() : getSizeY() * stripes; } + void setStrideY(unsigned int v) { stride_y = v; } + void setHeightY(unsigned int v) { height_y = v; } + void setStrideC(unsigned int v) { stride_c = v; } +@@ -49,6 +51,7 @@ public: + void setStripes(unsigned int v) { stripes = v; } + void setBitsPerPixel(unsigned int v) { bits_per_pixel = v; } + void setBytesPerPixel(unsigned int v) { bits_per_pixel = v * 8; } ++ void setStripeIsYc(unsigned int v) { stripe_is_yc = v; } + private: + unsigned int stride_y = 0; + unsigned int height_y = 0; +@@ -57,6 +60,7 @@ private: + unsigned int planes_c = 0; + unsigned int stripes = 0; + unsigned int bits_per_pixel = 0; ++ unsigned int stripe_is_yc = 0; + }; + + class CGPUMEM +-- +2.20.1 + + +From 3a51e92daaeb528d001f071d00f01d9b513e5e56 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 17 May 2019 18:59:25 +0100 +Subject: [PATCH 14/14] RPI: Increase number of referenced frames + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +index 030ca1b7bf..1dfcf1cef2 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +@@ -364,7 +364,7 @@ CDVDVideoCodec::VCReturn CDecoder::Check(AVCodecContext* avctx) + + unsigned CDecoder::GetAllowedReferences() + { +- return 6; ++ return 7; + } + + IHardwareDecoder* CDecoder::Create(CDVDStreamInfo &hint, CProcessInfo &processInfo, AVPixelFormat fmt) +-- +2.20.1 +